Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
D
Downsampling
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Anne-Sophie Denommé-Pichon
Downsampling
Commits
6639cf8d
Commit
6639cf8d
authored
Jan 29, 2021
by
Anne-Sophie Denommé-Pichon
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Create scripts to downsample BAM files
parents
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
114 additions
and
0 deletions
+114
-0
bam_downsampling.py
bam_downsampling.py
+86
-0
downsampling.sh
downsampling.sh
+28
-0
No files found.
bam_downsampling.py
0 → 100755
View file @
6639cf8d
#!/usr/bin/env python
import
sys
import
getopt
import
pysam
import
logging
import
math
import
threading
import
random
# ~ import matplotlib.pyplot as plt
def parse_args(argv):
    """Parse the command-line options of the downsampling script.

    Recognised options (all take a value):
        -i  path of the alignment file to read
        -o  path of the BAM file to write
        -d  downsampling rate, an integer percentage between 0 and 100
            (defaults to 0, i.e. no read is kept)
        -l  path of a log file that replaces sys.stderr (optional)

    Args:
        argv: option list without the program name (e.g. sys.argv[1:]).

    Returns:
        Tuple (inputFile, outputFile, downSamplingRate, logFile); logFile is
        an empty string when -l was not given.

    Raises:
        getopt.GetoptError: unknown option or missing option value.
        ValueError: the -d value is not an integer.
        SystemExit: -i or -o is missing, or the rate is outside 0-100.
    """
    inputFile = ""
    outputFile = ""
    downSamplingRate = 0
    logFile = ""

    opts, _ = getopt.getopt(argv, 'i:o:d:l:')
    for opt, arg in opts:
        if opt == "-i":
            inputFile = arg
        elif opt == "-o":
            outputFile = arg
        elif opt == "-d":
            downSamplingRate = int(arg)
        elif opt == "-l":
            logFile = arg

    # Fail early with a readable message instead of a NameError (missing -o)
    # or an obscure pysam error (missing -i) later on.
    if not inputFile or not outputFile:
        sys.exit("usage: bam_downsampling.py -i INPUT -o OUTPUT.bam "
                 "[-d RATE(0-100)] [-l LOGFILE]")
    if not 0 <= downSamplingRate <= 100:
        sys.exit("downsampling rate must be between 0 and 100, got %d"
                 % downSamplingRate)

    return inputFile, outputFile, downSamplingRate, logFile


def keep_read(downSamplingRate, rng=random):
    """Decide at random whether one read is kept.

    Args:
        downSamplingRate: percentage (0-100) of reads to keep.
        rng: object exposing random() returning a float in [0.0, 1.0);
            defaults to the random module, pass a seeded random.Random for
            reproducible runs.

    Returns:
        True with probability downSamplingRate / 100. A rate of 0 never keeps
        a read and a rate of 100 always does.
    """
    # random() is in [0.0, 1.0), so the strict comparison gives exactly
    # rate/100. The previous randint(0, 100) <= rate drew from 101 values and
    # kept (rate + 1) / 101 of the reads (about 1% even for a rate of 0).
    return rng.random() * 100 < downSamplingRate


def main(argv=None):
    """Write a random subset of the primary mapped reads to a new BAM.

    Unmapped, secondary and supplementary alignments are always dropped;
    every other read is kept independently with probability rate / 100.

    Args:
        argv: option list without the program name; defaults to sys.argv[1:].
    """
    if argv is None:
        argv = sys.argv[1:]
    inputFile, outputFile, downSamplingRate, logFile = parse_args(argv)

    if logFile:
        # Intentionally left open for the lifetime of the process so that
        # anything Python writes to sys.stderr afterwards (including an
        # uncaught traceback) ends up in the log file.
        sys.stderr = open(logFile, 'w')

    sys.stderr.write("Parsing bam file : %s ..." % inputFile)

    # The context managers close both files even if iteration fails, which
    # avoids leaving a BAM without its end-of-file marker.
    with pysam.AlignmentFile(inputFile, "r") as bamIterRef:
        with pysam.AlignmentFile(outputFile, "wb",
                                 template=bamIterRef) as bamOutStream:
            for read in bamIterRef:
                # Skip alignments that are not a primary mapped record.
                if (read.is_unmapped or read.is_secondary
                        or read.is_supplementary):
                    continue
                # NOTE(review): reads are sampled one by one, so the two mates
                # of a pair are kept or dropped independently - confirm this
                # is acceptable for the downstream analysis.
                if keep_read(downSamplingRate):
                    bamOutStream.write(read)


if __name__ == "__main__":
    main()
downsampling.sh
0 → 100755
View file @
6639cf8d
#! /bin/sh
# Build 20 independent random downsamplings of one sample's BAM file.
# Each replicate gets its own directory under DSDIR containing the
# downsampled BAM, its index and the log of the downsampling run.
# All replicates run in parallel; the script exits non-zero if any fails.

DIJEN="dijen017"
INPUT="/work/gad/shared/analyse/STR/Data/${DIJEN}/${DIJEN}/${DIJEN}.bam"
DSDIR="/work/gad/shared/analyse/STR/Data/downsampling"
DSR=10

# DSR   : downsampling rate (0 - 100)
# DSDIR : downsampled BAM directory

set -x

pids=""
for i in $(seq 20)
do
    (
        # Stop this replicate at the first failing step so that
        # samtools never indexes a missing or truncated BAM.
        set -e

        name="${DIJEN}_downsampling_${DSR}_${i}"
        outdir="${DSDIR}/${name}"

        mkdir -p "${outdir}"
        /user1/gad/an1770de/Scripts/bam_downsampling.py \
            -i "${INPUT}" \
            -o "${outdir}/${name}.bam" \
            -d "${DSR}" \
            -l "${outdir}/${name}.log"
        samtools index "${outdir}/${name}.bam"
    ) &
    pids="${pids} $!"
done

# A bare `wait` always returns 0; wait on each job to collect its status.
status=0
for pid in ${pids}
do
    wait "${pid}" || status=1
done
exit "${status}"
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment