Skip to content

Commit 317dd41

Browse files
committed
Add nanoporeModule.nf
1 parent c16bb12 commit 317dd41

File tree

1 file changed

+251
-0
lines changed

1 file changed

+251
-0
lines changed

nanoporeModule.nf

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
#!/usr/bin/env nextflow
2+
3+
// Basecall a single input file with dorado and capture the result as a BAM.
//
// Inputs:
//   inputFile - file handed to `dorado basecaller`
//   rnaMod    - label embedded in the output file name
//   rnaModel  - model argument passed to `dorado basecaller`
// Output:
//   <inputFile.simpleName>.<rnaMod>.bam, also copied into params.dorDir
process doradoTask {
    // Directives are declared before the input/output sections, as the
    // Nextflow process syntax requires.
    executor 'slurm'
    queue params.gpuPartition
    cpus params.defaultCpus
    memory params.gpuMemory
    time params.time
    clusterOptions "--account=${params.gpuAccount} ${params.gpuRes} --output=logs/doradoTask-%j.out --error=logs/doradoTask-%j.err"
    publishDir params.dorDir, mode: 'copy'

    input:
    path inputFile
    val rnaMod
    val rnaModel

    output:
    path "${inputFile.simpleName}.${rnaMod}.bam"

    script:
    """
    mkdir -p ${params.dorDir}
    module load dorado
    dorado basecaller ${rnaModel} --models-directory ${params.rnaModelDir} --batchsize 32 $inputFile > "${inputFile.simpleName}.${rnaMod}.bam"
    """
}
28+
29+
// Merge every per-file BAM found in params.dorDir into one unmapped BAM.
//
// Inputs:
//   fileCount - not used by the script; it only makes this task wait for
//               whatever produced the value
//   rnaMod    - label used to select `*.<rnaMod>.bam` files and to name
//               the merged output
// Output:
//   <params.sample>.<rnaMod>.unmapped.bam, also copied into params.bamDir
//
// NOTE(review): the BAMs are globbed from params.dorDir rather than staged
// as `path` inputs. If that directory is filled by another process's
// publishDir, the copies are asynchronous and may not be complete when this
// task starts. Passing the BAMs as a `path` input would remove the race, but
// changes this process's inputs, so it is left for a coordinated change.
process mergeBamsTask {
    executor 'slurm'
    queue params.cpuPartition
    cpus params.mergeCpus
    memory params.cpuMemory
    time params.time
    clusterOptions "--account=${params.cpuAccount}"
    publishDir params.bamDir, mode: 'copy'

    input:
    val fileCount
    val rnaMod

    output:
    path "${params.sample}.${rnaMod}.unmapped.bam"

    script:
    // task.cpus reflects the `cpus` directive above, so the thread count
    // always matches what was requested from the scheduler.
    """
    module load samtools
    samtools merge --threads ${task.cpus} -o ${params.sample}.${rnaMod}.unmapped.bam ${params.dorDir}/*.${rnaMod}.bam
    chmod 775 "${params.sample}.${rnaMod}.unmapped.bam"
    """
}
52+
53+
// Convert an unmapped BAM to FASTQ (keeping the MM/ML tags), align it with
// minimap2 in spliced mode, then sort and index the alignments.
//
// Inputs:
//   inputFile - unmapped BAM to align
//   rnaMod    - label embedded in the output file names
// Outputs:
//   <params.sample>.<rnaMod>.bam and its .bai index, both also copied into
//   params.bamDir
//
// NOTE(review): every tool below is asked for 64 threads, but the process
// declares no `cpus` directive, so the scheduler request does not reflect
// that. Confirm the intended allocation and add a matching `cpus` setting.
process minimapTask {
    executor 'slurm'
    queue params.cpuPartition
    memory params.minimapMemory
    time params.time
    clusterOptions "--account=${params.cpuAccount}"
    publishDir params.bamDir, mode: 'copy'

    input:
    path inputFile
    val rnaMod

    output:
    path "${params.sample}.${rnaMod}.bam"
    path "${params.sample}.${rnaMod}.bam.bai"

    script:
    def sortedBam = "${params.sample}.${rnaMod}.bam"
    // The staged input is referenced through inputFile instead of rebuilding
    // its name from params, so the task does not depend on how the upstream
    // process named the file. Sorting and indexing are separate commands so a
    // failed pipeline stops the script immediately.
    """
    module load samtools
    module load minimap2
    samtools fastq --threads 64 -T MM,ML ${inputFile} |
        minimap2 -t 64 -ax splice --junc-bed ${params.annotRef} --secondary=no --MD -y ${params.genomeRef} - |
        samtools sort - --threads 64 > ${sortedBam}
    samtools index -@ 64 ${sortedBam}
    chmod 775 "${sortedBam}"
    chmod 775 "${sortedBam}.bai"
    """
}
81+
82+
// Run `modkit pileup` on an aligned BAM and write the result as a BED file.
//
// Inputs:
//   inputFile    - aligned BAM to summarise
//   inputFileBai - index for inputFile; staged next to the BAM, not named on
//                  the command line
//   rnaMod       - label embedded in the output file name
// Output:
//   <params.sample>.<rnaMod>.bed, also copied into params.bedDir
process modkitTask {
    executor 'slurm'
    queue params.cpuPartition
    cpus params.modkitCpus
    memory params.cpuMemory
    time params.time
    clusterOptions "--account=${params.cpuAccount}"
    publishDir params.bedDir, mode: 'copy'

    input:
    path inputFile
    path inputFileBai
    val rnaMod

    output:
    path "${params.sample}.${rnaMod}.bed"

    script:
    // Thread count follows the `cpus` directive instead of a fixed number, so
    // modkit never uses more or fewer threads than were requested.
    """
    ${params.modkitPath}/modkit pileup -t ${task.cpus} --filter-threshold 0.9 ${inputFile} ${params.sample}.${rnaMod}.bed
    chmod 775 "${params.sample}.${rnaMod}.bed"
    """
}
105+
106+
// Filter a BED file with the external filterbed.py script.
//
// Inputs:
//   inputFile - BED file to filter
//   rnaMod    - label embedded in the output file name
// Output:
//   <params.sample>.<rnaMod>.filtered-<params.minCov>-<params.perMod>.bed,
//   also copied into params.bedDir
//
// params.minCov and params.perMod are passed to the script as its first two
// arguments; their exact meaning is defined by filterbed.py, which is not
// part of this file.
process filterbedTask {
    executor 'slurm'
    queue params.cpuPartition
    cpus params.modkitCpus
    memory params.cpuMemory
    time params.time
    clusterOptions "--account=${params.cpuAccount}"
    publishDir params.bedDir, mode: 'copy'

    input:
    path inputFile
    val rnaMod

    output:
    path "${params.sample}.${rnaMod}.filtered-${params.minCov}-${params.perMod}.bed"

    script:
    def filteredBed = "${params.sample}.${rnaMod}.filtered-${params.minCov}-${params.perMod}.bed"
    // The staged input is referenced through inputFile rather than a name
    // rebuilt from params, so the task works regardless of the upstream name.
    """
    module load python/3.10.2
    python /share/crsp/lab/seyedam/share/bridge_dRNA/scripts/filterbed.py ${params.minCov} ${params.perMod} ${inputFile} "${filteredBed}"
    chmod 775 "${filteredBed}"
    """
}
129+
130+
// Dump the reads of a BAM file to FASTQ and gzip the result.
//
// Inputs:
//   inputFile - BAM file to convert
//   rnaMod    - label embedded in the output file name
// Output:
//   <params.sample>.<rnaMod>.fastq.gz, also copied into params.fastqDir
//
// NOTE(review): samtools is asked for 6 threads but the process declares no
// `cpus` directive; confirm the intended allocation.
process extractfastqTask {
    executor 'slurm'
    queue params.cpuPartition
    memory params.minimapMemory
    time params.time
    clusterOptions "--account=${params.cpuAccount}"
    publishDir params.fastqDir, mode: 'copy'

    input:
    path inputFile
    val rnaMod

    output:
    path "${params.sample}.${rnaMod}.fastq.gz"

    script:
    def fastq = "${params.sample}.${rnaMod}.fastq"
    // The staged input is referenced through inputFile rather than a name
    // rebuilt from params. gzip replaces the .fastq with the declared
    // .fastq.gz output.
    """
    module load samtools
    samtools fastq --threads 6 ${inputFile} > ${fastq}
    gzip -v ${fastq}
    """
}
152+
153+
// Run the kallisto/bustools command chain on one input file:
// `kallisto bus` -> `bustools sort` -> `bustools count` -> `kallisto quant-tcc`.
//
// Input:
//   inputFile - reads file passed to `kallisto bus`
// Output:
//   a directory named after params.sample holding every intermediate and
//   final file, also copied into params.kallistoDir
process kallistoTask {
    // Directives are declared before the input/output sections, as the
    // Nextflow process syntax requires.
    executor 'slurm'
    queue params.cpuPartition
    cpus params.kallistoCpus
    memory params.cpuMemory
    time params.time
    clusterOptions "--account=${params.cpuAccount}"
    publishDir params.kallistoDir, mode: 'copy'

    input:
    path inputFile

    output:
    path "${params.sample}"

    script:
    """
    mkdir -p ${params.sample}
    module load kallisto
    module load bustools

    kallisto bus --long --threshold 0.8 -x bulk -i ${params.kallistoIndex} -t ${task.cpus} -o ${params.sample} "${inputFile}"

    bustools sort -t ${task.cpus} ${params.sample}/output.bus -o ${params.sample}/sorted.bus

    bustools count ${params.sample}/sorted.bus -t ${params.sample}/transcripts.txt -e ${params.sample}/matrix.ec -o ${params.sample}/count --cm -m -g ${params.t2g}

    kallisto quant-tcc -t ${task.cpus} --long -P ONT ${params.sample}/count.mtx -i ${params.kallistoIndex} -f ${params.sample}/flens.txt -e ${params.sample}/count.ec.txt -o ${params.sample}

    chmod 775 "${params.sample}"
    """
}
185+
186+
187+
// Split a filtered BED file into two files: lines that contain the letter
// "a" go to the m6a file, all remaining lines go to the inosine file.
//
// Input:
//   inputFile - filtered BED file to split
// Outputs:
//   <params.sample>.m6a.filtered-<params.minCov>-<params.perMod>.bed
//   <params.sample>.inosine.filtered-<params.minCov>-<params.perMod>.bed
//   both also copied into params.bedDir
//
// NOTE(review): `grep a` matches an "a" anywhere on the line, including the
// sequence-name column (e.g. names containing "random" or "alt"). If the
// intent is to select on a modification-code column, match that column
// explicitly once the layout written by the filtering step is confirmed.
process splitM6aInosineTask {
    executor 'slurm'
    queue params.cpuPartition
    cpus params.modkitCpus
    memory params.cpuMemory
    time params.time
    clusterOptions "--account=${params.cpuAccount}"
    publishDir params.bedDir, mode: 'copy'

    input:
    path inputFile

    output:
    path "${params.sample}.m6a.filtered-${params.minCov}-${params.perMod}.bed"
    path "${params.sample}.inosine.filtered-${params.minCov}-${params.perMod}.bed"

    script:
    def suffix = "filtered-${params.minCov}-${params.perMod}.bed"
    def m6aBed = "${params.sample}.m6a.${suffix}"
    def inosineBed = "${params.sample}.inosine.${suffix}"
    // grep exits with status 1 when no line is selected. That is a valid
    // outcome here (one of the two classes may be absent), so status 1 is
    // accepted and yields an empty file; any other non-zero status still
    // fails the task.
    """
    grep a ${inputFile} > "${m6aBed}" || [ \$? -eq 1 ]
    grep -v a ${inputFile} > "${inosineBed}" || [ \$? -eq 1 ]
    chmod 775 "${m6aBed}" "${inosineBed}"
    """
}
210+
211+
212+
// Modification-calling pipeline for one modification type:
// basecall every POD5 file, merge the BAMs, align, run modkit, filter the
// BED, and run the optional branches selected by theMod.
//
// take:
//   theMod   - modification label; 'm5c' additionally runs FASTQ extraction
//              and kallisto, 'm6aInosine' additionally splits the filtered BED
//   theModel - model argument forwarded to doradoTask
workflow modWorkflow {
    take:
    theMod
    theModel

    main:
    println "theMod: " + theMod
    def pod5FilesChannel = Channel.fromPath("${params.podDir}/*.pod5")

    // Run doradoTask for each input file, then gather all resulting BAMs
    // into a single list. collect() emits only once every doradoTask has
    // finished, and unlike collectFile() it does not rewrite the BAM data.
    bamFiles = doradoTask(pod5FilesChannel, theMod, theModel).collect()

    // Number of BAMs produced; passed to the merge step purely to make it
    // wait for all basecalling tasks.
    fileCount = bamFiles.map { bams -> bams.size() }

    // Run merge task using the file count
    unmappedbam = mergeBamsTask(fileCount, theMod)

    // Run minimap
    mappedBams = minimapTask(unmappedbam, theMod)

    if (theMod == 'm5c') {
        // Run extractFastq
        fastqFile = extractfastqTask(unmappedbam, theMod)

        // Run kallistoTask using the extracted FASTQ file
        kallistoResults = kallistoTask(fastqFile)
    }

    // Run modkit; mappedBams carries both minimapTask outputs (BAM and index)
    bedfile = modkitTask(mappedBams, theMod)

    // Filter BED file
    filterbed = filterbedTask(bedfile, theMod)

    if (theMod == 'm6aInosine') {
        splitResults = splitM6aInosineTask(filterbed)
    }
}
249+
250+
// Export the workflow
251+
//export modWorkflow

0 commit comments

Comments
 (0)