Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ mutational_signatures:
# select events (callsets, defined under calling/fdr-control/events) to evaluate
- some_id

impact_graphs:
activate: false

# Sets the minimum average coverage for each gene.
# Genes with lower average coverage will not be concidered in gene coverage datavzrd report
# If not present min_avg_coverage will be set to 0 rendering all genes.
Expand Down
1 change: 1 addition & 0 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ include: "rules/fusion_calling.smk"
include: "rules/testcase.smk"
include: "rules/population.smk"
include: "rules/mutational_signatures.smk"
include: "rules/impact_graphs.smk"


batches = "all"
Expand Down
4 changes: 0 additions & 4 deletions workflow/envs/arriba.post-deploy.sh

This file was deleted.

2 changes: 1 addition & 1 deletion workflow/envs/arriba.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ channels:
- conda-forge
- bioconda
dependencies:
- arriba =2.4
- arriba =2.5
5 changes: 5 additions & 0 deletions workflow/envs/predictosaurus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
channels:
- conda-forge
- bioconda
dependencies:
- predictosaurus =0.4.0
25 changes: 17 additions & 8 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ samples = (
pd.read_csv(
config["samples"],
sep="\t",
dtype={"sample_name": str, "group": str},
dtype={"sample_name": str, "group": str, "umi_len": "Int64"},
comment="#",
)
.set_index("sample_name", drop=False)
Expand Down Expand Up @@ -248,6 +248,7 @@ def get_final_output(wildcards):
)
final_output.extend(get_mutational_burden_targets())
final_output.extend(get_mutational_signature_targets())
final_output.extend(get_impact_graph_targets())

if is_activated("population/db"):
final_output.append(lookup(dpath="population/db/path", within=config))
Expand Down Expand Up @@ -689,17 +690,25 @@ def get_mutational_burden_targets():
def get_mutational_signature_targets():
mutational_signature_targets = []
if is_activated("mutational_signatures"):
for group in variants_groups:
mutational_signature_targets.extend(
expand(
"results/plots/mutational_signatures/{group}.{event}.html",
group=variants_groups,
event=config["mutational_signatures"].get("events"),
)
mutational_signature_targets.extend(
expand(
"results/plots/mutational_signatures/{group}.{event}.html",
group=variants_groups,
event=config["mutational_signatures"].get("events"),
)
)
return mutational_signature_targets


def get_impact_graph_targets():
impact_graph_targets = []
if is_activated("impact_graphs"):
impact_graph_targets.extend(
expand("results/impact_graphs/{group}", group=variants_groups)
)
return impact_graph_targets


def get_scattered_calls(ext="bcf"):
def inner(wildcards):
caller = "arriba" if wildcards.calling_type == "fusions" else variant_caller
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/fusion_calling.smk
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module fusion_calling:
meta_wrapper:
"v3.13.3/meta/bio/star_arriba"
"v7.1.0/meta/bio/star_arriba"
config:
config

Expand Down
88 changes: 88 additions & 0 deletions workflow/rules/impact_graphs.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
rule gather_calls:
input:
calls=gather.calling("results/calls/{{group}}.{{caller}}.{scatteritem}.bcf"),
output:
"results/calls/{group}/{caller}.bcf",
log:
"logs/gather-calls/{group}/{caller}.log",
params:
uncompressed_bcf=False,
extra="",
threads: 4
resources:
mem_mb=10,
wrapper:
"v5.8.2/bio/bcftools/concat"


def get_predictosaurus_build_aux(wildcards):
aux = []
for sample in get_group_samples(wildcards.group):
aux.append(
"{alias}=results/observations/{group}/{sample}.{caller}.all.bcf".format(
alias=samples.loc[samples["sample_name"] == sample]["alias"].iloc[0],
caller=wildcards.caller,
group=wildcards.group,
sample=sample,
)
)
return " ".join(aux)


def get_all_group_observations(wildcards):
return expand(
"results/observations/{group}/{sample}.{caller}.all.bcf",
caller=wildcards.caller,
group=wildcards.group,
sample=get_group_samples(wildcards.group),
)


rule predictosaurus_build:
input:
calls="results/calls/{group}.{caller}.bcf",
obs=get_all_group_observations,
output:
"results/impact_graphs/{group}.{caller}.graphs.duckdb",
params:
obs_aux=get_predictosaurus_build_aux,
log:
"logs/predictosaurus/build/{group}_{caller}.log",
conda:
"../envs/predictosaurus.yaml"
shell:
"""
predictosaurus build -v --calls {input.calls} --observations {params.obs_aux} --output {output} 2> {log}
"""


rule predictosaurus_process:
input:
ref=genome,
gff="resources/annotation.gff3",
graph="results/impact_graphs/{group}.freebayes.graphs.duckdb",
output:
"results/impact_graphs/{group}.scores.duckdb",
log:
"logs/predictosaurus/process/{group}.log",
conda:
"../envs/predictosaurus.yaml"
shell:
"""
predictosaurus process -v --features {input.gff} --reference {input.ref} --graph {input.graph} --output {output} 2> {log}
"""


rule predictosaurus_plot:
input:
"results/impact_graphs/{group}.paths.duckdb",
output:
"results/impact_graphs/{group}",
log:
"logs/predictosaurus/plot/{group}.log",
conda:
"../envs/predictosaurus.yaml"
shell:
"""
predictosaurus plot --input {input} --output {output} 2> {log}
"""
6 changes: 4 additions & 2 deletions workflow/rules/ref.smk
Original file line number Diff line number Diff line change
Expand Up @@ -61,14 +61,16 @@ rule get_known_variants:

rule get_annotation:
output:
"resources/annotation.gtf",
"resources/annotation.{ext}",
params:
species=config["ref"]["species"],
release=config["ref"]["release"],
build=config["ref"]["build"],
flavor="", # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP.
wildcard_constraints:
ext="gft|gff3",
log:
"logs/get_annotation.log",
"logs/get_annotation_{ext}.log",
cache: "omit-software" # save space and time with between workflow caching (see docs)
wrapper:
"v2.3.2/bio/reference/ensembl-annotation"
Expand Down
4 changes: 2 additions & 2 deletions workflow/rules/table.smk
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ rule vembrane_table:
bcf="results/final-calls/{group}.{event}.{calling_type}.fdr-controlled.normal-probs.bcf",
scenario="results/scenarios/{group}.yaml",
output:
bcf="results/tables/{group}.{event}.{calling_type}.fdr-controlled.tsv",
tsv="results/tables/{group}.{event}.{calling_type}.fdr-controlled.tsv",
conda:
"../envs/vembrane.yaml"
params:
Expand All @@ -12,7 +12,7 @@ rule vembrane_table:
"logs/vembrane-table/{group}.{event}.{calling_type}.log",
shell:
'vembrane table --header "{params.config[header]}" "{params.config[expr]}" '
"{input.bcf} > {output.bcf} 2> {log}"
"{input.bcf} > {output.tsv} 2> {log}"


rule tsv_to_excel:
Expand Down
38 changes: 38 additions & 0 deletions workflow/rules/testcase.smk
Original file line number Diff line number Diff line change
@@ -1,3 +1,41 @@
rule sort_observations:
input:
"results/observations/{group}/{sample}.{caller}.{scatteritem}.bcf",
output:
temp("results/observations/{group}/{sample}.{caller}.{scatteritem}.sorted.bcf"),
params:
# Set to True, in case you want uncompressed BCF output
uncompressed_bcf=False,
# Extra arguments
extras="",
log:
"logs/bcf-sort/{group}/{sample}.{caller}.{scatteritem}.log",
resources:
mem_mb=8000,
wrapper:
"v5.5.0/bio/bcftools/sort"


rule index_observations:
input:
"results/observations/{group}/{sample}.{caller}.{scatteritem}.sorted.bcf",
output:
temp(
"results/observations/{group}/{sample}.{caller}.{scatteritem}.sorted.bcf.csi"
),
params:
# Set to True, in case you want uncompressed BCF output
uncompressed_bcf=False,
# Extra arguments
extras="",
log:
"logs/bcf-index/{group}/{sample}.{caller}.{scatteritem}.log",
resources:
mem_mb=8000,
wrapper:
"v5.5.0/bio/bcftools/index"


rule gather_observations:
input:
calls=gather.calling(
Expand Down
8 changes: 8 additions & 0 deletions workflow/schemas/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,14 @@ properties:
type: boolean
required:
- activate

impact_graphs:
type: object
properties:
activate:
type: boolean
required:
- activate


calling:
Expand Down
2 changes: 0 additions & 2 deletions workflow/scripts/split-call-tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ def variants(
self.pos = pos
self._variants = self._load_variants()
for variant in self._variants:
print(variant.pos)
if variant.pos == pos and variant.alts[0] == alt:
yield variant
if variant.pos > pos:
Expand All @@ -196,7 +195,6 @@ def annotate_row(self, row):
)

def _load_variants(self):
print(self.pos)
return self.bcf.fetch(str(self.contig), self.pos - 1, self.end)

@property
Expand Down
Loading