3 changes: 3 additions & 0 deletions config/config.yaml
@@ -77,6 +77,9 @@ mutational_signatures:
    # select events (callsets, defined under calling/fdr-control/events) to evaluate
    - some_id

impact_graphs:
  activate: false

# Sets the minimum average coverage for each gene.
# Genes with lower average coverage will not be considered in the gene coverage datavzrd report.
# If not present, min_avg_coverage will be set to 0, rendering all genes.
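For context, a minimal standalone sketch (plain Python, not part of the PR; the config path is assumed relative to the repository root) of how the new flag looks once the YAML is parsed:

import yaml

# Load the workflow configuration the way Snakemake's configfile directive would.
with open("config/config.yaml") as f:
    config = yaml.safe_load(f)

# The new section only carries an activation switch for now; it defaults to off.
print(config["impact_graphs"]["activate"])  # -> False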
1 change: 1 addition & 0 deletions workflow/Snakefile
@@ -44,6 +44,7 @@ include: "rules/fusion_calling.smk"
include: "rules/testcase.smk"
include: "rules/population.smk"
include: "rules/mutational_signatures.smk"
include: "rules/impact_graphs.smk"


batches = "all"
5 changes: 5 additions & 0 deletions workflow/envs/predictosaurus.yaml
@@ -0,0 +1,5 @@
channels:
  - conda-forge
  - bioconda
dependencies:
  - predictosaurus =0.2.10
23 changes: 16 additions & 7 deletions workflow/rules/common.smk
@@ -248,6 +248,7 @@ def get_final_output(wildcards):
    )
    final_output.extend(get_mutational_burden_targets())
    final_output.extend(get_mutational_signature_targets())
    final_output.extend(get_impact_graph_targets())

    if is_activated("population/db"):
        final_output.append(lookup(dpath="population/db/path", within=config))
@@ -689,17 +690,25 @@ def get_mutational_burden_targets():
def get_mutational_signature_targets():
    mutational_signature_targets = []
    if is_activated("mutational_signatures"):
        for group in variants_groups:
            mutational_signature_targets.extend(
                expand(
                    "results/plots/mutational_signatures/{group}.{event}.html",
                    group=variants_groups,
                    event=config["mutational_signatures"].get("events"),
                )
        mutational_signature_targets.extend(
            expand(
                "results/plots/mutational_signatures/{group}.{event}.html",
                group=variants_groups,
                event=config["mutational_signatures"].get("events"),
            )
        )
    return mutational_signature_targets


def get_impact_graph_targets():
    impact_graph_targets = []
    if is_activated("impact_graphs"):
        impact_graph_targets.extend(
            expand("results/impact_graphs/{group}", group=variants_groups)
        )
    return impact_graph_targets


def get_scattered_calls(ext="bcf"):
    def inner(wildcards):
        caller = "arriba" if wildcards.calling_type == "fusions" else variant_caller
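A minimal sketch of what the new target function is expected to yield, with hypothetical stand-ins for config, variants_groups, and is_activated (all of which are defined elsewhere in common.smk; the is_activated behaviour shown here is an assumption):

# Hypothetical stand-ins for names defined elsewhere in common.smk.
config = {"impact_graphs": {"activate": True}}
variants_groups = ["groupA", "groupB"]


def is_activated(path):
    # assumed behaviour: walk the config path and check its "activate" flag
    section = config
    for key in path.split("/"):
        section = section.get(key, {})
    return bool(section.get("activate", False))


def get_impact_graph_targets():
    targets = []
    if is_activated("impact_graphs"):
        targets.extend(f"results/impact_graphs/{g}" for g in variants_groups)
    return targets


print(get_impact_graph_targets())
# ['results/impact_graphs/groupA', 'results/impact_graphs/groupB']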
88 changes: 88 additions & 0 deletions workflow/rules/impact_graphs.smk
@@ -0,0 +1,88 @@
rule gather_calls:
    input:
        calls=gather.calling("results/calls/{{group}}.{{caller}}.{scatteritem}.bcf"),
    output:
        "results/calls/{group}/{caller}.bcf",
    log:
        "logs/gather-calls/{group}/{caller}.log",
    params:
        uncompressed_bcf=False,
        extra="",
    threads: 4
    resources:
        mem_mb=10,
    wrapper:
        "v5.8.2/bio/bcftools/concat"


def get_predictosaurus_build_aux(wildcards):
    aux = []
    for sample in get_group_samples(wildcards.group):
        aux.append(
            "{alias}=results/observations/{group}/{sample}.{caller}.all.bcf".format(
                alias=samples.loc[samples["sample_name"] == sample]["alias"].iloc[0],
                caller=wildcards.caller,
                group=wildcards.group,
                sample=sample,
            )
        )
    return " ".join(aux)


def get_all_group_observations(wildcards):
    return expand(
        "results/observations/{group}/{sample}.{caller}.all.bcf",
        caller=wildcards.caller,
        group=wildcards.group,
        sample=get_group_samples(wildcards.group),
    )


rule predictosaurus_build:
    input:
        calls="results/calls/{group}/{caller}.bcf",
        obs=get_all_group_observations,
    output:
        "results/impact_graphs/{group}.{caller}.graphs.duckdb",
    params:
        obs_aux=get_predictosaurus_build_aux,
    log:
        "logs/predictosaurus/build/{group}_{caller}.log",
    conda:
        "../envs/predictosaurus.yaml"
    shell:
        """
        predictosaurus build -v --calls {input.calls} --observations {params.obs_aux} --output {output} 2> {log}
        """


rule predictosaurus_process:
    input:
        ref=genome,
        gff="resources/annotation.gff3",
        graph="results/impact_graphs/{group}.freebayes.graphs.duckdb",
    output:
        "results/impact_graphs/{group}.paths.duckdb",
    log:
        "logs/predictosaurus/process/{group}.log",
    conda:
        "../envs/predictosaurus.yaml"
    shell:
        """
        predictosaurus process -v --features {input.gff} --reference {input.ref} --graph {input.graph} --output {output} 2> {log}
        """


rule predictosaurus_plot:
    input:
        "results/impact_graphs/{group}.paths.duckdb",
    output:
        "results/impact_graphs/{group}",
    log:
        "logs/predictosaurus/plot/{group}.log",
    conda:
        "../envs/predictosaurus.yaml"
    shell:
        """
        predictosaurus plot --input {input} --format html --output {output} 2> {log}
        """
6 changes: 4 additions & 2 deletions workflow/rules/ref.smk
@@ -61,14 +61,16 @@ rule get_known_variants:

rule get_annotation:
    output:
        "resources/annotation.gtf",
        "resources/annotation.{ext}",
    params:
        species=config["ref"]["species"],
        release=config["ref"]["release"],
        build=config["ref"]["build"],
        flavor="", # optional, e.g. chr_patch_hapl_scaff, see Ensembl FTP.
    wildcard_constraints:
        ext="gtf|gff3",
    log:
        "logs/get_annotation.log",
        "logs/get_annotation_{ext}.log",
    cache: "omit-software" # save space and time with between workflow caching (see docs)
    wrapper:
        "v2.3.2/bio/reference/ensembl-annotation"
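The annotation rule is now parameterized over the file extension, and the wildcard constraint ext="gtf|gff3" is a plain regular expression, so both annotation flavours resolve while anything else is rejected. A quick standalone check (Snakemake anchors wildcard constraints itself; the explicit anchoring here is only for this sketch):

import re

ext_constraint = re.compile(r"gtf|gff3")

for ext in ["gtf", "gff3", "gff"]:
    print(ext, bool(ext_constraint.fullmatch(ext)))
# gtf True, gff3 True, gff False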
4 changes: 2 additions & 2 deletions workflow/rules/table.smk
@@ -3,7 +3,7 @@ rule vembrane_table:
        bcf="results/final-calls/{group}.{event}.{calling_type}.fdr-controlled.normal-probs.bcf",
        scenario="results/scenarios/{group}.yaml",
    output:
        bcf="results/tables/{group}.{event}.{calling_type}.fdr-controlled.tsv",
        tsv="results/tables/{group}.{event}.{calling_type}.fdr-controlled.tsv",
    conda:
        "../envs/vembrane.yaml"
    params:
@@ -12,7 +12,7 @@
"logs/vembrane-table/{group}.{event}.{calling_type}.log",
shell:
'vembrane table --header "{params.config[header]}" "{params.config[expr]}" '
"{input.bcf} > {output.bcf} 2> {log}"
"{input.bcf} > {output.tsv} 2> {log}"


rule tsv_to_excel:
38 changes: 38 additions & 0 deletions workflow/rules/testcase.smk
@@ -1,3 +1,41 @@
rule sort_observations:
    input:
        "results/observations/{group}/{sample}.{caller}.{scatteritem}.bcf",
    output:
        temp("results/observations/{group}/{sample}.{caller}.{scatteritem}.sorted.bcf"),
    params:
        # Set to True, in case you want uncompressed BCF output
        uncompressed_bcf=False,
        # Extra arguments
        extras="",
    log:
        "logs/bcf-sort/{group}/{sample}.{caller}.{scatteritem}.log",
    resources:
        mem_mb=8000,
    wrapper:
        "v5.5.0/bio/bcftools/sort"


rule index_observations:
    input:
        "results/observations/{group}/{sample}.{caller}.{scatteritem}.sorted.bcf",
    output:
        temp(
            "results/observations/{group}/{sample}.{caller}.{scatteritem}.sorted.bcf.csi"
        ),
    params:
        # Set to True, in case you want uncompressed BCF output
        uncompressed_bcf=False,
        # Extra arguments
        extras="",
    log:
        "logs/bcf-index/{group}/{sample}.{caller}.{scatteritem}.log",
    resources:
        mem_mb=8000,
    wrapper:
        "v5.5.0/bio/bcftools/index"


rule gather_observations:
    input:
        calls=gather.calling(
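The two new rules prepare each per-scatteritem observation BCF for gathering: coordinate-sort, then index. Roughly, the two wrapper steps boil down to the following (a sketch with hypothetical paths, not the wrapper code itself):

import subprocess

# Hypothetical per-scatteritem observation file and its sorted counterpart.
inp = "results/observations/g1/s1.freebayes.1-of-20.bcf"
out = "results/observations/g1/s1.freebayes.1-of-20.sorted.bcf"

# Coordinate-sort into compressed BCF, then index (bcftools writes a .csi
# index by default for BCF input).
subprocess.run(["bcftools", "sort", "-Ob", "-o", out, inp], check=True)
subprocess.run(["bcftools", "index", out], check=True)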
8 changes: 8 additions & 0 deletions workflow/schemas/config.schema.yaml
@@ -193,6 +193,14 @@ properties:
        type: boolean
    required:
      - activate

  impact_graphs:
    type: object
    properties:
      activate:
        type: boolean
    required:
      - activate


  calling:
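The new schema entry mirrors the other activatable sections. A minimal sketch of how such a sub-schema validates, using the jsonschema package directly (the workflow presumably validates the full config against this schema via snakemake.utils.validate):

from jsonschema import ValidationError, validate

# Sub-schema corresponding to the new impact_graphs entry above.
impact_graphs_schema = {
    "type": "object",
    "properties": {"activate": {"type": "boolean"}},
    "required": ["activate"],
}

validate({"activate": False}, impact_graphs_schema)  # passes silently

try:
    validate({}, impact_graphs_schema)  # missing the required "activate" key
except ValidationError as err:
    print(err.message)  # -> "'activate' is a required property"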