Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion VariantValidator/modules/liftover.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va
:param reverse_normalizer:
:param evm:
:param validator: Validator obj
:param specify_tx: Specify a specific transcript = False or str(transcript_ID)
:param liftover_level: False or 'primary'
:param g_to_g: True or False
:return:
"""

Expand Down Expand Up @@ -131,7 +134,7 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va
# Get a list of overlapping RefSeq transcripts
# Note, due to 0 base positions in UTA (I think) occasionally tx will
rts_list = validator.hdp.get_tx_for_region(hgvs_genomic.ac, 'splign', hgvs_genomic.posedit.pos.start.base - 1,
hgvs_genomic.posedit.pos.end.base) #- 1)
hgvs_genomic.posedit.pos.end.base) # - 1)
rts_dict = {}
tx_list = False
if g_to_g is True:
Expand Down Expand Up @@ -160,6 +163,8 @@ def liftover(hgvs_genomic, build_from, build_to, hn, reverse_normalizer, evm, va
for op in options:
sff = None
sft = None
if liftover_level is None:
continue
if op[1].startswith('NC_'):
if build_to.startswith('GRC'):
sft = seq_data.to_chr_num_refseq(op[1], build_to)
Expand Down
22 changes: 17 additions & 5 deletions VariantValidator/modules/mappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,15 +696,20 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
return False


def final_tx_to_multiple_genomic(variant, validator, tx_variant):
def final_tx_to_multiple_genomic(variant, validator, tx_variant, liftover_level=False):

warnings = ''
rec_var = ''
gap_compensation = True

# Multiple genomic variants
# multi_gen_vars = []
variant.hgvs_coding = validator.hp.parse_hgvs_variant(str(tx_variant))
try:
tx_variant.ac
variant.hgvs_coding = tx_variant
except AttributeError:
variant.hgvs_coding = validator.hp.parse_hgvs_variant(str(tx_variant))

# Gap gene black list
if variant.gene_symbol:
# If the gene symbol is not in the list, the value False will be returned
Expand All @@ -727,10 +732,17 @@ def final_tx_to_multiple_genomic(variant, validator, tx_variant):
multi_list = []
mapping_options = validator.hdp.get_tx_mapping_options(variant.hgvs_coding.ac)
mapping_options = sorted(mapping_options, key=itemgetter(1))

for alt_chr in mapping_options:
if ('NC_' in alt_chr[1] or 'NT_' in alt_chr[1] or 'NW_' in alt_chr[1]) and \
alt_chr[2] == validator.alt_aln_method:
multi_list.append(alt_chr[1])
if liftover_level is None:
multi_list.append(variant.genomic_g.split(":")[0])
elif liftover_level is 'primary':
if ('NC_' in alt_chr[1]) and alt_chr[2] == validator.alt_aln_method:
multi_list.append(alt_chr[1])
else:
if ('NC_' in alt_chr[1] or 'NT_' in alt_chr[1] or 'NW_' in alt_chr[1]) and \
alt_chr[2] == validator.alt_aln_method:
multi_list.append(alt_chr[1])

for alt_chr in multi_list:
logger.debug("Trying to do final gap mapping with %s", alt_chr)
Expand Down
34 changes: 24 additions & 10 deletions VariantValidator/modules/vvMixinCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,18 @@ class Mixin(vvMixinConverters.Mixin):
It's added to the Validator object in the vvObjects file.
"""

def validate(self, batch_variant, selected_assembly, select_transcripts, transcript_set="refseq"):
def validate(self,
batch_variant,
selected_assembly,
select_transcripts,
transcript_set="refseq",
liftover_level=False):
"""
This is the main validator function.
:param batch_variant: A string containing the variant to be validated
:param selected_assembly: The version of the genome assembly to use.
:param select_transcripts: Can be an array of different transcripts, or 'all'
:param liftover_level: True or False - liftover to different gene/genome builds or not
Selecting multiple transcripts will lead to a multiple variant outputs.
:param transcript_set: 'refseq' or 'ensembl'. Currently only 'refseq' is supported
:return:
Expand All @@ -52,14 +58,12 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
transcript_set)

primary_assembly = None

self.selected_assembly = selected_assembly
self.select_transcripts = select_transcripts

# Validation
############
try:
# Validation
############

# Create a dictionary of transcript ID : ''
select_transcripts_dict = {}
select_transcripts_dict_plus_version = {}
Expand Down Expand Up @@ -670,7 +674,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
variant.gene_symbol = ''

if tx_variant != '':
multi_gen_vars = mappers.final_tx_to_multiple_genomic(variant, self, tx_variant)
multi_gen_vars = mappers.final_tx_to_multiple_genomic(variant,
self,
tx_variant,
liftover_level=liftover_level)

else:
# HGVS genomic in the absence of a transcript variant
Expand Down Expand Up @@ -1066,8 +1073,10 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
ref_records = self.db.get_urls(variant.output_dict())
if ref_records != {}:
variant.reference_sequence_records = ref_records
if (variant.output_type_flag == 'intergenic') or ('grch37' not in variant.primary_assembly_loci.keys())\
or ('grch38' not in variant.primary_assembly_loci.keys()):
if (variant.output_type_flag == 'intergenic' and liftover_level is not None) or \
('grch37' not in variant.primary_assembly_loci.keys())\
or ('grch38' not in variant.primary_assembly_loci.keys()
and liftover_level is not None):

# Simple cache
lo_cache = {}
Expand Down Expand Up @@ -1104,13 +1113,18 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
g_to_g = False
if variant.output_type_flag != 'intergenic':
g_to_g = True
# Liftover

# Lift-over
if genomic_position_info[g_p_key]['hgvs_genomic_description'] not in lo_cache.keys():
lifted_response = liftover(genomic_position_info[g_p_key]['hgvs_genomic_description'],
build_from,
build_to, variant.hn, variant.reverse_normalizer,
variant.evm, self, specify_tx=False, liftover_level=False,
variant.evm,
self,
specify_tx=False,
liftover_level=liftover_level,
g_to_g=g_to_g)

lo_cache[genomic_position_info[g_p_key]['hgvs_genomic_description']] = lifted_response
else:
lifted_response = lo_cache[genomic_position_info[g_p_key]['hgvs_genomic_description']]
Expand Down
23 changes: 23 additions & 0 deletions tests/test_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -30501,6 +30501,29 @@ def test_issue_326(self):
print(results)
assert 'NM_000546.6:c.1182_*1insT' in results.keys()

def test_issue_330a(self):
variant = 'NM_001353.6:c.2del'
results = self.vv.validate(variant, 'GRCh37', 'all', liftover_level=None).format_as_dict(test=True)
print(results)
assert 'grch37' in results['NM_001353.6:c.2del']['primary_assembly_loci'].keys()
assert 'grch38' not in results['NM_001353.6:c.2del']['primary_assembly_loci'].keys()

def test_issue_330b(self):
variant = 'NM_001040114.1:c.3055_3056inv'
results = self.vv.validate(variant, 'GRCh37', 'all', liftover_level='primary').format_as_dict(test=True)
print(results)
assert 'grch37' in results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci'].keys()
assert 'grch38' in results['NM_001040114.1:c.3055_3056inv']['primary_assembly_loci'].keys()
assert len(results['NM_001040114.1:c.3055_3056inv']['alt_genomic_loci']) == 0

def test_issue_330c(self):
variant = 'NC_000017.10:g.48279242G>T'
results = self.vv.validate(variant, 'GRCh37', 'all', liftover_level='primary').format_as_dict(test=True)
print(results)
assert 'grch37' in results['intergenic_variant_1']['primary_assembly_loci'].keys()
assert 'grch38' in results['intergenic_variant_1']['primary_assembly_loci'].keys()
assert len(results['intergenic_variant_1']['alt_genomic_loci']) == 0


# <LICENSE>
# Copyright (C) 2016-2022 VariantValidator Contributors
Expand Down