Skip to content

Commit 15b33e1

Browse files
author
Peter J. Freeman
authored
Merge pull request #443 from openvar/update_to_vvta
bug fix for #419 an…
2 parents 41edfd3 + eb1f69d commit 15b33e1

File tree

4 files changed

+37
-28
lines changed

4 files changed

+37
-28
lines changed

VariantValidator/modules/mappers.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ def gene_to_transcripts(variant, validator, select_transcripts_dict):
3737

3838
# Set test to see if Norm alters the coords
3939
g_test = variant.hn.normalize(g_query)
40+
try:
41+
if "N" in g_test.posedit.edit.ref:
42+
variant.warnings.append("Submitted variant description cannot be validated as it is located in a region of "
43+
"the reference sequence represented by base 'N' and not 'GATC'")
44+
logger.warning(error)
45+
return True
46+
except AttributeError:
47+
pass
48+
except TypeError:
49+
pass
4050

4151
# Perform test
4252
if g_query.posedit.pos != g_test.posedit.pos:
@@ -389,6 +399,7 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
389399
return True
390400
else:
391401
logger.debug("Except passed, %s", e)
402+
392403
# genome back to C coordinates
393404
try:
394405
post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc)
@@ -397,13 +408,12 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
397408
logger.warning(str(error))
398409
return True
399410
test = validator.hp.parse_hgvs_variant(quibble_input)
400-
401411
if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \
402412
post_var.posedit.pos.end.base != test.posedit.pos.end.base:
403-
caution = 'The entered coordinates do not agree with the intron/exon boundaries for the ' \
404-
'selected transcript'
413+
caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for transcript " \
414+
"%s" % (test.posedit.pos, test.ac)
405415
variant.warnings.extend([caution])
406-
# raise MappersError(caution)
416+
raise MappersError(caution)
407417

408418
else: # del not in formatted_variant
409419

@@ -415,14 +425,13 @@ def transcripts_to_gene(variant, validator, select_transcripts_dict_plus_version
415425

416426
# genome back to C coordinates
417427
post_var = validator.myevm_g_to_t(variant.evm, pre_var, trans_acc)
418-
419428
test = validator.hp.parse_hgvs_variant(quibble_input)
420429
if post_var.posedit.pos.start.base != test.posedit.pos.start.base or \
421430
post_var.posedit.pos.end.base != test.posedit.pos.end.base:
422-
caution = 'The entered coordinates do not agree with the intron/exon boundaries for the ' \
423-
'selected transcript'
431+
caution = "ExonBoundaryError: Position c.%s does not correspond with an exon boundary for transcript " \
432+
"%s" % (test.posedit.pos, test.ac)
424433
variant.warnings.extend([caution])
425-
# raise MappersError(caution)
434+
raise MappersError(caution)
426435

427436
elif ':g.' not in quibble_input:
428437
query = validator.hp.parse_hgvs_variant(formatted_variant)

VariantValidator/modules/use_checking.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,6 @@ def structure_checks_c(variant, validator):
347347
try:
348348
validator.vr.validate(variant.input_parses)
349349
except vvhgvs.exceptions.HGVSInvalidVariantError as e:
350-
print("The error")
351-
print(e)
352350
error = str(e)
353351
if 'bounds' in error:
354352
try:
@@ -388,7 +386,7 @@ def structure_checks_c(variant, validator):
388386
if (variant.input_parses.posedit.pos.end.base > int(tx_info[4]) or variant.input_parses.posedit.pos.end.base
389387
> int(tx_info[4])) and ("*" not in str(variant.input_parses.posedit.pos.end) or "*" not in
390388
str(variant.input_parses.posedit.pos.start)):
391-
errors = ["Variant start position and/or end position are beyond the CDS end position "
389+
errors = ["CDSError: Variant start position and/or end position are beyond the CDS end position "
392390
"and likely also beyond the end of the selected reference sequence"]
393391
else:
394392
errors = ['Required information for ' + variant.input_parses.ac + ' is missing from the Universal '

VariantValidator/modules/vvMixinCore.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -666,8 +666,9 @@ def validate(self,
666666
logger.warning(error)
667667
continue
668668

669-
elif my_variant.hgvs_formatted.posedit.pos.end.base < my_variant.hgvs_formatted.posedit.pos.start.base:
670-
if "NC_012920.1" not in my_variant.hgvs_formatted.ac and\
669+
elif my_variant.hgvs_formatted.posedit.pos.end.base < \
670+
my_variant.hgvs_formatted.posedit.pos.start.base:
671+
if "NC_012920.1" not in my_variant.hgvs_formatted.ac and \
671672
"NC_001807.4" not in my_variant.hgvs_formatted.ac:
672673
error = 'Interval end position ' + str(my_variant.hgvs_formatted.posedit.pos.end.base) + \
673674
' < interval start position ' + str(my_variant.hgvs_formatted.posedit.pos.start.base)

tests/test_warnings.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ def test_issue_169(self):
2222
variant = 'NC_000017.10(NM_007294.3):c.4421-63A>G'
2323
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
2424
print(results)
25-
assert 'The entered coordinates do not agree with the intron/exon boundaries for the selected transcript' in \
26-
results['validation_warning_1']['validation_warnings'][0]
25+
assert 'ExonBoundaryError: Position c.4421-63 does not correspond with an exon boundary for transcript NM_007294.3' in \
26+
results['validation_warning_1']['validation_warnings']
2727

2828
def test_issue_176(self):
2929
variant = 'NC_000023.10(NM_004006.2):c.8810A>G'
@@ -600,15 +600,13 @@ def test_vv_series_1(self):
600600
variant = 'NC_000004.11:g.140811117C>A'
601601
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
602602
print(results)
603-
604603
assert "NM_018717.4 contains 3 fewer bases between c.2276_2277, and 12 fewer bases between c.1467_1468 than NC_000004.11" in \
605604
results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['validation_warnings']
606605

607606
def test_vv_series_2(self):
608607
variant = 'NC_000008.10:g.24811072C>T'
609608
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
610609
print(results)
611-
612610
assert "NM_006158.5 contains 1 fewer bases between c.1413_1414 than NC_000008.10" in \
613611
results['NM_006158.5:c.1407delinsAC']['validation_warnings']
614612
assert "NM_006158.4 contains 1 fewer bases between c.1407_1408 than NC_000008.10" in \
@@ -620,7 +618,6 @@ def test_vv_series_3(self):
620618
variant = 'NC_000015.9:g.72105933del'
621619
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
622620
print(results)
623-
624621
assert "NM_016346.4 contains 1 fewer bases between c.951_952 than NC_000015.9" in \
625622
results['NM_016346.4:c.951_952=']['validation_warnings']
626623
assert "NM_016346.3 contains 1 fewer bases between c.947_948 than NC_000015.9" in \
@@ -638,7 +635,6 @@ def test_vv_series_4(self):
638635
variant = 'NC_000019.9:g.41123095dup'
639636
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
640637
print(results)
641-
642638
assert "NM_003573.2 contains 1 extra bases between c.3122_3124 than NC_000019.9" in \
643639
results['NM_003573.2:c.3122_3124=']['validation_warnings']
644640
assert "NM_001042545.2 contains 1 extra bases between c.3034_3036 than NC_000019.9" in \
@@ -652,7 +648,6 @@ def test_vv_series_5(self):
652648
variant = 'NC_000017.10:g.5286863_5286889AGTGTTTGGAATTTTCTGTTCATATAG='
653649
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
654650
print(results)
655-
656651
assert "NM_004703.6 contains 25 fewer bases between c.*369_*370 than NC_000017.10" in \
657652
results['NM_004703.6:c.*344_*368dup']['validation_warnings']
658653
assert "NM_004703.5 contains 25 fewer bases between c.*344_*345 than NC_000017.10" in \
@@ -674,15 +669,13 @@ def test_vv_series_6(self):
674669
variant = 'NC_000012.11:g.122064777C>A'
675670
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
676671
print(results)
677-
678672
assert "NM_032790.3 contains 6 fewer bases between c.126_127 than NC_000012.11" in \
679673
results['NM_032790.3:c.129_130insACACCG']['validation_warnings']
680674

681675
def test_vv_series_7(self):
682676
variant = 'NC_000002.11:g.95847041_95847043GCG='
683677
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
684678
print(results)
685-
686679
assert "NM_021088.3 contains 3 fewer bases between c.467_468 than NC_000002.11" in \
687680
results['NM_021088.3:c.471_473dup']['validation_warnings']
688681
assert "NM_021088.2 contains 3 fewer bases between c.467_468 than NC_000002.11" in \
@@ -702,7 +695,6 @@ def test_vv_series_8(self):
702695
variant = 'NC_000003.11:g.14561629_14561630insG'
703696
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
704697
print(results)
705-
706698
assert "NM_001080423.4 contains 1 extra bases between c.1019_1021 than NC_000003.11" in \
707699
results['NM_001080423.4:c.1019_1021=']['validation_warnings']
708700
assert "NM_001080423.3 contains 1 extra bases between c.1017_1019 than NC_000003.11" in \
@@ -714,15 +706,13 @@ def test_vv_series_9(self):
714706
variant = 'NC_000004.11:g.140811117C>A'
715707
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
716708
print(results)
717-
718709
assert "NM_018717.4 contains 3 fewer bases between c.2276_2277, and 12 fewer bases between c.1467_1468 than NC_000004.11" in \
719710
results['NM_018717.4:c.1472_1473insTCAGCAGCAGCA']['validation_warnings']
720711

721712
def test_vv_series_10(self):
722713
variant = 'NC_000009.11:g.136132908_136132909TA='
723714
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
724715
print(results)
725-
726716
assert "NM_020469.3 contains 22 extra bases between c.*756_*757, and 2 extra bases between c.*797_*798, and 110 extra bases between c.*840_*841, and 2 extra bases between c.*4648_*4649, and 1 extra bases between c.260_262 than NC_000009.11" in \
727717
results['NM_020469.3:c.261del']['validation_warnings']
728718
assert "NM_020469.2 contains 1 extra bases between c.260_262 than NC_000009.11" in \
@@ -732,7 +722,6 @@ def test_vv_series_11(self):
732722
variant = 'NC_000019.10:g.50378563_50378564insTAC'
733723
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
734724
print(results)
735-
736725
assert "NM_007121.5 contains 3 extra bases between c.514_518 than NC_000019.10" in \
737726
results['NM_007121.5:c.515A>T']['validation_warnings']
738727
assert "NM_001256647.1 contains 3 extra bases between c.223_227 than NC_000019.10" in \
@@ -742,18 +731,30 @@ def test_vv_series_12(self):
742731
variant = 'NC_000007.13:g.149476664_149476666delinsTC'
743732
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
744733
print(results)
745-
746734
assert "NR_163594.1 contains 1 extra bases between n.1129_1131, and 1 fewer bases between n.11675_11676 than NC_000007.13" in \
747735
results['NR_163594.1:n.1122_1124delinsT']['validation_warnings']
748736

749737
def test_vv_series_13(self):
750738
variant = 'NC_000004.12:g.139889957_139889968del'
751739
results = self.vv.validate(variant, 'GRCh37', 'all').format_as_dict(test=True)
752740
print(results)
753-
754741
assert "NM_018717.4 contains 3 fewer bases between c.2276_2277, and 12 fewer bases between c.1467_1468 than NC_000004.12" in \
755742
results['NM_018717.4:c.1466_1468=']['validation_warnings']
756743

744+
def test_vv_series_14(self):
745+
variant = 'NM_000516.7:c.2780+73C>T'
746+
results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
747+
print(results)
748+
assert "CDSError: Variant start position and/or end position are beyond the CDS end position and likely also beyond the end of the selected reference sequence" in \
749+
results['validation_warning_1']['validation_warnings']
750+
751+
def test_vv_series_15(self):
752+
variant = 'NM_000518.5:c.89+25del'
753+
results = self.vv.validate(variant, 'GRCh38', 'all').format_as_dict(test=True)
754+
print(results)
755+
assert "ExonBoundaryError: Position c.89+25 does not correspond with an exon boundary for transcript NM_000518.5" in \
756+
results['validation_warning_1']['validation_warnings']
757+
757758

758759
# <LICENSE>
759760
# Copyright (C) 2016-2022 VariantValidator Contributors

0 commit comments

Comments
 (0)