Skip to content

Inconsistent output between LOVD and VV endpoints as well as hg19 and hg38 input. #392

@ifokkema

Description

@ifokkema

Describe the bug
Not sure if related to #202.

Variant NC_000008.10:g.6673379del (hg19/GRCh37) produces very strange mappings when using the VV endpoint, but not when using the LOVD endpoint. There is a gap warning in both cases, but somehow the two endpoints deal with it differently.

The LOVD endpoint produces results like I would expect; filtered output:

{
  "NC_000008.10:g.6673379del": {
    "NC_000008.10:g.6673379del": {
      "g_hgvs": "NC_000008.10:g.6673379del",
      "hgvs_t_and_p": {
        "NM_001289973.1": {
          "primary_assembly_loci": {
            "grch37": {
              "NC_000008.10": {
                "hgvs_genomic_description": "NC_000008.10:g.6673379del",
              }
            },
            "grch38": {
              "NC_000008.11": {
                "hgvs_genomic_description": "NC_000008.11:g.6815857del",
              }
            }
          },
          "t_hgvs": "NM_001289973.1:c.380C>T",
        },
        "NM_001289973.2": {
          "primary_assembly_loci": {
            "grch37": {
              "NC_000008.10": {
                "hgvs_genomic_description": "NC_000008.10:g.6673379del",
              }
            },
            "grch38": {
              "NC_000008.11": {
                "hgvs_genomic_description": "NC_000008.11:g.6815857del",
              }
            }
          },
          "t_hgvs": "NM_001289973.2:c.380C>T",
        },
        "NM_207411.4": {
          "primary_assembly_loci": {
            "grch37": {
              "NC_000008.10": {
                "hgvs_genomic_description": "NC_000008.10:g.6673379del",
              }
            },
            "grch38": {
              "NC_000008.11": {
                "hgvs_genomic_description": "NC_000008.11:g.6815857del",
              }
            }
          },
          "t_hgvs": "NM_207411.4:c.869C>T",
        },
        "NM_207411.5": {
          "primary_assembly_loci": {
            "grch37": {
              "NC_000008.10": {
                "hgvs_genomic_description": "NC_000008.10:g.6673379del",
              }
            },
            "grch38": {
              "NC_000008.11": {
                "hgvs_genomic_description": "NC_000008.11:g.6815857del",
              }
            }
          },
          "t_hgvs": "NM_207411.5:c.869C>T",
        }
      }
    }
  }
}

You can see that all mappings to transcripts are substitutions and not deletions (likely the result of the misalignment), but the mappings back to the genome are deletions again. So, what I put in, I also get out.

However, VV does something else entirely. Again, filtered output:

{
  "NM_001289973.1:c.380C>T": {
    "hgvs_transcript_variant": "NM_001289973.1:c.380C>T",
    "primary_assembly_loci": {
      "grch37": {
        "hgvs_genomic_description": "NC_000008.10:g.6673379G>A",
      },
      "grch38": {
        "hgvs_genomic_description": "NC_000008.11:g.6815857G>A",
      }
    },
    "submitted_variant": "NC_000008.10:g.6673379del",
  },
  "NM_001289973.2:c.380C>T": {
    "hgvs_transcript_variant": "NM_001289973.2:c.380C>T",
    "primary_assembly_loci": {
      "grch37": {
        "hgvs_genomic_description": "NC_000008.10:g.6673379G>A",
      },
      "grch38": {
        "hgvs_genomic_description": "NC_000008.11:g.6815857G>A",
      }
    },
    "submitted_variant": "NC_000008.10:g.6673379del",
  },
  "NM_207411.4:c.869C>T": {
    "hgvs_transcript_variant": "NM_207411.4:c.869C>T",
    "primary_assembly_loci": {
      "grch37": {
        "hgvs_genomic_description": "NC_000008.10:g.6673379G>A",
      },
      "grch38": {
        "hgvs_genomic_description": "NC_000008.11:g.6815857G>A",
      }
    },
    "submitted_variant": "NC_000008.10:g.6673379del",
  },
  "NM_207411.5:c.869C>T": {
    "hgvs_transcript_variant": "NM_207411.5:c.869C>T",
    "primary_assembly_loci": {
      "grch37": {
        "hgvs_genomic_description": "NC_000008.10:g.6673379G>A",
      },
      "grch38": {
        "hgvs_genomic_description": "NC_000008.11:g.6815857G>A",
      }
    },
    "submitted_variant": "NC_000008.10:g.6673379del",
  }
}

So, even though again the mapping to the transcripts results in substitutions, all of a sudden mapping back to the genome results in substitutions as well. So, when mapping the genomic variant to the transcript and then back, I lose my original variant description!

I thought this was maybe due to coding differences between VV and VF, but now I'm not so sure.

Using the LOVD endpoint again, but this time with the hg38 variant, reveals a new issue:

{
  "NC_000008.11:g.6815857del": {
    "NC_000008.11:g.6815857del": {
      "g_hgvs": "NC_000008.11:g.6815857del",
      "hgvs_t_and_p": {
        "NM_001289973.1": {
          "primary_assembly_loci": {
            "grch37": {
              "NC_000008.10": {
                "hgvs_genomic_description": "NC_000008.10:g.6673379del",
              }
            },
            "grch38": {
              "NC_000008.11": {
                "hgvs_genomic_description": "NC_000008.11:g.6815857del",
              }
            }
          },
          "t_hgvs": "NM_001289973.1:c.380del",
        },
        "NM_001289973.2": {
          "primary_assembly_loci": {
            "grch37": {
              "NC_000008.10": {
                "hgvs_genomic_description": "NC_000008.10:g.6673379del",
              }
            },
            "grch38": {
              "NC_000008.11": {
                "hgvs_genomic_description": "NC_000008.11:g.6815857del",
              }
            }
          },
          "t_hgvs": "NM_001289973.2:c.380del",
        },
        "NM_207411.4": {
          "primary_assembly_loci": {
            "grch37": {
              "NC_000008.10": {
                "hgvs_genomic_description": "NC_000008.10:g.6673379del",
              }
            },
            "grch38": {
              "NC_000008.11": {
                "hgvs_genomic_description": "NC_000008.11:g.6815857del",
              }
            }
          },
          "t_hgvs": "NM_207411.4:c.869del",
        },
        "NM_207411.5": {
          "primary_assembly_loci": {
            "grch37": {
              "NC_000008.10": {
                "hgvs_genomic_description": "NC_000008.10:g.6673379del",
              }
            },
            "grch38": {
              "NC_000008.11": {
                "hgvs_genomic_description": "NC_000008.11:g.6815857del",
              }
            }
          },
          "t_hgvs": "NM_207411.5:c.869del",
        }
      }
    }
  }
}

Now, the mappings to the transcripts aren't substitutions anymore. There's also no gap warning.

Finally, the hg38 variant on the VV endpoint presents only deletions, and returns my input as well.

{
  "NM_001289973.1:c.380del": {
    "hgvs_transcript_variant": "NM_001289973.1:c.380del",
    "primary_assembly_loci": {
      "grch37": {
        "hgvs_genomic_description": "NC_000008.10:g.6673379_6673380del",
      },
      "grch38": {
        "hgvs_genomic_description": "NC_000008.11:g.6815857del",
      }
    },
    "submitted_variant": "NC_000008.11:g.6815857del",
  },
  "NM_001289973.2:c.380del": {
    "hgvs_transcript_variant": "NM_001289973.2:c.380del",
    "primary_assembly_loci": {
      "grch37": {
        "hgvs_genomic_description": "NC_000008.10:g.6673379_6673380del",
      },
      "grch38": {
        "hgvs_genomic_description": "NC_000008.11:g.6815857del",
      }
    },
    "submitted_variant": "NC_000008.11:g.6815857del",
  },
  "NM_207411.4:c.869del": {
    "hgvs_transcript_variant": "NM_207411.4:c.869del",
    "primary_assembly_loci": {
      "grch37": {
        "hgvs_genomic_description": "NC_000008.10:g.6673379_6673380del",
      },
      "grch38": {
        "hgvs_genomic_description": "NC_000008.11:g.6815857del",
      }
    },
    "submitted_variant": "NC_000008.11:g.6815857del",
  },
  "NM_207411.5:c.869del": {
    "hgvs_transcript_variant": "NM_207411.5:c.869del",
    "primary_assembly_loci": {
      "grch37": {
        "hgvs_genomic_description": "NC_000008.10:g.6673379_6673380del",
      },
      "grch38": {
        "hgvs_genomic_description": "NC_000008.11:g.6815857del",
      }
    },
    "submitted_variant": "NC_000008.11:g.6815857del",
  }
}

Note that for the hg19 mappings, all of a sudden the deletions are ranges.

What could cause these inconsistencies?

  • VV returns genomic mappings that do not match the input, but only when using the hg19 variant.
  • When using the hg19 input, VV returns all substitutions. When using the hg38 input, VV returns all deletions, of which the hg19 mappings are deletions of two bases.
  • The LOVD endpoint doesn't handle the hg38 variant the same way as the equivalent hg19 variant either, but the way the two differ is not the same as with VV.

To Reproduce
See links in the above report.

Expected behavior

  • For one specific input, the returned mappings should be the same no matter which endpoint I use.
  • My genomic input should also be returned in the output as a valid mapping.
  • The hg19 variant and its equivalent on hg38 should produce the same mappings.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions