The American College of Medical Genetics and Genomics (ACMG) has recommended that genetic variants in certain genes that are pathogenic or likely pathogenic should be reported back to the patient. The latest list of genes can be found here. How do I assess whether a variant is pathogenic or likely pathogenic? Use this tool, which follows the recommendations of ACMG.
We can use GEMINI to find variants that may be pathogenic by creating a query that returns variants that are in the list of 58 genes and have some evidence of being pathogenic or likely pathogenic based on the ClinVar database. I have created a GEMINI database using the gnomAD exome variants (gnomad.exomes.r2.0.1.sites.vcf.gz) and will use it as an example.
# how many variants? gemini query -q 'select count(*) from variants' *.db 17009588 gemini query --header -q 'select chrom, start, end, ref, alt, gene, clinvar_sig, clinvar_disease_name, clinvar_dbsource, clinvar_dbsource_id, clinvar_origin, clinvar_dsdb, clinvar_dsdbid, clinvar_disease_acc, clinvar_in_locus_spec_db, clinvar_on_diag_assay, clinvar_causal_allele from variants where (gene == "ACTA2" OR gene == "ACTC1" OR gene == "APC" OR gene == "APOB" OR gene == "ATP7B" OR gene == "BMPR1A" OR gene == "BRCA1" OR gene == "BRCA2" OR gene == "CACNA1S" OR gene == "COL3A1" OR gene == "DSG2" OR gene == "DSP" OR gene == "FBN1" OR gene == "GLA" OR gene == "KCNH2" OR gene == "KCNQ1" OR gene == "LDLR" OR gene == "LMNA" OR gene == "MEN1" OR gene == "MLH1" OR gene == "MSH2" OR gene == "MSH6" OR gene == "MUTYH" OR gene == "MYBPC3" OR gene == "MYH11" OR gene == "MYH7" OR gene == "MYL2" OR gene == "MYL3" OR gene == "NF2" OR gene == "OTC" OR gene == "PCSK9" OR gene == "PKP2" OR gene == "PMS2" OR gene == "PRKAG2" OR gene == "PTEN" OR gene == "RB1" OR gene == "RET" OR gene == "RYR1" OR gene == "RYR2" OR gene == "SCN5A" OR gene == "SDHAF2" OR gene == "SDHB" OR gene == "SDHC" OR gene == "SDHD" OR gene == "SMAD3" OR gene == "SMAD4" OR gene == "STK11" OR gene == "TGFBR1" OR gene == "TGFBR2" OR gene == "TMEM43" OR gene == "TNNI3" OR gene == "TNNT2" OR gene == "TP53" OR gene == "TPM1" OR gene == "TSC1" OR gene == "TSC2" OR gene == "VHL" OR gene == "WT1" ) and clinvar_sig like "%pathogenic%" and clinvar_causal_allele == alt' *.db | head chrom start end ref alt gene clinvar_sig clinvar_disease_name clinvar_dbsource clinvar_dbsource_id clinvar_origin clinvar_dsdb clinvar_dsdbid clinvar_disease_acc clinvar_in_locus_spec_db clinvar_on_diag_assay clinvar_causal_allele chr10 90701550 90701551 G A ACTA2 pathogenic Aortic_aneurysm,_familial_thoracic_6|Thoracic_aortic_aneurysm_and_aortic_dissection OMIM_Allelic_Variant|UniProtKB_(protein) 102620.0001|P62736#VAR_045918 germline 
MedGen:OMIM|MedGen:Orphanet C2673186:611788|CN118826:ORPHA91387 RCV000019938.27|RCV000246692.1 1 0 A chr10 90703569 90703570 C T ACTA2 pathogenic,likely-pathogenic not_provided|Aortic_aneurysm,_familial_thoracic_6 UniProtKB_(protein) P62736#VAR_045916 germline MedGen|MedGen:OMIM CN221809|C2673186:611788 RCV000181019.2|RCV000234479.1 0 0 T chr15 35082634 35082635 A G ACTC1 uncertain,likely-pathogenic not_provided|Cardiovascular_phenotype . . germline MedGen|MedGen CN221809|CN230736 RCV000157802.1|RCV000245634.1 1 0 G chr15 35083363 35083364 C T ACTC1 pathogenic Dilated_cardiomyopathy_1R OMIM_Allelic_Variant|UniProtKB_(protein) 102540.0001|P68032#VAR_012860 germline MedGen:OMIM C3150681:613424 RCV000019988.26 1 0 T chr15 35085598 35085599 C T ACTC1 pathogenic,other Familial_hypertrophic_cardiomyopathy_11|Left_ventricular_noncompaction_4|Primary_familial_hypertrophic_cardiomyopathy|not_provided OMIM_Allelic_Variant|UniProtKB_(protein) 102540.0009|P68032#VAR_012857 germline MedGen:OMIM|MedGen|MedGen:Orphanet:SNOMED_CT|MedGen C2677506:612098|C3150682|C0949658:ORPHA155:83978005|CN221809 RCV000019996.31|RCV000019997.27|RCV000029295.3|RCV000157780.3 1 0 T chr15 35085618 35085619 T C ACTC1 likely-pathogenic Familial_hypertrophic_cardiomyopathy_11 . . germline MedGen:OMIM C2677506:612098 RCV000201495.1 0 0 C chr15 35085631 35085632 G A ACTC1 uncertain,pathogenic Familial_hypertrophic_cardiomyopathy_11|not_specified OMIM_Allelic_Variant|UniProtKB_(protein) 102540.0004|P68032#VAR_045924 germline MedGen:OMIM|MedGen C2677506:612098|CN169374 RCV000019991.28|RCV000038323.3 1 0 A chr5 112090656 112090657 C T APC pathogenic,other Hereditary_cancer-predisposing_syndrome|Familial_adenomatous_polyposis_1 . . germline MedGen:SNOMED_CT|MedGen:OMIM C0027672:699346009|C2713442:175100 RCV000164002.2|RCV000227124.1 1 0 T chr5 112102883 112102884 A G APC pathogenic Hereditary_cancer-predisposing_syndrome . . 
germline MedGen:SNOMED_CT C0027672:699346009 RCV000163246.2 1 0 G gemini query --header -q 'select chrom, start, end, ref, alt, gene, clinvar_sig, clinvar_disease_name, clinvar_dbsource, clinvar_dbsource_id, clinvar_origin, clinvar_dsdb, clinvar_dsdbid, clinvar_disease_acc, clinvar_in_locus_spec_db, clinvar_on_diag_assay, clinvar_causal_allele from variants where (gene == "ACTA2" OR gene == "ACTC1" OR gene == "APC" OR gene == "APOB" OR gene == "ATP7B" OR gene == "BMPR1A" OR gene == "BRCA1" OR gene == "BRCA2" OR gene == "CACNA1S" OR gene == "COL3A1" OR gene == "DSG2" OR gene == "DSP" OR gene == "FBN1" OR gene == "GLA" OR gene == "KCNH2" OR gene == "KCNQ1" OR gene == "LDLR" OR gene == "LMNA" OR gene == "MEN1" OR gene == "MLH1" OR gene == "MSH2" OR gene == "MSH6" OR gene == "MUTYH" OR gene == "MYBPC3" OR gene == "MYH11" OR gene == "MYH7" OR gene == "MYL2" OR gene == "MYL3" OR gene == "NF2" OR gene == "OTC" OR gene == "PCSK9" OR gene == "PKP2" OR gene == "PMS2" OR gene == "PRKAG2" OR gene == "PTEN" OR gene == "RB1" OR gene == "RET" OR gene == "RYR1" OR gene == "RYR2" OR gene == "SCN5A" OR gene == "SDHAF2" OR gene == "SDHB" OR gene == "SDHC" OR gene == "SDHD" OR gene == "SMAD3" OR gene == "SMAD4" OR gene == "STK11" OR gene == "TGFBR1" OR gene == "TGFBR2" OR gene == "TMEM43" OR gene == "TNNI3" OR gene == "TNNT2" OR gene == "TP53" OR gene == "TPM1" OR gene == "TSC1" OR gene == "TSC2" OR gene == "VHL" OR gene == "WT1" ) and clinvar_sig like "%pathogenic%" and clinvar_causal_allele == alt' *.db | wc -l 1623
Obviously, you'll still have to consider the inheritance model of the disorder, the evidence for pathogenicity, genotype quality, etc.; the query above only finds possible incidental findings. The list of genes will likely change in the future, so I wrote a script that can generate the above query based on a text file with a list of genes. I have shared incidental_finding.tsv here.
head incidental_finding.tsv ACTA2 ACTC1 APC APOB ATP7B BMPR1A BRCA1 BRCA2 CACNA1S COL3A1 create_query.pl incidental_finding.tsv gemini query --header -q 'select chrom, start, end, ref, alt, gene, clinvar_sig, clinvar_disease_name, clinvar_dbsource, clinvar_dbsource_id, clinvar_origin, clinvar_dsdb, clinvar_dsdbid, clinvar_disease_acc, clinvar_in_locus_spec_db, clinvar_on_diag_assay, clinvar_causal_allele from variants where (gene == "ACTA2" OR gene == "ACTC1" OR gene == "APC" OR gene == "APOB" OR gene == "ATP7B" OR gene == "BMPR1A" OR gene == "BRCA1" OR gene == "BRCA2" OR gene == "CACNA1S" OR gene == "COL3A1" OR gene == "DSG2" OR gene == "DSP" OR gene == "FBN1" OR gene == "GLA" OR gene == "KCNH2" OR gene == "KCNQ1" OR gene == "LDLR" OR gene == "LMNA" OR gene == "MEN1" OR gene == "MLH1" OR gene == "MSH2" OR gene == "MSH6" OR gene == "MUTYH" OR gene == "MYBPC3" OR gene == "MYH11" OR gene == "MYH7" OR gene == "MYL2" OR gene == "MYL3" OR gene == "NF2" OR gene == "OTC" OR gene == "PCSK9" OR gene == "PKP2" OR gene == "PMS2" OR gene == "PRKAG2" OR gene == "PTEN" OR gene == "RB1" OR gene == "RET" OR gene == "RYR1" OR gene == "RYR2" OR gene == "SCN5A" OR gene == "SDHAF2" OR gene == "SDHB" OR gene == "SDHC" OR gene == "SDHD" OR gene == "SMAD3" OR gene == "SMAD4" OR gene == "STK11" OR gene == "TGFBR1" OR gene == "TGFBR2" OR gene == "TMEM43" OR gene == "TNNI3" OR gene == "TNNT2" OR gene == "TP53" OR gene == "TPM1" OR gene == "TSC1" OR gene == "TSC2" OR gene == "VHL" OR gene == "WT1" ) and clinvar_sig like "%pathogenic%" and clinvar_causal_allele == alt' *.db
And the script.
#!/usr/bin/env perl
#
# create_query.pl - print a GEMINI command line that selects ClinVar
# pathogenic / likely pathogenic variants in a given list of genes.
#
# Usage: create_query.pl <gene_list.txt>
#
# The input file holds one gene symbol per line. Blank lines and
# surrounding whitespace (including Windows line endings) are ignored.
# The generated command is written to STDOUT; it is not executed.

use strict;
use warnings;

# Columns reported by the generated query, in output order.
my @COLUMNS = qw(
    chrom start end ref alt gene
    clinvar_sig clinvar_disease_name
    clinvar_dbsource clinvar_dbsource_id
    clinvar_origin clinvar_dsdb clinvar_dsdbid clinvar_disease_acc
    clinvar_in_locus_spec_db clinvar_on_diag_assay clinvar_causal_allele
);

# Everything up to and including the "(" that opens the gene filter.
my $QUERY_HEAD = "gemini query --header -q 'select "
    . join(', ', @COLUMNS)
    . " from variants where (";

# Closes the gene filter and adds the ClinVar conditions. The space
# before ")" is kept so the output matches earlier versions of this
# script byte for byte.
my $QUERY_TAIL
    = q{ ) and clinvar_sig like "%pathogenic%" and clinvar_causal_allele == alt' *.db};

# Turn raw input lines into a list of gene names.
# Trims leading/trailing whitespace (so "\r\n" endings are harmless),
# skips empty lines, and dies on names containing a quote character,
# because those would break the quoting of the generated command.
sub clean_gene_names {
    my @lines = @_;

    my @genes;
    for my $line (@lines) {
        my $gene = $line;
        $gene =~ s/\A\s+//;
        $gene =~ s/\s+\z//;
        next if $gene eq '';
        die "Invalid gene name (contains a quote character): $gene\n"
            if $gene =~ /['"]/;
        push @genes, $gene;
    }
    return @genes;
}

# Build the full GEMINI command for the given gene names.
# Returns the command as a string without a trailing newline.
# Dies when called with no genes, since "where ( )" is not a valid query.
sub build_query {
    my @genes = @_;

    die "No gene names given; cannot build a query\n" unless @genes;

    my $gene_filter = join ' OR ', map { qq{gene == "$_"} } @genes;
    return $QUERY_HEAD . $gene_filter . $QUERY_TAIL;
}

# Command-line entry point: read the gene list file named by the first
# argument and print the generated command.
sub main {
    my @args = @_;

    my $usage  = "Usage: $0 <gene_list.txt>\n";
    my $infile = shift @args or die $usage;

    open(my $in, '<', $infile) or die "Could not open $infile: $!\n";
    my @genes = clean_gene_names(<$in>);
    close($in);

    die "No gene names found in $infile\n" unless @genes;

    print build_query(@genes), "\n";
    return;
}

# Run only when executed as a script, so the subs above can be loaded
# (e.g. with require) without side effects.
main(@ARGV) unless caller;
This work is licensed under a Creative Commons
Attribution 4.0 International License.