#!/usr/bin/env python3
"""
Example Python script to query SynVar and parse the results.
Demonstrates how to use the SynVar API and extract variant information.
"""

import xml.etree.ElementTree as ET
import json
from typing import Any, Dict, List, Optional

# Endpoint shared by the XML and JSON query helpers.
SYNVAR_URL = "https://synvar.sibils.org/generate/literature/fromMutation"

# Seconds to wait for the server; without a timeout requests.get() can block
# indefinitely on an unresponsive host.
REQUEST_TIMEOUT = 30


def _fetch_synvar(gene: str, variant: str, level: str, format_type: str,
                  iso: bool, map_genome: bool):
    """
    Send one GET request to SynVar and return the HTTP response.

    The ``requests`` package is imported here rather than at module level so
    that the parsing and printing helpers of this script stay importable when
    the HTTP client is not installed.

    Returns:
        The ``requests.Response`` on success, or None when the request failed
        (the error is printed).
    """
    import requests

    params = {
        "ref": gene,
        "variant": variant,
        "level": level,
        "format": format_type,
        "iso": "true" if iso else "false",
        "map": "true" if map_genome else "false",
    }
    try:
        response = requests.get(SYNVAR_URL, params=params,
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading: {e}")
        return None
    return response


def query_synvar_xml(gene: str, variant: str, level: str = "protein",
                     format_type: str = "xml", iso: bool = False,
                     map_genome: bool = True) -> Optional[str]:
    """
    Query the SynVar API and return the response.

    Args:
        gene: Gene name or reference (e.g., "BRAF", "7")
        variant: Variant description (e.g., "V600E", "c.1799T>A")
        level: Level of variant (protein, transcript, genome, any)
        format_type: Output format (xml, json, beacon)
        iso: Whether to expand to all isoforms
        map_genome: Whether to require genome mapping

    Returns:
        Response content as string, or None if the request failed
    """
    response = _fetch_synvar(gene, variant, level, format_type, iso, map_genome)
    return None if response is None else response.text


def _text(element: ET.Element) -> str:
    """Return the stripped text of *element*; '' when the element is empty."""
    # Element.text is None for elements such as <rsid/>, so guard before strip.
    return (element.text or "").strip()


def _texts(parent: ET.Element, path: str) -> List[str]:
    """Return the stripped text of every element matching *path* under *parent*."""
    return [_text(node) for node in parent.findall(path)]


def _parse_genome_level(genome_level: ET.Element) -> Dict:
    """Extract HGVS notations (with assembly) and syntactic variations."""
    return {
        'hgvs': [
            {'assembly': node.get('assembly', ''), 'notation': _text(node)}
            for node in genome_level.findall('.//hgvs-list/hgvs')
        ],
        'syntactic_variations': _texts(genome_level, './/syntactic-variation'),
    }


def _parse_isoform(isoform: ET.Element) -> Dict:
    """Extract the transcript-level and protein-level data of one isoform."""
    isoform_info = {
        'canonical': isoform.get('canonical-isoform') == 'true',
        'transcript_level': None,
        'protein_level': None,
    }

    transcript = isoform.find('.//transcript-level')
    if transcript is not None:
        isoform_info['transcript_level'] = {
            'hgvs': _texts(transcript, './/hgvs'),
            'syntactic_variations': _texts(transcript, './/syntactic-variation'),
        }

    protein = isoform.find('.//protein-level')
    if protein is not None:
        iso_name = protein.find('.//isoform-name')
        isoform_info['protein_level'] = {
            'isoform_name': None if iso_name is None else _text(iso_name),
            'hgvs': _texts(protein, './/hgvs'),
            'syntactic_variations': _texts(protein, './/syntactic-variation'),
        }

    return isoform_info


def _parse_variant(variant: ET.Element) -> Dict:
    """Extract all information of a single <variant> element."""
    variant_info = {
        'valid': variant.get('valid'),
        'mapped': variant.get('mapped'),
        'gene_synonyms': _texts(variant, './/gene-synonym-list/synonym'),
        'protein_synonyms': _texts(variant, './/protein-synonym-list/synonym'),
        # './hgvs' matches direct children only, i.e. the main HGVS entries.
        'hgvs': _texts(variant, './hgvs'),
        'rsid': None,
        'caid': None,
        'isoforms': [],
    }

    rsid_elem = variant.find('./rsid')
    if rsid_elem is not None:
        variant_info['rsid'] = _text(rsid_elem)

    # ClinGen Allele Registry ID
    caid_elem = variant.find('./caid')
    if caid_elem is not None:
        variant_info['caid'] = _text(caid_elem)

    # The 'genome_level' key is only present when the element exists.
    genome_level = variant.find('.//genome-level')
    if genome_level is not None:
        variant_info['genome_level'] = _parse_genome_level(genome_level)

    variant_info['isoforms'].extend(
        _parse_isoform(isoform)
        for isoform in variant.findall('.//isoform-list/isoform')
    )
    return variant_info


def parse_xml_response(xml_content: str) -> Optional[Dict]:
    """
    Parse XML response and extract variant information.

    Elements without text (e.g. ``<rsid/>``) yield an empty string instead of
    raising.

    Args:
        xml_content: XML response from SynVar

    Returns:
        Dictionary ``{'variants': [...]}`` with the parsed variant
        information, or None if the content is not well-formed XML
    """
    try:
        root = ET.fromstring(xml_content)
    except ET.ParseError as e:
        print(f"Variant failed to be normalized by SynVar: {e}")
        return None

    return {'variants': [_parse_variant(v) for v in root.findall('.//variant')]}


def query_synvar_json(gene: str, variant: str, level: str = "protein",
                      iso: bool = False,
                      map_genome: bool = True) -> Optional[Dict]:
    """
    Query the SynVar API and return JSON response.

    Args:
        gene: Gene name or reference
        variant: Variant description
        level: Level of variant (protein, transcript, genome, any)
        iso: Whether to expand to all isoforms
        map_genome: Whether to require genome mapping

    Returns:
        Parsed JSON response as dictionary, or None if the request failed or
        the body is not valid JSON
    """
    response = _fetch_synvar(gene, variant, level, "json", iso, map_genome)
    if response is None:
        return None

    try:
        return response.json()
    except ValueError as e:
        # Every JSON decode error raised by Response.json() (including
        # json.JSONDecodeError) derives from ValueError; catching it here keeps
        # decode failures from being reported as download errors.
        print(f"Error parsing JSON: {e}")
        return None


def _print_isoform_level(title: str, level_info: Dict) -> None:
    """Print the name (if any), HGVS entries and sample variations of a level."""
    print(f" {title}:")
    if level_info.get('isoform_name'):
        print(f" Name: {level_info['isoform_name']}")
    for hgvs in level_info['hgvs']:
        print(f" HGVS: {hgvs}")
    # Print first few syntactic variations as examples
    syn_vars = level_info['syntactic_variations'][:5]
    if syn_vars:
        print(f" Syntactic variations (first 5): {', '.join(syn_vars)}")


def print_variant_info(variant_data: Dict):
    """
    Print variant information in a readable format.

    Args:
        variant_data: Dictionary as returned by parse_xml_response()
    """
    for variant in variant_data.get('variants', []):
        print(f"\n{'='*60}")
        print(f"Variant: Valid={variant['valid']}, Mapped={variant['mapped']}")

        if variant['gene_synonyms']:
            print(f"Gene synonyms: {', '.join(variant['gene_synonyms'])}")
        if variant['rsid']:
            print(f"dbSNP: {variant['rsid']}")
        if variant['caid']:
            print(f"ClinGen Allele Registry: {variant['caid']}")
        if variant['hgvs']:
            print(f"Main HGVS: {', '.join(variant['hgvs'])}")

        if 'genome_level' in variant:
            print("\n--- Genome Level ---")
            for hgvs_info in variant['genome_level']['hgvs']:
                print(f" {hgvs_info['assembly']}: {hgvs_info['notation']}")

        for idx, isoform in enumerate(variant['isoforms'], 1):
            canonical = " (canonical)" if isoform['canonical'] else ""
            print(f"\n--- Isoform {idx}{canonical} ---")
            if isoform['protein_level']:
                _print_isoform_level("Protein level", isoform['protein_level'])
            if isoform['transcript_level']:
                _print_isoform_level("Transcript level",
                                     isoform['transcript_level'])


def _as_list(value: Any) -> List[Any]:
    """Normalise a JSON member that may be missing, a single item or a list."""
    if value is None:
        return []
    return value if isinstance(value, list) else [value]


def _json_variants(json_data: Dict) -> List[Any]:
    """Return the variant entries of a SynVar JSON response as a list."""
    return _as_list((json_data.get('variant-list') or {}).get('variant'))


def _json_isoforms(variant: Dict) -> List[Any]:
    """Return the isoform entries of one JSON variant as a list."""
    return _as_list((variant.get('isoform-list') or {}).get('isoform'))


def _json_protein_hgvs(isoform: Dict) -> List[Any]:
    """Return the protein-level HGVS entries of one JSON isoform as a list."""
    protein_level = isoform.get('protein-level') or {}
    return _as_list((protein_level.get('hgvs-list') or {}).get('hgvs'))


def main():
    """Example usage of the SynVar API."""
    # Example 1: Query with XML format and parse
    print("Example 1: Querying BRAF V600E (XML format)")
    print("-" * 60)
    xml_content = query_synvar_xml("BRAF", "V600E", "protein")
    if xml_content:
        variant_data = parse_xml_response(xml_content)
        if variant_data:
            print_variant_info(variant_data)

    # Example 2: Query with JSON format
    print("\n\n" + "="*60)
    print("Example 2: Querying JAK2 V617F (JSON format)")
    print("-" * 60)
    json_data = query_synvar_json("JAK2", "V617F", "protein")
    if json_data:
        for variant in _json_variants(json_data):
            print(f"\nValid: {variant.get('@valid')}, Mapped: {variant.get('@mapped')}")

            gene_syns = _as_list(
                (variant.get('gene-synonym-list') or {}).get('synonym'))
            if gene_syns:
                print(f"Gene synonyms: {', '.join(map(str, gene_syns))}")

            rsid = variant.get('rsid')
            if rsid:
                print(f"dbSNP: {rsid}")

            caid = variant.get('caid')
            if caid:
                print(f"ClinGen: {caid}")

            # Get protein HGVS from first isoform
            isoforms = _json_isoforms(variant)
            if isoforms:
                hgvs_list = _json_protein_hgvs(isoforms[0])
                if hgvs_list:
                    print(f"Protein HGVS: {hgvs_list[0]}")

    # Example 3: Query with isoform expansion
    print("\n\n" + "="*60)
    print("Example 3: Querying TP53 R248W with isoform expansion")
    print("-" * 60)
    json_data = query_synvar_json("TP53", "R248W", "protein", iso=True)
    if json_data:
        for variant in _json_variants(json_data):
            isoforms = _json_isoforms(variant)
            print(f"Found {len(isoforms)} isoform(s)")
            for idx, isoform in enumerate(isoforms, 1):
                canonical = (" (canonical)"
                             if isoform.get('@canonical-isoform') == 'true'
                             else "")
                protein_level = isoform.get('protein-level') or {}
                iso_name = protein_level.get('isoform-name', 'Unknown')
                hgvs_list = _json_protein_hgvs(isoform)
                hgvs = hgvs_list[0] if hgvs_list else "N/A"
                print(f" Isoform {idx}{canonical}: {iso_name} - {hgvs}")


if __name__ == "__main__":
    main()