InterPro API

GET /api/?format=api

HTTP 200 OK
Allow: GET, HEAD
Content-Type: application/json
InterPro-Version: 107.0
InterPro-Version-Minor: 0
Vary: Accept

{
    "endpoints": [
        "entry",
        "protein",
        "structure",
        "taxonomy",
        "proteome",
        "set",
        "utils"
    ],
    "databases": {
        "unreviewed": {
            "canonical": "unreviewed",
            "name": "UniProtKB/TrEMBL",
            "description": "UniProtKB/TrEMBL is a computer-annotated protein sequence database complementing the UniProtKB/Swiss-Prot Protein Knowledgebase. UniProtKB/TrEMBL contains the translations of all coding sequences (CDS) present in the EMBL/GenBank/DDBJ Nucleotide Sequence Databases and also protein sequences extracted from the literature or submitted to UniProtKB/Swiss-Prot. The database is enriched with automated classification and annotation.",
            "version": "2025_04",
            "releaseDate": "2025-10-15T00:00:00Z",
            "type": "protein"
        },
        "uniprot": {
            "canonical": "uniprot",
            "name": "UniProtKB",
            "description": "UniProt is a freely accessible database of protein sequence and functional information, many entries being derived from genome sequencing projects",
            "version": "2025_04",
            "releaseDate": "2025-10-15T00:00:00Z",
            "type": "protein"
        },
        "reviewed": {
            "canonical": "reviewed",
            "name": "UniProtKB/Swiss-Prot",
            "description": "UniProtKB/Swiss-Prot is a high quality manually annotated and non-redundant protein sequence database, which brings together experimental results, computed features and scientific conclusions. UniProtKB/Swiss-Prot provides annotated entries for many species, but concentrates on the annotation of entries from model organisms of distinct taxonomic groups to ensure the presence of high quality annotation for representative members of all protein families.",
            "version": "2025_04",
            "releaseDate": "2025-10-15T00:00:00Z",
            "type": "protein"
        },
        "prints": {
            "canonical": "prints",
            "name": "PRINTS",
            "description": "PRINTS is a compendium of protein fingerprints. A fingerprint is a group of conserved motifs used to characterise a protein family or domain. PRINTS is based at the University of Manchester, UK.",
            "version": "42.0",
            "releaseDate": "2012-06-14T00:00:00Z",
            "type": "entry"
        },
        "pirsf": {
            "canonical": "pirsf",
            "name": "PIRSF",
            "description": "PIRSF protein classification system is a network with multiple levels of sequence diversity from superfamilies to subfamilies that reflects the evolutionary relationship of full-length proteins and domains. PIRSF is based at the Protein Information Resource, Georgetown University Medical Centre, Washington DC, US.",
            "version": "3.10",
            "releaseDate": "2020-04-07T00:00:00Z",
            "type": "entry"
        },
        "profile": {
            "canonical": "profile",
            "name": "PROSITE profiles",
            "description": "PROSITE is a database of protein families and domains. It consists of biologically significant sites, patterns and profiles that help to reliably identify to which known protein family a new sequence belongs. PROSITE is based at the Swiss Institute of Bioinformatics (SIB), Geneva, Switzerland.",
            "version": "2025_01",
            "releaseDate": "2025-02-05T00:00:00Z",
            "type": "entry"
        },
        "antifam": {
            "canonical": "antifam",
            "name": "AntiFam",
            "description": "AntiFam is a resource of profile-HMMs designed to identify spurious protein predictions. AntiFam is based at EMBL-EBI",
            "version": "8.0",
            "releaseDate": "2024-09-23T00:00:00Z",
            "type": "entry"
        },
        "pfam": {
            "canonical": "pfam",
            "name": "Pfam",
            "description": "Pfam is a large collection of multiple sequence alignments and hidden Markov models covering many common protein domains. Pfam is based at EMBL-EBI, Hinxton, UK.",
            "version": "38.0",
            "releaseDate": "2025-09-01T00:00:00Z",
            "type": "entry"
        },
        "prosite": {
            "canonical": "prosite",
            "name": "PROSITE patterns",
            "description": "PROSITE is a database of protein families and domains. It consists of biologically significant sites, patterns and profiles that help to reliably identify to which known protein family a new sequence belongs. PROSITE is based at the Swiss Institute of Bioinformatics (SIB), Geneva, Switzerland.",
            "version": "2025_01",
            "releaseDate": "2025-02-05T00:00:00Z",
            "type": "entry"
        },
        "panther": {
            "canonical": "panther",
            "name": "PANTHER",
            "description": "PANTHER is a large collection of protein families that have been subdivided into functionally related subfamilies, using human expertise. These subfamilies model the divergence of specific functions within protein families, allowing more accurate association with function, as well as inference of amino acids important for functional specificity. Hidden Markov models (HMMs) are built for each family and subfamily for classifying additional protein sequences. PANTHER is based at the University of Southern California, CA, US.",
            "version": "19.0",
            "releaseDate": "2024-06-20T00:00:00Z",
            "type": "entry"
        },
        "ncbifam": {
            "canonical": "ncbifam",
            "name": "NCBIFAM",
            "description": "NCBIfam is a collection of protein families, featuring curated multiple sequence alignments, hidden Markov models (HMMs) and annotation, which provides a tool for identifying functionally related proteins based on sequence homology. NCBIfam is maintained at the National Center for Biotechnology Information (Bethesda, MD). NCBIfam includes models from TIGRFAMs, another database of protein families developed at The Institute for Genomic Research, then at the J. Craig Venter Institute (Rockville, MD, US).",
            "version": "17.0",
            "releaseDate": "2024-12-16T00:00:00Z",
            "type": "entry"
        },
        "sfld": {
            "canonical": "sfld",
            "name": "SFLD",
            "description": "SFLD (Structure-Function Linkage Database) is a hierarchical classification of enzymes that relates specific sequence-structure features to specific chemical capabilities.",
            "version": "4",
            "releaseDate": "2018-09-07T00:00:00Z",
            "type": "entry"
        },
        "interpro": {
            "canonical": "interpro",
            "name": "InterPro",
            "description": "InterPro provides functional analysis of proteins by classifying them into families and predicting domains and important sites. We combine protein signatures from a number of member databases into a single searchable resource, capitalising on their individual strengths to produce a powerful integrated database and diagnostic tool. To classify proteins in this way, InterPro uses predictive models, known as signatures, provided by several different databases (referred to as member databases) that make up the InterPro consortium.",
            "version": "107.0",
            "releaseDate": "2025-10-16T00:00:00Z",
            "type": "entry"
        },
        "hamap": {
            "canonical": "hamap",
            "name": "HAMAP",
            "description": "HAMAP stands for High-quality Automated and Manual Annotation of Proteins. HAMAP profiles are manually created by expert curators. They identify proteins that are part of well-conserved protein families or subfamilies. HAMAP is based at the SIB Swiss Institute of Bioinformatics, Geneva, Switzerland.",
            "version": "2025_01",
            "releaseDate": "2025-02-05T00:00:00Z",
            "type": "entry"
        },
        "smart": {
            "canonical": "smart",
            "name": "SMART",
            "description": "SMART (a Simple Modular Architecture Research Tool) allows the identification and annotation of genetically mobile domains and the analysis of domain architectures. SMART is based at EMBL, Heidelberg, Germany.",
            "version": "9.0",
            "releaseDate": "2020-02-14T00:00:00Z",
            "type": "entry"
        },
        "ssf": {
            "canonical": "ssf",
            "name": "SUPERFAMILY",
            "description": "SUPERFAMILY is a library of profile hidden Markov models that represent all proteins of known structure. The library is based on the SCOP classification of proteins: each model corresponds to a SCOP domain and aims to represent the entire SCOP superfamily that the domain belongs to. SUPERFAMILY is based at the University of Bristol, UK.",
            "version": "1.75",
            "releaseDate": "2010-11-08T00:00:00Z",
            "type": "entry"
        },
        "cdd": {
            "canonical": "cdd",
            "name": "CDD",
            "description": "CDD is a protein annotation resource that consists of a collection of annotated multiple sequence alignment models for ancient domains and full-length proteins. These are available as position-specific score matrices (PSSMs) for fast identification of conserved domains in protein sequences via RPS-BLAST. CDD content includes NCBI-curated domain models, which use 3D-structure information to explicitly define domain boundaries and provide insights into sequence/structure/function relationships, as well as domain models imported from a number of external source databases.",
            "version": "3.21",
            "releaseDate": "2024-04-18T00:00:00Z",
            "type": "entry"
        },
        "cathgene3d": {
            "canonical": "cathgene3d",
            "name": "CATH-Gene3D",
            "description": "The CATH-Gene3D database describes protein families and domain architectures in complete genomes. Protein families are formed using a Markov clustering algorithm, followed by multi-linkage clustering according to sequence identity. Mapping of predicted structure and sequence domains is undertaken using hidden Markov models libraries representing CATH and Pfam domains. CATH-Gene3D is based at University College, London, UK.",
            "version": "4.3.0",
            "releaseDate": "2020-10-21T00:00:00Z",
            "type": "entry"
        },
        "signalp_e": {
            "canonical": "signalp_e",
            "name": "SignalP_Euk",
            "description": "SignalP predicts the presence and location of signal peptide cleavage sites in amino acid sequences from different organisms",
            "version": "4.1",
            "releaseDate": "2020-05-23T00:32:31Z",
            "type": "feature"
        },
        "signalp_g-": {
            "canonical": "signalp_g-",
            "name": "SignalP_Gram_negative",
            "description": "SignalP predicts the presence and location of signal peptide cleavage sites in amino acid sequences from different organisms",
            "version": "4.1",
            "releaseDate": "2020-05-23T00:32:41Z",
            "type": "feature"
        },
        "signalp_g+": {
            "canonical": "signalp_g+",
            "name": "SignalP_Gram_positive",
            "description": "SignalP predicts the presence and location of signal peptide cleavage sites in amino acid sequences from different organisms",
            "version": "4.1",
            "releaseDate": "2020-05-23T00:32:37Z",
            "type": "feature"
        },
        "tmhmm": {
            "canonical": "tmhmm",
            "name": "TMHMM",
            "description": "Prediction of transmembrane helices in proteins",
            "version": "2.0c",
            "releaseDate": "2020-05-23T00:32:28Z",
            "type": "feature"
        },
        "pfam-n": {
            "canonical": "pfam-n",
            "name": "Pfam-N",
            "description": "Pfam-N is a collection of protein sequence annotations identified by ProtENN, a machine learning technique that predicts the function of proteins. ProtENN is developed at Google Research.",
            "version": "37.0",
            "releaseDate": "2024-07-25T00:00:00Z",
            "type": "feature"
        },
        "phobius": {
            "canonical": "phobius",
            "name": "Phobius",
            "description": "A combined transmembrane topology and signal peptide predictor",
            "version": "1.01",
            "releaseDate": "2020-05-23T00:32:24Z",
            "type": "feature"
        },
        "mobidblt": {
            "canonical": "mobidblt",
            "name": "MobiDB Lite",
            "description": "MobiDB offers a centralized resource for annotations of intrinsic protein disorder. The database features three levels of annotation: manually curated, indirect and predicted. The different sources present a clear tradeoff between quality and coverage. By combining them all into a consensus annotation, MobiDB aims at giving the best possible picture of the “disorder landscape” of a given protein of interest.",
            "version": "4.0",
            "releaseDate": "2024-10-03T00:00:00Z",
            "type": "feature"
        },
        "elm": {
            "canonical": "elm",
            "name": "ELM",
            "description": "The Eukaryotic Linear Motif (ELM) resource provides a comprehensive, regularly updated and well-organised repository of manually curated, experimentally validated short linear motifs (SLiMs). ELM is based at EMBL, Heidelberg, Germany.",
            "version": "2023.04.11",
            "releaseDate": "2023-04-11T00:00:00Z",
            "type": "feature"
        },
        "coils": {
            "canonical": "coils",
            "name": "COILS",
            "description": "Prediction of Coiled Coil Regions in Proteins",
            "version": "2.2.1",
            "releaseDate": "2023-11-09T00:00:00Z",
            "type": "feature"
        },
        "cathfunfam": {
            "canonical": "cathfunfam",
            "name": "CATH-FunFam",
            "description": "The homologous superfamilies in CATH-Gene3D can often be functionally and structurally diverse even though they share a conserved structural core. Therefore, the superfamilies have been sub-classified into functional families (FunFams) using a subclassification protocol purely based on sequence patterns. Relatives within these FunFams are likely to share highly similar structures and functions. FunFams are useful in function prediction and in providing information on the evolution of function. FunFams are part of the CATH-Gene3D database, based at University College, London, UK.",
            "version": "4.3.0",
            "releaseDate": "2020-10-21T00:00:00Z",
            "type": "feature"
        },
        "priam": {
            "canonical": "priam",
            "name": "PRIAM",
            "description": "ENZYME-SPECIFIC PROFILES for metabolic pathway prediction.",
            "version": null,
            "releaseDate": null,
            "type": "other"
        },
        "pirsr": {
            "canonical": "pirsr",
            "name": "PIRSR",
            "description": "PIR Site Rules (PIRSR) are manually curated annotation rules based on the PIRSF protein classification system. They provide annotation of functional residues within experimentally-uncharacterized proteins. PIRSR is based at the Protein Information Resource, Georgetown University Medical Centre, Washington DC, US.",
            "version": "2025_01",
            "releaseDate": "2025-02-05T00:00:00Z",
            "type": "other"
        },
        "pdb": {
            "canonical": "pdb",
            "name": "PDB",
            "description": "The Protein Data Bank (PDB) is a repository for the 3-D structural data of large biological molecules, such as proteins and nucleic acids.",
            "version": null,
            "releaseDate": null,
            "type": "other"
        },
        "prositedoc": {
            "canonical": "prositedoc",
            "name": "PROSITE doc",
            "description": "PROSITE is a database of protein families and domains. It consists of biologically significant sites, patterns and profiles that help to reliably identify to which known protein family a new sequence belongs. PROSITE is base at the Swiss Institute of Bioinformatics (SIB), Geneva, Switzerland.",
            "version": null,
            "releaseDate": null,
            "type": "other"
        },
        "reactome": {
            "canonical": "reactome",
            "name": "Reactome",
            "description": "Reactome is pathway database which provides intuitive bioinformatics tools for the visualisation, interpretation and analysis of pathway knowledge.",
            "version": null,
            "releaseDate": null,
            "type": "other"
        },
        "metacyc": {
            "canonical": "metacyc",
            "name": "MetaCyc",
            "description": "MetaCyc is a curated database of experimentally elucidated metabolic pathways from all domains of life. MetaCyc contains pathways involved in both primary and secondary metabolism, as well as associated metabolites, reactions, enzymes, and genes. The goal of MetaCyc is to catalog the universe of metabolism by storing a representative sample of each experimentally elucidated pathway.",
            "version": null,
            "releaseDate": null,
            "type": "other"
        },
        "iuphar": {
            "canonical": "iuphar",
            "name": "IUPHAR receptor code",
            "description": null,
            "version": null,
            "releaseDate": null,
            "type": "other"
        },
        "gp": {
            "canonical": "gp",
            "name": "Genome Properties",
            "description": "Genome properties (GP) is an annotation system whereby functional attributes can be assigned to a genome, based on the presence of a defined set of protein family markers within that genome. For example, a species can be proposed to synthesise proline if it can be shown that the genome for that species encodes all the necessary proteins required to carry out the various biochemical steps in the proline biosynthesis pathway.",
            "version": null,
            "releaseDate": null,
            "type": "other"
        },
        "go": {
            "canonical": "go",
            "name": "Gene Ontology",
            "description": "The Gene Ontology (GO) describes knowledge of the biological domain with respect to three aspects: Molecular function, Biological process, and Cellular component.",
            "version": "2025-09-24",
            "releaseDate": "2025-09-24T02:23:34Z",
            "type": "other"
        },
        "ec": {
            "canonical": "ec",
            "name": "ENZYME",
            "description": "ENZYME is a repository of information relative to the nomenclature of enzymes. It is primarily based on the recommendations of the Nomenclature Committee of the International Union of Biochemistry and Molecular Biology (IUBMB) and it describes each type of characterized enzyme for which an EC (Enzyme Commission) number has been provided.",
            "version": null,
            "releaseDate": null,
            "type": "other"
        },
        "cazy": {
            "canonical": "cazy",
            "name": "CAZy",
            "description": "The CAZy database describes the families of structurally-related catalytic and carbohydrate-binding modules (or functional domains) of enzymes that degrade, modify, or create glycosidic bonds.",
            "version": null,
            "releaseDate": null,
            "type": "other"
        }
    },
    "sources": {
        "mysql": {
            "server": "pg-interpro-rel-test",
            "status": "OK"
        },
        "elasticsearch": {
            "server": "hh-interpro-rel-es8-01",
            "status": "OK"
        },
        "cache": {
            "server": "redis://wp-p1m2-c4.ebi.ac.uk:6379/1",
            "status": "enabled"
        }
    }
}

General Handler