jaeswift-website/api/data/awesomelist/inoue0426--awesome-computational-biology.json

1 line
No EOL
44 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{"slug": "inoue0426--awesome-computational-biology", "title": "Awesome Computational Biology", "description": "Awesome list of computational biology.", "github_url": "https://github.com/inoue0426/awesome-computational-biology", "stars": "122", "tag": "Miscellaneous", "entry_count": 242, "subcategory_count": 13, "subcategories": [{"name": "Table of Contents", "parent": "", "entries": [{"name": "Awesome Computational Biology", "url": "#awesome-computational-biology-", "description": ""}]}, {"name": "scRNA", "parent": "Databases", "entries": [{"name": "CZ CELLxGENE", "url": "https://cellxgene.cziscience.com/", "description": "Single-cell dataset repository and interactive explorer from the Chan Zuckerberg Initiative."}, {"name": "Gene Expression Omnibus", "url": "https://www.ncbi.nlm.nih.gov/geo/", "description": "Public functional genomics database."}, {"name": "Human Cell Atlas", "url": "https://www.humancellatlas.org/", "description": "Open global atlas of all cells in the human body."}, {"name": "Single Cell PORTAL", "url": "https://singlecell.broadinstitute.org/single_cell", "description": "Public database for single-cell RNA."}, {"name": "Single Cell Expression Atlas", "url": "https://www.ebi.ac.uk/gxa/sc/home", "description": "Public database for single-cell RNA."}]}, {"name": "Compound", "parent": "Databases", "entries": [{"name": "PubChem", "url": "https://pubchem.ncbi.nlm.nih.gov/", "description": "One of the largest chemical databases (compounds, genes, and proteins)."}, {"name": "ChEBI", "url": "https://www.ebi.ac.uk/chebi/", "description": "Database focused on small chemical compounds."}, {"name": "ChEMBL", "url": "https://www.ebi.ac.uk/chembl/", "description": "Bioactive molecules with drug-like properties."}, {"name": "ChemSpider", "url": "http://www.chemspider.com/", "description": "Chemical structure database."}, {"name": "DrugTargetCommons", "url": "https://drugtargetcommons.fimm.fi/", "description": "Community platform for curating and integrating 
experimental bioactivity data across drugs and targets."}, {"name": "HMDB (Human Metabolome Database)", "url": "https://hmdb.ca/", "description": "Comprehensive database of small molecule metabolites found in the human body."}, {"name": "KEGG COMPOUND", "url": "https://www.genome.jp/kegg/compound/", "description": "Collection of small molecules and biopolymers."}, {"name": "LIPID MAPS", "url": "https://www.lipidmaps.org/databases/lmsd/overview", "description": "Database of lipids."}, {"name": "Rhea", "url": "https://www.rhea-db.org/", "description": "Database of chemical reactions."}, {"name": "DrugCentral", "url": "http://drugcentral.org/", "description": "Online drug compendium with drug mode of action and indication information."}, {"name": "Drug Repurposing Hub", "url": "https://repo-hub.broadinstitute.org/repurposing#download-data", "description": "Collections of drug repurposing data (drug, MoA, target, etc)."}, {"name": "Therapeutic Target Database", "url": "https://idrblab.net/ttd/full-data-download", "description": "Drug-target, target-disease, and drug-disease datasets."}, {"name": "ZINC ligand discovery database", "url": "https://zinc.docking.org/", "description": "Free database of commercially-available compounds for virtual screening."}]}, {"name": "Pathway", "parent": "Databases", "entries": [{"name": "PathwayCommons", "url": "https://www.pathwaycommons.org/", "description": "Database of pathways and interactions."}, {"name": "KEGG PATHWAY", "url": "https://www.genome.jp/kegg/pathway.html", "description": "Collection of pathway maps."}, {"name": "WikiPathways", "url": "https://wikipathways.org/", "description": "Database of biological pathways."}, {"name": "Reactome", "url": "https://reactome.org/", "description": "Expert-curated, peer-reviewed pathway database with detailed reaction mechanisms."}, {"name": "BioCyc", "url": "https://biocyc.org/", "description": "Collection of pathway/genome databases across thousands of organisms."}, {"name": 
"SIGNOR", "url": "https://signor.uniroma2.it/", "description": "Database of causal signaling interactions and pathways."}, {"name": "MSigDB (Molecular Signatures Database)", "url": "https://www.gsea-msigdb.org/gsea/msigdb", "description": "Curated gene sets derived from pathways and biological processes."}]}, {"name": "Mass Spectra", "parent": "Databases", "entries": [{"name": "MassBank", "url": "http://www.massbank.jp/", "description": "Open source databases and tools for mass spectrometry reference spectra."}, {"name": "MoNA MassBank of North America", "url": "https://mona.fiehnlab.ucdavis.edu/", "description": "Meta-database of metabolite mass spectra, metadata, and associated compounds."}]}, {"name": "Protein", "parent": "Databases", "entries": [{"name": "THE HUMAN PROTEIN ATLAS", "url": "https://www.proteinatlas.org/", "description": "Comprehensive human protein database (cells, tissues, organs)."}, {"name": "PROTEIN DATA BANK (PDB)", "url": "https://www.rcsb.org/", "description": "3D structures of proteins, nucleic acids, complexes."}, {"name": "UniProt", "url": "https://www.uniprot.org/", "description": "Functional information on proteins."}, {"name": "AlphaFold Protein Structure Database", "url": "https://alphafold.ebi.ac.uk/api-docs", "description": "3D protein structure predictions."}, {"name": "RCSB Protein Data Bank", "url": "https://www.rcsb.org/", "description": "Repository for structural data of biological molecules."}, {"name": "Critical Assessment of Structure Prediction (CASP)", "url": "https://predictioncenter.org/", "description": "Assessing methods for protein structure prediction."}, {"name": "Uniclust", "url": "https://uniclust.mmseqs.com/", "description": "Clustered protein sequence databases."}, {"name": "UniRef", "url": "https://www.uniprot.org/uniref/", "description": "Non-redundant sequence database clustering UniProtKB entries at multiple sequence identity thresholds."}, {"name": "CATH database", "url": "https://www.cathdb.info/", 
"description": "Hierarchical classification of protein domain structures."}, {"name": "SAbDab", "url": "https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab", "description": "Structural Antibody Database containing all antibody structures in the PDB."}, {"name": "OADB (Observed Antibody Space Database)", "url": "http://opig.stats.ox.ac.uk/webapps/oas/", "description": "Database of antibody sequences from immune repertoire sequencing."}, {"name": "InterPro", "url": "https://www.ebi.ac.uk/interpro/", "description": "Protein families, domains, and functional sites database integrating 14 member databases including Pfam and PROSITE."}, {"name": "Pfam", "url": "https://www.ebi.ac.uk/interpro/entry/pfam/", "description": "Database of protein families described by multiple sequence alignments and hidden Markov models."}, {"name": "NeXtProt", "url": "https://www.nextprot.org/", "description": "Expert knowledge base on human proteins with deep functional annotation, complementary to UniProt."}]}, {"name": "Genome", "parent": "Databases", "entries": [{"name": "ENCODE", "url": "https://www.encodeproject.org/", "description": "Encyclopedia of DNA Elements; regulatory and functional genomic elements across the genome."}, {"name": "Ensembl", "url": "https://www.ensembl.org/", "description": "Genome browser and annotation database for vertebrate and other eukaryotic genomes."}, {"name": "Human Genome Resources at NCBI", "url": "https://www.ncbi.nlm.nih.gov/projects/genome/guide/human/index.shtml", "description": "Database for genomics, proteomics, transcriptomics, and systems biology."}, {"name": "GenBank", "url": "https://www.ncbi.nlm.nih.gov/genbank/", "description": "NCBI's database of genetic sequences."}, {"name": "UCSC Genome Browser", "url": "https://genome.ucsc.edu/", "description": "UCSC's genome browser."}, {"name": "cBioPortal", "url": "https://www.cbioportal.org/", "description": "Cancer genomics database; aggregating many patient datasets."}, {"name": "10x 
Genomics Dataset", "url": "https://www.10xgenomics.com/resources/datasets", "description": "Collection of single-cell datasets."}, {"name": "The Genotype-Tissue Expression (GTEx)", "url": "https://gtexportal.org/home/", "description": "Human gene expression and regulation resource."}, {"name": "Dependency Map (DepMap)", "url": "https://depmap.org/portal/", "description": "CRISPR-Cas9 screens in cancer cell lines."}, {"name": "Catalogue Of Somatic Mutations In Cancer (COSMIC)", "url": "https://cancer.sanger.ac.uk/cosmic", "description": "Resource on somatic mutations in cancers."}, {"name": "MGnify", "url": "https://www.ebi.ac.uk/metagenomics/", "description": "Resource for metagenomic and metatranscriptomic data."}, {"name": "JASPAR", "url": "http://jaspar.genereg.net/", "description": "Database of transcription factor binding profiles."}, {"name": "gnomAD", "url": "https://gnomad.broadinstitute.org/", "description": "Genome Aggregation Database; genetic variation from large-scale sequencing projects."}, {"name": "Rfam", "url": "https://rfam.org/", "description": "Database of RNA families with sequence alignments and consensus structures."}, {"name": "ROADMAP Epigenomics", "url": "http://www.roadmapepigenomics.org/", "description": "Reference epigenome maps for 111 primary human cell types and tissues, including histone modifications, chromatin accessibility, and DNA methylation."}, {"name": "FANTOM5", "url": "https://fantom.gsc.riken.jp/5/", "description": "Functional annotation of mammalian genome; comprehensive atlas of active enhancers, promoters, and transcription start sites across human and mouse cell types."}]}, {"name": "Disease", "parent": "Databases", "entries": [{"name": "KEGG DRUG", "url": "https://www.genome.jp/kegg/drug/", "description": "Comprehensive, approved drug information."}, {"name": "DrugBank", "url": "https://go.drugbank.com/", "description": "Database of drugs and targets (University of Alberta)."}, {"name": "DisGeNET", "url": 
"https://www.disgenet.org/", "description": "Database of gene-disease associations integrating expert-curated and GWAS data."}, {"name": "OMIM (Online Mendelian Inheritance in Man)", "url": "https://www.omim.org/", "description": "Comprehensive database of human genes and genetic disorders."}, {"name": "Open Targets Platform", "url": "https://platform.opentargets.org/", "description": "Systematic target identification and prioritization platform integrating genetics, genomics, and drug data for drug discovery."}, {"name": "Human Phenotype Ontology (HPO)", "url": "https://hpo.jax.org/", "description": "Standardized vocabulary of phenotypic abnormalities in human disease, linking genes, variants, and clinical features."}, {"name": "DISEASES", "url": "https://diseases.jensenlab.org/", "description": "Gene-disease association database integrating evidence from text mining, curated databases, and experimental data."}]}, {"name": "Interaction", "parent": "Databases", "entries": [{"name": "DGIdb", "url": "https://www.dgidb.org/", "description": "Drug-gene interactions and the druggable genome."}, {"name": "Comparative Toxicogenomics Database", "url": "http://ctdbase.org/", "description": "Chemical-gene interactions, chemical-disease and gene-disease associations, chemical-phenotype associations."}, {"name": "SNAP", "url": "https://snap.stanford.edu/biodata/datasets/10002/10002-ChG-Miner.html", "description": "Dataset of drug-gene interactions."}, {"name": "NCI60", "url": "https://dtp.cancer.gov/discovery_development/nci-60/", "description": "Focuses on 60 cancer cell lines and many drugs."}, {"name": "Genomics of Drug Sensitivity in Cancer (GDSC)", "url": "https://www.cancerrxgene.org/", "description": "Drug sensitivity for \\~1000 human cancer cell lines and hundreds of compounds."}, {"name": "Cancer Cell Line Encyclopedia", "url": "https://sites.broadinstitute.org/ccle/", "description": "Database of \\~1000 cancer cell lines."}, {"name": "CellMiner Cross Database 
(CellMinerCDB)", "url": "https://discover.nci.nih.gov/cellminercdb/", "description": "Integrates multiple cancer cell line databases."}, {"name": "STITCH", "url": "http://stitch.embl.de/", "description": "Chemical-protein interactions."}, {"name": "BindingDB", "url": "https://www.bindingdb.org/rwd/bind/index.jsp", "description": "Compounds and target database."}, {"name": "Davis kinase inhibitors DB", "url": "http://staff.cs.utu.fi/~aijrinas/dti/", "description": "Experimental kinase inhibitor binding affinity dataset for protein-ligand interaction research."}, {"name": "Kinase Inhibitor Bioactivity Data (KIBA)", "url": "https://janeliascicomp.github.io/KIBA/", "description": "Integrated bioactivity scores for kinase inhibitors combining Ki, Kd, and IC50 measurements."}, {"name": "PDBBind", "url": "https://www.pdbbind-plus.org.cn/", "description": "Binding affinity data for biomolecular complexes."}, {"name": "STRING", "url": "https://string-db.org/", "description": "PPI networks for multiple organisms."}, {"name": "BioGRID", "url": "https://thebiogrid.org/", "description": "Protein, genetic, and chemical interactions."}, {"name": "HIPPIE", "url": "http://cbdm-01.zdv.uni-mainz.de/~mschaefer/hippie/", "description": "Human protein-protein interaction database."}, {"name": "IntAct", "url": "https://www.ebi.ac.uk/intact/home", "description": "Open-source molecular interaction database and analysis system from EMBL-EBI."}, {"name": "Drug Mechanism Database (DrugMechDB)", "url": "https://github.com/SuLab/DrugMechDB/tree/2.0.1", "description": "Mechanisms of action from drug to disease.", "stars": "71"}, {"name": "DRKG", "url": "https://github.com/gnn4dr/DRKG", "description": "Large-scale biological knowledge graph for drug discovery.", "stars": "678"}, {"name": "Hetionet", "url": "https://github.com/hetio/hetionet", "description": "Heterogeneous network integrating genes, diseases, drugs, pathways, and more.", "stars": "347"}, {"name": "PrimeKG", "url": 
"https://github.com/mims-harvard/PrimeKG", "description": "Multi-modal precision medicine knowledge graph integrating clinical, genetic, and drug data.", "stars": "723"}, {"name": "TRRUST", "url": "https://www.grnpedia.org/trrust/", "description": "Manually curated database of human and mouse transcriptional regulatory interactions between transcription factors and their target genes."}, {"name": "RegNetwork", "url": "http://www.regnetworkweb.org/", "description": "Database of gene regulatory networks covering transcription factor-target gene and miRNA-gene interaction data across multiple species."}, {"name": "miRBase", "url": "https://www.mirbase.org/", "description": "Reference repository for microRNA gene annotations, sequences, and experimentally validated targets."}]}, {"name": "Clinical Trial", "parent": "Databases", "entries": [{"name": "ClinicalTrials.gov", "url": "https://clinicaltrials.gov/", "description": "Privately and publicly funded clinical studies."}, {"name": "ICD10", "url": "https://icd.who.int/browse10/2019/en", "description": "International Classification of Diseases, 10th revision."}, {"name": "EU Drug Regulating Authorities Clinical Trials DB (EudraCT)", "url": "https://eudract.ema.europa.eu/", "description": "European clinical trial database."}, {"name": "MIMIC-IV", "url": "https://mimic.mit.edu/", "description": "Freely accessible critical care database."}, {"name": "1000 Genomes Project", "url": "https://www.internationalgenome.org/", "description": "Reference panel of human genetic variation from 2,504 individuals across 26 populations."}, {"name": "BACE", "url": "https://www.kaggle.com/datasets/gokturkkoch/bace", "description": "Binary classification and regression dataset for β-secretase 1 (BACE-1) inhibitor binding affinity."}, {"name": "BEAT AML", "url": "https://biodev.github.io/BeatAML2/", "description": "Functional ex vivo drug sensitivity measurements paired with genomics for acute myeloid leukemia."}, {"name": "BindingDB Curated 
Sets", "url": "https://www.bindingdb.org/rwd/bind/chemsearch/marvin/SDFdownload.jsp?all_download=yes", "description": "Curated binding affinity datasets for protein-ligand interaction benchmarking."}, {"name": "Cancer Therapeutics Response Portal (CTRP)", "url": "https://portals.broadinstitute.org/ctrp/", "description": "Drug sensitivity profiles across \\~900 cancer cell lines for >400 compounds."}, {"name": "ClinTox", "url": "https://tdcommons.ai/single_pred_tasks/tox/#clintox", "description": "Clinical toxicity dataset contrasting FDA-approved drugs with those that failed clinical trials due to toxicity."}, {"name": "CPTAC (Clinical Proteomic Tumor Analysis Consortium)", "url": "https://proteomics.cancer.gov/programs/cptac", "description": "Multi-omic proteogenomic datasets for multiple cancer types linking proteomics with genomics."}, {"name": "CrossDocked2020", "url": "https://arxiv.org/abs/2001.01037", "description": "Large-scale dataset for structure-based virtual screening."}, {"name": "FLIP (Fitness Landscape Inference for Proteins)", "url": "https://github.com/J-SNACKKB/FLIP", "description": "Benchmark collection of protein fitness landscape datasets for evaluating protein ML models.", "stars": "117"}, {"name": "Genomics of Drug Sensitivity in Cancer (GDSC)", "url": "https://www.cancerrxgene.org/", "description": "Drug sensitivity for \\~1000 human cancer cell lines and hundreds of compounds."}, {"name": "GuacaMol", "url": "https://github.com/BenevolentAI/guacamol", "description": "Benchmark suite for generative molecular design models.", "stars": "511"}, {"name": "LINCS L1000", "url": "https://lincsproject.org/LINCS/tools/workflows/find-the-best-place-to-obtain-the-lincs-l1000-data", "description": "Gene expression profiles (978 landmark genes) for >20,000 chemical and genetic perturbations across cell lines."}, {"name": "MoleculeNet", "url": "http://moleculenet.ai/", "description": "Benchmark datasets for molecular machine learning."}, {"name": "MOSES", 
"url": "https://github.com/molecularsets/moses", "description": "Benchmarking platform for molecular generation models.", "stars": "962"}, {"name": "NCI60", "url": "https://dtp.cancer.gov/discovery_development/nci-60/", "description": "Drug sensitivity benchmark across 60 diverse human cancer cell lines."}, {"name": "OGB (Open Graph Benchmark)", "url": "https://ogb.stanford.edu/", "description": "Large-scale graph ML benchmark suite including biological datasets such as ogbl-ppa (protein-protein associations) and ogbg-molhiv."}, {"name": "OpenBioLink", "url": "https://github.com/OpenBioLink/OpenBioLink", "description": "Benchmark datasets for biological knowledge graph completion.", "stars": "158"}, {"name": "PharmGKB", "url": "https://www.pharmgkb.org/", "description": "Curated pharmacogenomics dataset linking genetic variants to drug response phenotypes across thousands of drugs."}, {"name": "PK-DB", "url": "https://pk-db.com/", "description": "Open database of experimental pharmacokinetics (PK) and ADME data from clinical and preclinical studies."}, {"name": "PRISM", "url": "https://depmap.org/portal/prism/", "description": "Cancer drug sensitivity profiling of >4,500 drugs across >900 cancer cell lines using pooled-cell-line barcoding."}, {"name": "ProteinGym", "url": "https://github.com/OATML-Markslab/ProteinGym", "description": "Large-scale benchmark of deep mutational scanning assays for evaluating protein fitness landscape models.", "stars": "407"}, {"name": "QM9", "url": "https://figshare.com/collections/Quantum_chemistry_structures_and_properties_of_134_kilo_molecules/978904", "description": "Quantum chemistry properties for 134K stable small organic molecules computed at DFT level."}, {"name": "scIB (Single-cell Integration Benchmarks)", "url": "https://github.com/theislab/scib", "description": "Comprehensive benchmarking framework for single-cell data integration methods.", "stars": "408"}, {"name": "SIDER (Side Effect Resource)", "url": 
"http://sideeffects.embl.de/", "description": "Database of 1,430 approved drugs with their recorded adverse drug reactions across 27 system-organ classes."}, {"name": "Tabula Muris", "url": "https://tabula-muris.ds.czbiohub.org/", "description": "Comprehensive single-cell atlas of 20 mouse organs and tissues, enabling cross-tissue and cross-species comparisons."}, {"name": "Tabula Sapiens", "url": "https://tabula-sapiens-portal.ds.czbiohub.org/", "description": "Comprehensive human single-cell atlas of \\~500K cells from 24 organs and tissues across multiple donors."}, {"name": "TAPE (Tasks Assessing Protein Embeddings)", "url": "https://github.com/songlab-cal/tape", "description": "Benchmark suite of five biologically meaningful semi-supervised learning tasks for evaluating protein representations.", "stars": "734"}, {"name": "The Cancer Genome Atlas (TCGA)", "url": "https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga", "description": "Comprehensive multi-omics (genomics, transcriptomics, proteomics, methylation) dataset for 33 cancer types across \\~11,000 patients."}, {"name": "Therapeutics Data Commons (TDC)", "url": "https://tdcommons.ai/", "description": "Unified benchmark suite covering ADMET, drug-target interaction, drug response, and more."}, {"name": "Tox21", "url": "https://tripod.nih.gov/tox21/challenge/", "description": "12,707 compounds tested in 12 nuclear receptor and stress-response pathway biochemical assays for toxicity prediction."}, {"name": "UK Biobank", "url": "https://www.ukbiobank.ac.uk/", "description": "Large-scale biomedical database of \\~500K participants with genetic, imaging, and health data for population genetics and disease studies."}, {"name": "PubMed E-utilities (esearch/efetch)", "url": "https://www.nlm.nih.gov/dataguide/edirect/esearch.html", "description": "APIs for searching and retrieving biomedical literature from PubMed."}, {"name": "NCBI E-utilities", "url": 
"https://www.ncbi.nlm.nih.gov/books/NBK25501/", "description": "Unified APIs for accessing NCBI databases (Gene, GEO, SRA, PubChem, etc)."}, {"name": "UniProt REST API", "url": "https://www.uniprot.org/help/api", "description": "Programmatic access to protein sequence and functional annotation data."}, {"name": "Ensembl REST API", "url": "https://rest.ensembl.org/", "description": "API for genomic annotations, variants, genes, and comparative genomics."}, {"name": "KEGG REST API", "url": "https://www.kegg.jp/kegg/rest/keggapi.html", "description": "API for accessing KEGG pathways, compounds, genes, and reactions."}, {"name": "ChEMBL Web Services", "url": "https://www.ebi.ac.uk/chembl/ws", "description": "REST API for bioactive molecules, targets, and bioassays."}, {"name": "Open Targets Platform API", "url": "https://platform.opentargets.org/api", "description": "API for target-disease associations integrating genetics, genomics, and drug data."}, {"name": "ClinicalTrials.gov API", "url": "https://clinicaltrials.gov/api/gui", "description": "API for querying clinical trial metadata and results."}, {"name": "Chemistry Development Kit", "url": "https://github.com/cdk/cdk", "description": "Cheminformatics software & machine learning tools.", "stars": "575"}, {"name": "Biopython", "url": "https://biopython.org/", "description": "Collection of Python tools for biological computation including sequence analysis, structure parsing, and database access."}, {"name": "FlashDeconv", "url": "https://github.com/cafferychen777/flashdeconv", "description": "High-performance spatial transcriptomics deconvolution (\\~1M spots in \\~3 min).", "stars": "14"}, {"name": "RDKit", "url": "https://github.com/rdkit/rdkit", "description": "Cheminformatics software & machine learning toolkit.", "stars": "3.3k"}, {"name": "DeepChem", "url": "https://github.com/deepchem/deepchem", "description": "Deep learning library for drug discovery, quantum chemistry, and materials science.", "stars": 
"6.6k"}, {"name": "ChatSpatial", "url": "https://github.com/cafferychen777/ChatSpatial", "description": "MCP server for spatial transcriptomics analysis via natural language.", "stars": "25"}, {"name": "Scanpy", "url": "https://scanpy.readthedocs.io/en/stable/", "description": "Python library for scRNA-seq analysis."}, {"name": "Seurat", "url": "https://satijalab.org/seurat/", "description": "R library for scRNA-seq analysis."}, {"name": "scvi-tools", "url": "https://scvi-tools.org/", "description": "Probabilistic models for single-cell omics data analysis."}, {"name": "CellTypist", "url": "https://github.com/Teichlab/celltypist", "description": "Automated cell type annotation for scRNA-seq.", "stars": "464"}, {"name": "Squidpy", "url": "https://squidpy.readthedocs.io/", "description": "Python library for spatial single-cell analysis."}, {"name": "GROMACS", "url": "https://www.gromacs.org/", "description": "Molecular dynamics simulation package for biochemical molecules."}, {"name": "MDAnalysis", "url": "https://www.mdanalysis.org/", "description": "Python library for analyzing and altering molecular dynamics simulation trajectories."}, {"name": "OpenMM", "url": "https://openmm.org/", "description": "High-performance toolkit for molecular simulation and GPU-accelerated MD."}, {"name": "scVelo", "url": "https://github.com/theislab/scvelo", "description": "RNA velocity estimation for single-cell transcriptomics, inferring the direction and speed of cell differentiation.", "stars": "495"}, {"name": "STAR", "url": "https://github.com/alexdobin/STAR", "description": "Ultrafast universal RNA-seq aligner with support for spliced alignment and single-cell quantification via STARsolo.", "stars": "2.2k"}, {"name": "kallisto", "url": "https://pachterlab.github.io/kallisto/", "description": "Near-optimal RNA-seq quantification using pseudoalignment for fast transcript abundance estimation."}, {"name": "Harmony", "url": "https://github.com/immunogenomics/harmony", 
"description": "Fast and scalable integration of single-cell data across datasets, conditions, technologies, and species.", "stars": "631"}, {"name": "Monocle3", "url": "https://cole-trapnell-lab.github.io/monocle3/", "description": "Single-cell trajectory analysis tool for learning developmental trajectories and ordering cells in pseudotime."}, {"name": "CellChat", "url": "https://github.com/sqjin/CellChat", "description": "Inference and analysis of cell-cell communication ligand-receptor networks from single-cell transcriptomics data.", "stars": "775"}, {"name": "SCENIC", "url": "https://github.com/aertslab/SCENIC", "description": "Single-cell regulatory network inference and clustering linking transcription factors to co-expressed gene modules.", "stars": "481"}, {"name": "DoubletFinder", "url": "https://github.com/chris-mcginnis-ucsf/DoubletFinder", "description": "Machine learning approach for detecting multiplet (doublet) artifacts in single-cell RNA-seq data.", "stars": "537"}]}, {"name": "Drug Discovery", "parent": "Machine Learning Tasks and Models", "entries": [{"name": "drGAT", "url": "https://github.com/inoue0426/drGAT", "description": "Attention-based model for drug response prediction with gene explainability.", "stars": "1"}, {"name": "MOFGCN", "url": "https://github.com/weiba/MOFGCN/tree/main", "description": "GCN + heterogeneous network.", "stars": "7"}, {"name": "DeepDSC", "url": "https://ieeexplore-ieee-org.ezp2.lib.umn.edu/stamp/stamp.jsp?tp=&arnumber=8723620&tag=1", "description": "Autoencoder + fully connected NN."}, {"name": "DGDRP", "url": "https://github.com/minwoopak/heteronet", "description": "Multi-view embedding neural network.", "stars": "0"}, {"name": "DeepAEG", "url": "https://github.com/zhejiangzhuque/DeepAEG", "description": "GNN embedding + attention mechanism.", "stars": "3"}, {"name": "RECOVER", "url": "https://github.com/RECOVERcoalition/Recover", "description": "Machine learning framework for predicting synergistic drug 
combination responses across cell lines.", "stars": "24"}, {"name": "TGSA", "url": "https://github.com/violet-sto/TGSA", "description": "Tumor gene set and attention-based model leveraging biological pathway knowledge for drug response prediction.", "stars": "23"}, {"name": "HiDRA", "url": "https://github.com/bsml320/HiDRA", "description": "Hierarchical network model incorporating gene and pathway-level information for cancer drug response prediction."}, {"name": "DeepPurpose", "url": "https://github.com/kexinhuang12345/DeepPurpose", "description": "Deep learning library for drug repurposing.", "stars": "1.1k"}, {"name": "NeoDTI", "url": "https://github.com/FangpingWan/NeoDTI", "description": "Library for drug-target interaction prediction.", "stars": "77"}, {"name": "DTINet", "url": "https://github.com/luoyunan/DTINet", "description": "Network-based framework integrating heterogeneous biological data for DTI prediction.", "stars": "187"}, {"name": "DeepDTA", "url": "https://github.com/hkmztrk/DeepDTA", "description": "Deep learning model using CNNs on protein sequences and drug SMILES.", "stars": "297"}, {"name": "GraphDTA", "url": "https://github.com/thinng/GraphDTA", "description": "Graph neural network-based DTI prediction using molecular graphs.", "stars": "296"}, {"name": "MolTrans", "url": "https://github.com/kexinhuang12345/MolTrans", "description": "Transformer-based DTI model leveraging molecular substructures.", "stars": "227"}, {"name": "DrugBAN", "url": "https://github.com/peizhenbai/DrugBAN", "description": "Bilinear attention network for interpretable DTI prediction.", "stars": "142"}, {"name": "MCPINN", "url": "https://github.com/mhlee0903/multi_channels_PINN", "description": "Drug discovery via compound-protein interaction and machine learning.", "stars": "3"}, {"name": "TransformerCPI", "url": "https://github.com/lifanchen-simm/transformerCPI", "description": "CPI prediction using Transformer.", "stars": "154"}, {"name": "REINVENT", "url": 
"https://github.com/MolecularAI/Reinvent", "description": "Reinforcement learning for de novo drug design.", "stars": "372"}, {"name": "MolGPT", "url": "https://github.com/devalab/molgpt", "description": "Transformer-based model for molecular generation.", "stars": "169"}, {"name": "Molecular Transformer", "url": "https://github.com/pschwllr/MolecularTransformer", "description": "Sequence-to-sequence model for retrosynthesis prediction.", "stars": "419"}, {"name": "TargetDiff", "url": "https://github.com/guanjq/targetdiff", "description": "3D equivariant diffusion model for structure-based drug design.", "stars": "328"}, {"name": "DiffDock", "url": "https://github.com/gcorso/DiffDock", "description": "Diffusion generative model for molecular docking, predicting the binding pose of small molecules to protein targets.", "stars": "1.5k"}, {"name": "JTVAE", "url": "https://github.com/wengong-jin/icml18-jtnn", "description": "Junction tree variational autoencoder for molecular graph generation that guarantees chemical validity via a hierarchical tree decomposition.", "stars": "553"}]}, {"name": "LLM for Biology", "parent": "Machine Learning Tasks and Models", "entries": [{"name": "AI4Chem/ChemLLM-7B-Chat", "url": "https://huggingface.co/AI4Chem/ChemLLM-7B-Chat", "description": "LLM for chemical & molecular science."}, {"name": "BioGPT", "url": "https://github.com/microsoft/BioGPT", "description": "LLM for biomedical text generation.", "stars": "4.5k"}, {"name": "GeneGPT", "url": "https://github.com/ncbi/GeneGPT", "description": "LLM for biomedical information, integrated with various APIs.", "stars": "424"}, {"name": "GenePT", "url": "https://github.com/yiqunchen/GenePT", "description": "Foundation LLM for single-cell data.", "stars": "313"}, {"name": "scPRINT", "url": "https://github.com/cantinilab/scPRINT", "description": "Pretrained on 50M cells for scRNA-seq denoising & zero imputation.", "stars": "143"}, {"name": "ClawBio", "url": 
"https://github.com/ClawBio/ClawBio", "description": "Bioinformatics-native AI agent skill library with local-first pharmacogenomics, ancestry PCA, semantic similarity, nutrigenomics, and metagenomics skills.", "stars": "551"}, {"name": "BioMedLM", "url": "https://huggingface.co/stanford-crfm/BioMedLM", "description": "2.7B parameter GPT-2-style language model trained exclusively on biomedical literature from PubMed for biomedical question answering and text generation."}, {"name": "MolT5", "url": "https://github.com/blender-nlp/MolT5", "description": "Language model for molecular tasks bridging text and SMILES, enabling molecule captioning and text-driven molecule generation.", "stars": "192"}, {"name": "ChatDrug", "url": "https://github.com/chao1224/ChatDrug", "description": "LLM-based conversational pipeline for drug discovery, using natural language prompts for iterative drug editing and optimization.", "stars": "158"}]}, {"name": "Foundation Models", "parent": "Machine Learning Tasks and Models", "entries": [{"name": "scFoundation", "url": "https://github.com/biomap-research/scFoundation", "description": "Large-scale foundation model for single-cell gene expression, enabling multiple downstream tasks.", "stars": "399"}, {"name": "scGPT", "url": "https://github.com/bowang-lab/scGPT", "description": "Transformer-based foundation model pretrained on millions of single-cell profiles.", "stars": "1.5k"}, {"name": "Geneformer", "url": "https://huggingface.co/ctheodoris/Geneformer", "description": "Context-aware, attention-based deep learning model pretrained on a large corpus of single-cell transcriptomes."}, {"name": "BulkFormer", "url": "https://github.com/KangBoming/BulkFormer", "description": "Foundation model for bulk RNA-seq data; learns general transcriptomic representations.", "stars": "50"}, {"name": "scBERT", "url": "https://github.com/TencentAILabHealthcare/scBERT", "description": "BERT-based foundation model pretrained on large-scale scRNA-seq data for 
cell type annotation.", "stars": "352"}, {"name": "CellPLM", "url": "https://github.com/OmicsML/CellPLM", "description": "Cell pre-trained language model with inter-cell transformer architecture for diverse single-cell analysis tasks.", "stars": "101"}, {"name": "UCE", "url": "https://github.com/snap-stanford/UCE", "description": "Universal Cell Embeddings: zero-shot single-cell embedding model trained on 36M cells across species, tissues, and assays without fine-tuning.", "stars": "249"}, {"name": "GEARS", "url": "https://github.com/snap-stanford/GEARS", "description": "Graph-based model for predicting transcriptional responses to single and combinatorial genetic perturbations using biological priors.", "stars": "350"}, {"name": "GigaPath", "url": "https://github.com/prov-gigapath/prov-gigapath", "description": "Slide-level digital pathology foundation model pretrained on 1.3 billion pathology image tokens from whole-slide images.", "stars": "589"}, {"name": "UNI", "url": "https://github.com/mahmoodlab/UNI", "description": "General-purpose self-supervised pathology foundation model trained on 100K+ whole-slide images for diverse computational pathology tasks.", "stars": "703"}, {"name": "CONCH", "url": "https://github.com/mahmoodlab/CONCH", "description": "Vision-language foundation model for computational pathology trained with contrastive captioning on pathology image-text pairs.", "stars": "487"}, {"name": "Phikon", "url": "https://huggingface.co/owkin/phikon", "description": "ViT-based pathology foundation model pretrained with iBOT self-supervision on TCGA whole-slide images."}, {"name": "scMulan", "url": "https://github.com/SuperBianC/scMulan", "description": "Single-cell multi-omic language model pretrained on ~10M cells spanning transcriptomics, epigenomics, and proteomics for cross-omics transfer tasks.", "stars": "61"}, {"name": "totalVI", "url": "https://github.com/scverse/scvi-tools", "description": "Probabilistic framework for joint analysis of 
paired scRNA-seq and protein (CITE-seq) data enabling multi-modal cell state representation across single-cell datasets.", "stars": "1.6k"}, {"name": "MultiVI", "url": "https://github.com/scverse/scvi-tools", "description": "Multi-modal variational autoencoder for integrating paired and unpaired single-cell RNA-seq and ATAC-seq measurements into a unified latent space.", "stars": "1.6k"}, {"name": "MIRA", "url": "https://github.com/cistrome/MIRA", "description": "Probabilistic multimodal topic model jointly modeling single-cell transcriptomics and chromatin accessibility for regulatory network inference.", "stars": "68"}, {"name": "GLUE", "url": "https://github.com/gao-lab/GLUE", "description": "Graph-Linked Unified Embedding framework for unpaired single-cell multi-omics data integration across RNA, ATAC, methylation, and protein modalities.", "stars": "458"}, {"name": "BABEL", "url": "https://github.com/wukevin/babel", "description": "Cross-modality translation model enabling prediction between scRNA-seq and scATAC-seq profiles without requiring paired single-cell measurements.", "stars": "48"}, {"name": "Multigrate", "url": "https://github.com/theislab/multigrate", "description": "Asymmetric multi-omics variational autoencoder for integrating single-cell data across RNA, ATAC, and protein modalities with missing-modality support.", "stars": "32"}, {"name": "MOFA+", "url": "https://github.com/bioFAM/MOFA2", "description": "Multi-Omics Factor Analysis framework identifying shared axes of variation across bulk and single-cell datasets including RNA, ATAC, proteomics, methylation, and copy number.", "stars": "391"}, {"name": "GeneCompass", "url": "https://github.com/xCompass-AI/GeneCompass", "description": "Large-scale foundation model integrating DNA regulatory sequences and single-cell transcriptomics from 120M+ cells across multiple species for gene regulation prediction.", "stars": "111"}, {"name": "UnitedNet", "url": 
"https://github.com/LiuLab-Bioelectronics-Harvard/UnitedNet", "description": "Interpretable multi-task deep neural network for single-cell multi-omics integration spanning transcriptomics, chromatin accessibility, and proteomics.", "stars": "52"}, {"name": "SpatialGlue", "url": "https://github.com/zhanglabtools/SpatialGlue", "description": "Graph attention network for spatial multi-omics integration jointly embedding spatial transcriptomics with chromatin accessibility or proteomics."}, {"name": "MIDAS", "url": "https://github.com/labomics/midas", "description": "Mosaic integration and differential accessibility model for single-cell multi-omics data that handles arbitrary missing-modality combinations across transcriptomics, chromatin accessibility, and proteomics.", "stars": "63"}, {"name": "scArches", "url": "https://github.com/theislab/scarches", "description": "Transfer learning framework for mapping new single-cell datasets onto pre-trained reference atlases across batches, conditions, and modalities.", "stars": "401"}, {"name": "TOSICA", "url": "https://github.com/JackieHanlaopo/TOSICA", "description": "Transformer-based framework for one-stop interpretable cell-type annotation supporting cross-dataset and cross-species transfer."}, {"name": "Evolutionary Scale Modeling (ESM)", "url": "https://github.com/facebookresearch/esm", "description": "Protein embeddings.", "stars": "4k"}, {"name": "ChemBERTa-2", "url": "https://github.com/seyonechithrananda/bert-loves-chemistry", "description": "Chemical embeddings & prediction.", "stars": "488"}, {"name": "ProtTrans", "url": "https://github.com/agemagician/ProtTrans", "description": "Suite of protein language models (ProtBERT, ProtT5, ProtXLNet) trained on billions of protein sequences from UniRef and BFD.", "stars": "1.3k"}, {"name": "ProGen2", "url": "https://github.com/salesforce/progen", "description": "Protein language model trained on diverse protein families for sequence generation and fitness prediction.", 
"stars": "695"}, {"name": "Ankh", "url": "https://github.com/agemagician/Ankh", "description": "Efficient protein language model optimized for downstream prediction tasks including secondary structure, localization, and function annotation.", "stars": "244"}, {"name": "AlphaFold3", "url": "https://github.com/google-deepmind/alphafold3", "description": "Predicts structures of proteins, nucleic acids, small molecules, and their complexes.", "stars": "7.8k"}, {"name": "Boltz-1", "url": "https://github.com/jwohlwend/boltz", "description": "Open-source all-atom biomolecular structure prediction model for proteins, nucleic acids, small molecules, and their complexes achieving AlphaFold3-level accuracy.", "stars": "3.9k"}, {"name": "Chai-1", "url": "https://github.com/chaidiscovery/chai-lab", "description": "Unified molecular structure prediction model covering proteins, nucleic acids, small molecules, and complexes.", "stars": "1.9k"}, {"name": "ESM3", "url": "https://github.com/evolutionaryscale/esm", "description": "Multimodal protein language model that jointly reasons over sequence, structure, and function for generative protein design and engineering.", "stars": "2.3k"}, {"name": "ESMFold", "url": "https://github.com/facebookresearch/esm", "description": "Fast protein structure prediction using language model embeddings.", "stars": "4k"}, {"name": "RFdiffusion", "url": "https://github.com/RosettaCommons/RFdiffusion", "description": "Generative model for protein backbone design using diffusion.", "stars": "2.8k"}, {"name": "ProteinMPNN", "url": "https://github.com/dauparas/ProteinMPNN", "description": "Deep learning model for protein sequence design given backbone structure.", "stars": "1.7k"}, {"name": "OmegaFold", "url": "https://github.com/HeliXonProtein/OmegaFold", "description": "High-resolution de novo protein structure prediction from sequence.", "stars": "616"}, {"name": "RoseTTAFold", "url": "https://github.com/RosettaCommons/RoseTTAFold", "description": 
"Three-track neural network for protein structure prediction.", "stars": "2.2k"}, {"name": "OpenFold", "url": "https://github.com/aqlaboratory/openfold", "description": "Trainable, memory-efficient open-source reproduction of AlphaFold2 enabling custom protein structure prediction workflows.", "stars": "3.3k"}, {"name": "SaProt", "url": "https://github.com/westlake-reup/SaProt", "description": "Structure-aware protein language model using structure-aware tokens that encode both sequence and backbone geometry for improved function prediction."}, {"name": "EvoDiff", "url": "https://github.com/microsoft/evodiff", "description": "Discrete diffusion framework for protein sequence generation trained on evolutionary-scale data, supporting unconditional generation, disordered region design, and functional motif scaffolding. \\[ [paper-2023](https://www.biorxiv.org/content/10.1101/2023.09.11.556673v1) ]", "stars": "664"}, {"name": "CHIEF", "url": "https://github.com/hms-dbmi/CHIEF", "description": "Clinical Histopathology Imaging Evaluation Foundation model integrating histology images and clinical context for pan-cancer analysis.", "stars": "698"}, {"name": "BiomedCLIP", "url": "https://huggingface.co/microsoft/BiomedCLIP-PubMedBERT_256-vit_g_14", "description": "CLIP-based vision-language foundation model for biomedical images and text trained on PubMed figurecaption pairs."}, {"name": "Nucleotide Transformer", "url": "https://github.com/instadeepai/nucleotide-transformer", "description": "Foundation model for genomic sequences across multiple species.", "stars": "847"}, {"name": "DNABERT", "url": "https://github.com/jerryji1993/DNABERT", "description": "Pre-trained bidirectional encoder for DNA sequence analysis.", "stars": "746"}, {"name": "DNABERT-2", "url": "https://github.com/Zhihan1996/DNABERT_2", "description": "Improved genome foundation model with efficient tokenization.", "stars": "469"}, {"name": "Enformer", "url": 
"https://github.com/deepmind/deepmind-research/tree/master/enformer", "description": "Transformer model predicting gene expression from DNA sequence.", "stars": "15k"}, {"name": "Basenji", "url": "https://github.com/calico/basenji", "description": "Sequential regulatory activity prediction from DNA sequences.", "stars": "467"}, {"name": "Caduceus", "url": "https://github.com/kuleshov-group/caduceus", "description": "Bidirectional equivariant long-range DNA sequence model based on Mamba.", "stars": "230"}, {"name": "Evo", "url": "https://github.com/evo-design/evo", "description": "Long-context genomic foundation model (up to 1M tokens).", "stars": "1.5k"}, {"name": "HyenaDNA", "url": "https://github.com/HazyResearch/hyena-dna", "description": "Long-range genomic foundation model handling sequences up to 1M tokens with sub-quadratic attention.", "stars": "772"}, {"name": "Borzoi", "url": "https://github.com/calico/borzoi", "description": "Extended successor to Enformer for predicting RNA-seq coverage from long genomic sequence windows (524 kb) with improved resolution.", "stars": "234"}, {"name": "DeepSEA", "url": "http://deepsea.princeton.edu/", "description": "Deep learning framework for predicting chromatin effects of sequence alterations with single-nucleotide sensitivity across thousands of chromatin features."}, {"name": "Sei", "url": "https://github.com/FunctionLab/sei-framework", "description": "Sequence-to-function framework learning a genome-wide regulatory activity code from DNA sequences for variant effect prediction.", "stars": "112"}, {"name": "GPN (Genomic Pre-trained Network)", "url": "https://github.com/songlab-cal/gpn", "description": "Masked language model for DNA sequences enabling zero-shot variant effect prediction without requiring functional annotations.", "stars": "335"}]}]}