{"slug": "inoue0426--awesome-computational-biology", "title": "Computational Biology", "description": "Awesome list of computational biology.", "github_url": "https://github.com/inoue0426/awesome-computational-biology", "stars": "122", "tag": "Miscellaneous", "entry_count": 242, "subcategory_count": 13, "subcategories": [{"name": "Table of Contents", "parent": "", "entries": [{"name": "Awesome Computational Biology", "url": "#awesome-computational-biology-", "description": ""}]}, {"name": "scRNA", "parent": "Databases", "entries": [{"name": "CZ CELLxGENE", "url": "https://cellxgene.cziscience.com/", "description": "Single-cell dataset repository and interactive explorer from the Chan Zuckerberg Initiative."}, {"name": "Gene Expression Omnibus", "url": "https://www.ncbi.nlm.nih.gov/geo/", "description": "Public functional genomics database."}, {"name": "Human Cell Atlas", "url": "https://www.humancellatlas.org/", "description": "Open global atlas of all cells in the human body."}, {"name": "Single Cell PORTAL", "url": "https://singlecell.broadinstitute.org/single_cell", "description": "Public database for single-cell RNA."}, {"name": "Single Cell Expression Atlas", "url": "https://www.ebi.ac.uk/gxa/sc/home", "description": "Public database for single-cell RNA."}]}, {"name": "Compound", "parent": "Databases", "entries": [{"name": "PubChem", "url": "https://pubchem.ncbi.nlm.nih.gov/", "description": "One of the largest chemical databases (compounds, genes, and proteins)."}, {"name": "ChEBI", "url": "https://www.ebi.ac.uk/chebi/", "description": "Database focused on small chemical compounds."}, {"name": "ChEMBL", "url": "https://www.ebi.ac.uk/chembl/", "description": "Bioactive molecules with drug-like properties."}, {"name": "ChemSpider", "url": "http://www.chemspider.com/", "description": "Chemical structure database."}, {"name": "DrugTargetCommons", "url": "https://drugtargetcommons.fimm.fi/", "description": "Community platform for curating and integrating 
experimental bioactivity data across drugs and targets."}, {"name": "HMDB (Human Metabolome Database)", "url": "https://hmdb.ca/", "description": "Comprehensive database of small molecule metabolites found in the human body."}, {"name": "KEGG COMPOUND", "url": "https://www.genome.jp/kegg/compound/", "description": "Collection of small molecules and biopolymers."}, {"name": "LIPID MAPS", "url": "https://www.lipidmaps.org/databases/lmsd/overview", "description": "Database of lipids."}, {"name": "Rhea", "url": "https://www.rhea-db.org/", "description": "Database of chemical reactions."}, {"name": "DrugCentral", "url": "http://drugcentral.org/", "description": "Online drug compendium with drug mode of action and indication information."}, {"name": "Drug Repurposing Hub", "url": "https://repo-hub.broadinstitute.org/repurposing#download-data", "description": "Collections of drug repurposing data (drug, MoA, target, etc)."}, {"name": "Therapeutic Target Database", "url": "https://idrblab.net/ttd/full-data-download", "description": "Drug-target, target-disease, and drug-disease datasets."}, {"name": "ZINC ligand discovery database", "url": "https://zinc.docking.org/", "description": "Free database of commercially-available compounds for virtual screening."}]}, {"name": "Pathway", "parent": "Databases", "entries": [{"name": "PathwayCommons", "url": "https://www.pathwaycommons.org/", "description": "Database of pathways and interactions."}, {"name": "KEGG PATHWAY", "url": "https://www.genome.jp/kegg/pathway.html", "description": "Collection of pathway maps."}, {"name": "WikiPathways", "url": "https://wikipathways.org/", "description": "Database of biological pathways."}, {"name": "Reactome", "url": "https://reactome.org/", "description": "Expert-curated, peer-reviewed pathway database with detailed reaction mechanisms."}, {"name": "BioCyc", "url": "https://biocyc.org/", "description": "Collection of pathway/genome databases across thousands of organisms."}, {"name": 
"SIGNOR", "url": "https://signor.uniroma2.it/", "description": "Database of causal signaling interactions and pathways."}, {"name": "MSigDB (Molecular Signatures Database)", "url": "https://www.gsea-msigdb.org/gsea/msigdb", "description": "Curated gene sets derived from pathways and biological processes."}]}, {"name": "Mass Spectra", "parent": "Databases", "entries": [{"name": "MassBank", "url": "http://www.massbank.jp/", "description": "Open source databases and tools for mass spectrometry reference spectra."}, {"name": "MoNA MassBank of North America", "url": "https://mona.fiehnlab.ucdavis.edu/", "description": "Meta-database of metabolite mass spectra, metadata, and associated compounds."}]}, {"name": "Protein", "parent": "Databases", "entries": [{"name": "THE HUMAN PROTEIN ATLAS", "url": "https://www.proteinatlas.org/", "description": "Comprehensive human protein database (cells, tissues, organs)."}, {"name": "PROTEIN DATA BANK (PDB)", "url": "https://www.rcsb.org/", "description": "3D structures of proteins, nucleic acids, complexes."}, {"name": "UniProt", "url": "https://www.uniprot.org/", "description": "Functional information on proteins."}, {"name": "AlphaFold Protein Structure Database", "url": "https://alphafold.ebi.ac.uk/api-docs", "description": "3D protein structure predictions."}, {"name": "RCSB Protein Data Bank", "url": "https://www.rcsb.org/", "description": "Repository for structural data of biological molecules."}, {"name": "Critical Assessment of Structure Prediction (CASP)", "url": "https://predictioncenter.org/", "description": "Assessing methods for protein structure prediction."}, {"name": "Uniclust", "url": "https://uniclust.mmseqs.com/", "description": "Clustered protein sequence databases."}, {"name": "UniRef", "url": "https://www.uniprot.org/uniref/", "description": "Non-redundant sequence database clustering UniProtKB entries at multiple sequence identity thresholds."}, {"name": "CATH database", "url": "https://www.cathdb.info/", 
"description": "Hierarchical classification of protein domain structures."}, {"name": "SAbDab", "url": "https://opig.stats.ox.ac.uk/webapps/sabdab-sabpred/sabdab", "description": "Structural Antibody Database containing all antibody structures in the PDB."}, {"name": "OADB (Observed Antibody Space Database)", "url": "http://opig.stats.ox.ac.uk/webapps/oas/", "description": "Database of antibody sequences from immune repertoire sequencing."}, {"name": "InterPro", "url": "https://www.ebi.ac.uk/interpro/", "description": "Protein families, domains, and functional sites database integrating 14 member databases including Pfam and PROSITE."}, {"name": "Pfam", "url": "https://www.ebi.ac.uk/interpro/entry/pfam/", "description": "Database of protein families described by multiple sequence alignments and hidden Markov models."}, {"name": "NeXtProt", "url": "https://www.nextprot.org/", "description": "Expert knowledge base on human proteins with deep functional annotation, complementary to UniProt."}]}, {"name": "Genome", "parent": "Databases", "entries": [{"name": "ENCODE", "url": "https://www.encodeproject.org/", "description": "Encyclopedia of DNA Elements; regulatory and functional genomic elements across the genome."}, {"name": "Ensembl", "url": "https://www.ensembl.org/", "description": "Genome browser and annotation database for vertebrate and other eukaryotic genomes."}, {"name": "Human Genome Resources at NCBI", "url": "https://www.ncbi.nlm.nih.gov/projects/genome/guide/human/index.shtml", "description": "Database for genomics, proteomics, transcriptomics, and systems biology."}, {"name": "GenBank", "url": "https://www.ncbi.nlm.nih.gov/genbank/", "description": "NCBI's database of genetic sequences."}, {"name": "UCSC Genome Browser", "url": "https://genome.ucsc.edu/", "description": "UCSC's genome browser."}, {"name": "cBioPortal", "url": "https://www.cbioportal.org/", "description": "Cancer genomics database; aggregating many patient datasets."}, {"name": "10x 
Genomics Dataset", "url": "https://www.10xgenomics.com/resources/datasets", "description": "Collection of single-cell datasets."}, {"name": "The Genotype-Tissue Expression (GTEx)", "url": "https://gtexportal.org/home/", "description": "Human gene expression and regulation resource."}, {"name": "Dependency Map (DepMap)", "url": "https://depmap.org/portal/", "description": "CRISPR-Cas9 screens in cancer cell lines."}, {"name": "Catalogue Of Somatic Mutations In Cancer (COSMIC)", "url": "https://cancer.sanger.ac.uk/cosmic", "description": "Resource on somatic mutations in cancers."}, {"name": "MGnify", "url": "https://www.ebi.ac.uk/metagenomics/", "description": "Resource for metagenomic and metatranscriptomic data."}, {"name": "JASPAR", "url": "http://jaspar.genereg.net/", "description": "Database of transcription factor binding profiles."}, {"name": "gnomAD", "url": "https://gnomad.broadinstitute.org/", "description": "Genome Aggregation Database; genetic variation from large-scale sequencing projects."}, {"name": "Rfam", "url": "https://rfam.org/", "description": "Database of RNA families with sequence alignments and consensus structures."}, {"name": "ROADMAP Epigenomics", "url": "http://www.roadmapepigenomics.org/", "description": "Reference epigenome maps for 111 primary human cell types and tissues, including histone modifications, chromatin accessibility, and DNA methylation."}, {"name": "FANTOM5", "url": "https://fantom.gsc.riken.jp/5/", "description": "Functional annotation of mammalian genome; comprehensive atlas of active enhancers, promoters, and transcription start sites across human and mouse cell types."}]}, {"name": "Disease", "parent": "Databases", "entries": [{"name": "KEGG DRUG", "url": "https://www.genome.jp/kegg/drug/", "description": "Comprehensive, approved drug information."}, {"name": "DrugBank", "url": "https://go.drugbank.com/", "description": "Database of drugs and targets (University of Alberta)."}, {"name": "DisGeNET", "url": 
"https://www.disgenet.org/", "description": "Database of gene-disease associations integrating expert-curated and GWAS data."}, {"name": "OMIM (Online Mendelian Inheritance in Man)", "url": "https://www.omim.org/", "description": "Comprehensive database of human genes and genetic disorders."}, {"name": "Open Targets Platform", "url": "https://platform.opentargets.org/", "description": "Systematic target identification and prioritization platform integrating genetics, genomics, and drug data for drug discovery."}, {"name": "Human Phenotype Ontology (HPO)", "url": "https://hpo.jax.org/", "description": "Standardized vocabulary of phenotypic abnormalities in human disease, linking genes, variants, and clinical features."}, {"name": "DISEASES", "url": "https://diseases.jensenlab.org/", "description": "Gene\u2013disease association database integrating evidence from text mining, curated databases, and experimental data."}]}, {"name": "Interaction", "parent": "Databases", "entries": [{"name": "DGIdb", "url": "https://www.dgidb.org/", "description": "Drug-gene interactions and the druggable genome."}, {"name": "Comparative Toxicogenomics Database", "url": "http://ctdbase.org/", "description": "Chemical-gene interactions, chemical-disease and gene-disease associations, chemical-phenotype associations."}, {"name": "SNAP", "url": "https://snap.stanford.edu/biodata/datasets/10002/10002-ChG-Miner.html", "description": "Dataset of drug-gene interactions."}, {"name": "NCI60", "url": "https://dtp.cancer.gov/discovery_development/nci-60/", "description": "Focuses on 60 cancer cell lines and many drugs."}, {"name": "Genomics of Drug Sensitivity in Cancer (GDSC)", "url": "https://www.cancerrxgene.org/", "description": "Drug sensitivity for \\~1000 human cancer cell lines and hundreds of compounds."}, {"name": "Cancer Cell Line Encyclopedia", "url": "https://sites.broadinstitute.org/ccle/", "description": "Database of \\~1000 cancer cell lines."}, {"name": "CellMiner Cross Database 
(CellMinerCDB)", "url": "https://discover.nci.nih.gov/cellminercdb/", "description": "Integrates multiple cancer cell line databases."}, {"name": "STITCH", "url": "http://stitch.embl.de/", "description": "Chemical-protein interactions."}, {"name": "BindingDB", "url": "https://www.bindingdb.org/rwd/bind/index.jsp", "description": "Compounds and target database."}, {"name": "Davis kinase inhibitors DB", "url": "http://staff.cs.utu.fi/~aijrinas/dti/", "description": "Experimental kinase inhibitor binding affinity dataset for protein\u2013ligand interaction research."}, {"name": "Kinase Inhibitor Bioactivity Data (KIBA)", "url": "https://janeliascicomp.github.io/KIBA/", "description": "Integrated bioactivity scores for kinase inhibitors combining Ki, Kd, and IC50 measurements."}, {"name": "PDBBind", "url": "https://www.pdbbind-plus.org.cn/", "description": "Binding affinity data for biomolecular complexes."}, {"name": "STRING", "url": "https://string-db.org/", "description": "PPI networks for multiple organisms."}, {"name": "BioGRID", "url": "https://thebiogrid.org/", "description": "Protein, genetic, and chemical interactions."}, {"name": "HIPPIE", "url": "http://cbdm-01.zdv.uni-mainz.de/~mschaefer/hippie/", "description": "Human protein-protein interaction database."}, {"name": "IntAct", "url": "https://www.ebi.ac.uk/intact/home", "description": "Open-source molecular interaction database and analysis system from EMBL-EBI."}, {"name": "Drug Mechanism Database (DrugMechDB)", "url": "https://github.com/SuLab/DrugMechDB/tree/2.0.1", "description": "Mechanisms of action from drug to disease.", "stars": "71"}, {"name": "DRKG", "url": "https://github.com/gnn4dr/DRKG", "description": "Large-scale biological knowledge graph for drug discovery.", "stars": "678"}, {"name": "Hetionet", "url": "https://github.com/hetio/hetionet", "description": "Heterogeneous network integrating genes, diseases, drugs, pathways, and more.", "stars": "347"}, {"name": "PrimeKG", "url": 
"https://github.com/mims-harvard/PrimeKG", "description": "Multi-modal precision medicine knowledge graph integrating clinical, genetic, and drug data.", "stars": "723"}, {"name": "TRRUST", "url": "https://www.grnpedia.org/trrust/", "description": "Manually curated database of human and mouse transcriptional regulatory interactions between transcription factors and their target genes."}, {"name": "RegNetwork", "url": "http://www.regnetworkweb.org/", "description": "Database of gene regulatory networks covering transcription factor\u2013target gene and miRNA\u2013gene interaction data across multiple species."}, {"name": "miRBase", "url": "https://www.mirbase.org/", "description": "Reference repository for microRNA gene annotations, sequences, and experimentally validated targets."}]}, {"name": "Clinical Trial", "parent": "Databases", "entries": [{"name": "ClinicalTrials.gov", "url": "https://clinicaltrials.gov/", "description": "Privately and publicly funded clinical studies."}, {"name": "ICD10", "url": "https://icd.who.int/browse10/2019/en", "description": "International Classification of Diseases, 10th revision."}, {"name": "EU Drug Regulating Authorities Clinical Trials DB (EudraCT)", "url": "https://eudract.ema.europa.eu/", "description": "European clinical trial database."}, {"name": "MIMIC-IV", "url": "https://mimic.mit.edu/", "description": "Freely accessible critical care database."}, {"name": "1000 Genomes Project", "url": "https://www.internationalgenome.org/", "description": "Reference panel of human genetic variation from 2,504 individuals across 26 populations."}, {"name": "BACE", "url": "https://www.kaggle.com/datasets/gokturkkoch/bace", "description": "Binary classification and regression dataset for \u03b2-secretase 1 (BACE-1) inhibitor binding affinity."}, {"name": "BEAT AML", "url": "https://biodev.github.io/BeatAML2/", "description": "Functional ex vivo drug sensitivity measurements paired with genomics for acute myeloid leukemia."}, {"name": 
"BindingDB Curated Sets", "url": "https://www.bindingdb.org/rwd/bind/chemsearch/marvin/SDFdownload.jsp?all_download=yes", "description": "Curated binding affinity datasets for protein\u2013ligand interaction benchmarking."}, {"name": "Cancer Therapeutics Response Portal (CTRP)", "url": "https://portals.broadinstitute.org/ctrp/", "description": "Drug sensitivity profiles across \\~900 cancer cell lines for >400 compounds."}, {"name": "ClinTox", "url": "https://tdcommons.ai/single_pred_tasks/tox/#clintox", "description": "Clinical toxicity dataset contrasting FDA-approved drugs with those that failed clinical trials due to toxicity."}, {"name": "CPTAC (Clinical Proteomic Tumor Analysis Consortium)", "url": "https://proteomics.cancer.gov/programs/cptac", "description": "Multi-omic proteogenomic datasets for multiple cancer types linking proteomics with genomics."}, {"name": "CrossDocked2020", "url": "https://arxiv.org/abs/2001.01037", "description": "Large-scale dataset for structure-based virtual screening."}, {"name": "FLIP (Fitness Landscape Inference for Proteins)", "url": "https://github.com/J-SNACKKB/FLIP", "description": "Benchmark collection of protein fitness landscape datasets for evaluating protein ML models.", "stars": "117"}, {"name": "Genomics of Drug Sensitivity in Cancer (GDSC)", "url": "https://www.cancerrxgene.org/", "description": "Drug sensitivity for \\~1000 human cancer cell lines and hundreds of compounds."}, {"name": "GuacaMol", "url": "https://github.com/BenevolentAI/guacamol", "description": "Benchmark suite for generative molecular design models.", "stars": "511"}, {"name": "LINCS L1000", "url": "https://lincsproject.org/LINCS/tools/workflows/find-the-best-place-to-obtain-the-lincs-l1000-data", "description": "Gene expression profiles (978 landmark genes) for >20,000 chemical and genetic perturbations across cell lines."}, {"name": "MoleculeNet", "url": "http://moleculenet.ai/", "description": "Benchmark datasets for molecular machine 
learning."}, {"name": "MOSES", "url": "https://github.com/molecularsets/moses", "description": "Benchmarking platform for molecular generation models.", "stars": "962"}, {"name": "NCI60", "url": "https://dtp.cancer.gov/discovery_development/nci-60/", "description": "Drug sensitivity benchmark across 60 diverse human cancer cell lines."}, {"name": "OGB (Open Graph Benchmark)", "url": "https://ogb.stanford.edu/", "description": "Large-scale graph ML benchmark suite including biological datasets such as ogbl-ppa (protein-protein associations) and ogbg-molhiv."}, {"name": "OpenBioLink", "url": "https://github.com/OpenBioLink/OpenBioLink", "description": "Benchmark datasets for biological knowledge graph completion.", "stars": "158"}, {"name": "PharmGKB", "url": "https://www.pharmgkb.org/", "description": "Curated pharmacogenomics dataset linking genetic variants to drug response phenotypes across thousands of drugs."}, {"name": "PK-DB", "url": "https://pk-db.com/", "description": "Open database of experimental pharmacokinetics (PK) and ADME data from clinical and preclinical studies."}, {"name": "PRISM", "url": "https://depmap.org/portal/prism/", "description": "Cancer drug sensitivity profiling of >4,500 drugs across >900 cancer cell lines using pooled-cell-line barcoding."}, {"name": "ProteinGym", "url": "https://github.com/OATML-Markslab/ProteinGym", "description": "Large-scale benchmark of deep mutational scanning assays for evaluating protein fitness landscape models.", "stars": "407"}, {"name": "QM9", "url": "https://figshare.com/collections/Quantum_chemistry_structures_and_properties_of_134_kilo_molecules/978904", "description": "Quantum chemistry properties for 134K stable small organic molecules computed at DFT level."}, {"name": "scIB (Single-cell Integration Benchmarks)", "url": "https://github.com/theislab/scib", "description": "Comprehensive benchmarking framework for single-cell data integration methods.", "stars": "408"}, {"name": "SIDER (Side Effect 
Resource)", "url": "http://sideeffects.embl.de/", "description": "Database of 1,430 approved drugs with their recorded adverse drug reactions across 27 system-organ classes."}, {"name": "Tabula Muris", "url": "https://tabula-muris.ds.czbiohub.org/", "description": "Comprehensive single-cell atlas of 20 mouse organs and tissues, enabling cross-tissue and cross-species comparisons."}, {"name": "Tabula Sapiens", "url": "https://tabula-sapiens-portal.ds.czbiohub.org/", "description": "Comprehensive human single-cell atlas of \\~500K cells from 24 organs and tissues across multiple donors."}, {"name": "TAPE (Tasks Assessing Protein Embeddings)", "url": "https://github.com/songlab-cal/tape", "description": "Benchmark suite of five biologically meaningful semi-supervised learning tasks for evaluating protein representations.", "stars": "734"}, {"name": "The Cancer Genome Atlas (TCGA)", "url": "https://www.cancer.gov/about-nci/organization/ccg/research/structural-genomics/tcga", "description": "Comprehensive multi-omics (genomics, transcriptomics, proteomics, methylation) dataset for 33 cancer types across \\~11,000 patients."}, {"name": "Therapeutics Data Commons (TDC)", "url": "https://tdcommons.ai/", "description": "Unified benchmark suite covering ADMET, drug-target interaction, drug response, and more."}, {"name": "Tox21", "url": "https://tripod.nih.gov/tox21/challenge/", "description": "12,707 compounds tested in 12 nuclear receptor and stress-response pathway biochemical assays for toxicity prediction."}, {"name": "UK Biobank", "url": "https://www.ukbiobank.ac.uk/", "description": "Large-scale biomedical database of \\~500K participants with genetic, imaging, and health data for population genetics and disease studies."}, {"name": "PubMed E-utilities (esearch/efetch)", "url": "https://www.nlm.nih.gov/dataguide/edirect/esearch.html", "description": "APIs for searching and retrieving biomedical literature from PubMed."}, {"name": "NCBI E-utilities", "url": 
"https://www.ncbi.nlm.nih.gov/books/NBK25501/", "description": "Unified APIs for accessing NCBI databases (Gene, GEO, SRA, PubChem, etc)."}, {"name": "UniProt REST API", "url": "https://www.uniprot.org/help/api", "description": "Programmatic access to protein sequence and functional annotation data."}, {"name": "Ensembl REST API", "url": "https://rest.ensembl.org/", "description": "API for genomic annotations, variants, genes, and comparative genomics."}, {"name": "KEGG REST API", "url": "https://www.kegg.jp/kegg/rest/keggapi.html", "description": "API for accessing KEGG pathways, compounds, genes, and reactions."}, {"name": "ChEMBL Web Services", "url": "https://www.ebi.ac.uk/chembl/ws", "description": "REST API for bioactive molecules, targets, and bioassays."}, {"name": "Open Targets Platform API", "url": "https://platform.opentargets.org/api", "description": "API for target\u2013disease associations integrating genetics, genomics, and drug data."}, {"name": "ClinicalTrials.gov API", "url": "https://clinicaltrials.gov/api/gui", "description": "API for querying clinical trial metadata and results."}, {"name": "Chemistry Development Kit", "url": "https://github.com/cdk/cdk", "description": "Cheminformatics software & machine learning tools.", "stars": "575"}, {"name": "Biopython", "url": "https://biopython.org/", "description": "Collection of Python tools for biological computation including sequence analysis, structure parsing, and database access."}, {"name": "FlashDeconv", "url": "https://github.com/cafferychen777/flashdeconv", "description": "High-performance spatial transcriptomics deconvolution (\\~1M spots in \\~3 min).", "stars": "14"}, {"name": "RDKit", "url": "https://github.com/rdkit/rdkit", "description": "Cheminformatics software & machine learning toolkit.", "stars": "3.3k"}, {"name": "DeepChem", "url": "https://github.com/deepchem/deepchem", "description": "Deep learning library for drug discovery, quantum chemistry, and materials science.", 
"stars": "6.6k"}, {"name": "ChatSpatial", "url": "https://github.com/cafferychen777/ChatSpatial", "description": "MCP server for spatial transcriptomics analysis via natural language.", "stars": "25"}, {"name": "Scanpy", "url": "https://scanpy.readthedocs.io/en/stable/", "description": "Python library for scRNA-seq analysis."}, {"name": "Seurat", "url": "https://satijalab.org/seurat/", "description": "R library for scRNA-seq analysis."}, {"name": "scvi-tools", "url": "https://scvi-tools.org/", "description": "Probabilistic models for single-cell omics data analysis."}, {"name": "CellTypist", "url": "https://github.com/Teichlab/celltypist", "description": "Automated cell type annotation for scRNA-seq.", "stars": "464"}, {"name": "Squidpy", "url": "https://squidpy.readthedocs.io/", "description": "Python library for spatial single-cell analysis."}, {"name": "GROMACS", "url": "https://www.gromacs.org/", "description": "Molecular dynamics simulation package for biochemical molecules."}, {"name": "MDAnalysis", "url": "https://www.mdanalysis.org/", "description": "Python library for analyzing and altering molecular dynamics simulation trajectories."}, {"name": "OpenMM", "url": "https://openmm.org/", "description": "High-performance toolkit for molecular simulation and GPU-accelerated MD."}, {"name": "scVelo", "url": "https://github.com/theislab/scvelo", "description": "RNA velocity estimation for single-cell transcriptomics, inferring the direction and speed of cell differentiation.", "stars": "495"}, {"name": "STAR", "url": "https://github.com/alexdobin/STAR", "description": "Ultrafast universal RNA-seq aligner with support for spliced alignment and single-cell quantification via STARsolo.", "stars": "2.2k"}, {"name": "kallisto", "url": "https://pachterlab.github.io/kallisto/", "description": "Near-optimal RNA-seq quantification using pseudoalignment for fast transcript abundance estimation."}, {"name": "Harmony", "url": "https://github.com/immunogenomics/harmony", 
"description": "Fast and scalable integration of single-cell data across datasets, conditions, technologies, and species.", "stars": "631"}, {"name": "Monocle3", "url": "https://cole-trapnell-lab.github.io/monocle3/", "description": "Single-cell trajectory analysis tool for learning developmental trajectories and ordering cells in pseudotime."}, {"name": "CellChat", "url": "https://github.com/sqjin/CellChat", "description": "Inference and analysis of cell-cell communication ligand-receptor networks from single-cell transcriptomics data.", "stars": "775"}, {"name": "SCENIC", "url": "https://github.com/aertslab/SCENIC", "description": "Single-cell regulatory network inference and clustering linking transcription factors to co-expressed gene modules.", "stars": "481"}, {"name": "DoubletFinder", "url": "https://github.com/chris-mcginnis-ucsf/DoubletFinder", "description": "Machine learning approach for detecting multiplet (doublet) artifacts in single-cell RNA-seq data.", "stars": "537"}]}, {"name": "Drug Discovery", "parent": "Machine Learning Tasks and Models", "entries": [{"name": "drGAT", "url": "https://github.com/inoue0426/drGAT", "description": "Attention-based model for drug response prediction with gene explainability.", "stars": "1"}, {"name": "MOFGCN", "url": "https://github.com/weiba/MOFGCN/tree/main", "description": "GCN + heterogeneous network.", "stars": "7"}, {"name": "DeepDSC", "url": "https://ieeexplore-ieee-org.ezp2.lib.umn.edu/stamp/stamp.jsp?tp=\\&arnumber=8723620\\&tag=1", "description": "Autoencoder + fully connected NN."}, {"name": "DGDRP", "url": "https://github.com/minwoopak/heteronet", "description": "Multi-view embedding neural network.", "stars": "0"}, {"name": "DeepAEG", "url": "https://github.com/zhejiangzhuque/DeepAEG", "description": "GNN embedding + attention mechanism.", "stars": "3"}, {"name": "RECOVER", "url": "https://github.com/RECOVERcoalition/Recover", "description": "Machine learning framework for predicting synergistic drug 
combination responses across cell lines.", "stars": "24"}, {"name": "TGSA", "url": "https://github.com/violet-sto/TGSA", "description": "Tumor gene set and attention-based model leveraging biological pathway knowledge for drug response prediction.", "stars": "23"}, {"name": "HiDRA", "url": "https://github.com/bsml320/HiDRA", "description": "Hierarchical network model incorporating gene and pathway-level information for cancer drug response prediction."}, {"name": "DeepPurpose", "url": "https://github.com/kexinhuang12345/DeepPurpose", "description": "Deep learning library for drug repurposing.", "stars": "1.1k"}, {"name": "NeoDTI", "url": "https://github.com/FangpingWan/NeoDTI", "description": "Library for drug-target interaction prediction.", "stars": "77"}, {"name": "DTINet", "url": "https://github.com/luoyunan/DTINet", "description": "Network-based framework integrating heterogeneous biological data for DTI prediction.", "stars": "187"}, {"name": "DeepDTA", "url": "https://github.com/hkmztrk/DeepDTA", "description": "Deep learning model using CNNs on protein sequences and drug SMILES.", "stars": "297"}, {"name": "GraphDTA", "url": "https://github.com/thinng/GraphDTA", "description": "Graph neural network\u2013based DTI prediction using molecular graphs.", "stars": "296"}, {"name": "MolTrans", "url": "https://github.com/kexinhuang12345/MolTrans", "description": "Transformer-based DTI model leveraging molecular substructures.", "stars": "227"}, {"name": "DrugBAN", "url": "https://github.com/peizhenbai/DrugBAN", "description": "Bilinear attention network for interpretable DTI prediction.", "stars": "142"}, {"name": "MCPINN", "url": "https://github.com/mhlee0903/multi_channels_PINN", "description": "Drug discovery via compound-protein interaction and machine learning.", "stars": "3"}, {"name": "TransformerCPI", "url": "https://github.com/lifanchen-simm/transformerCPI", "description": "CPI prediction using Transformer.", "stars": "154"}, {"name": "REINVENT", "url": 
"https://github.com/MolecularAI/Reinvent", "description": "Reinforcement learning for de novo drug design.", "stars": "372"}, {"name": "MolGPT", "url": "https://github.com/devalab/molgpt", "description": "Transformer-based model for molecular generation.", "stars": "169"}, {"name": "Molecular Transformer", "url": "https://github.com/pschwllr/MolecularTransformer", "description": "Sequence-to-sequence model for retrosynthesis prediction.", "stars": "419"}, {"name": "TargetDiff", "url": "https://github.com/guanjq/targetdiff", "description": "3D equivariant diffusion model for structure-based drug design.", "stars": "328"}, {"name": "DiffDock", "url": "https://github.com/gcorso/DiffDock", "description": "Diffusion generative model for molecular docking, predicting the binding pose of small molecules to protein targets.", "stars": "1.5k"}, {"name": "JTVAE", "url": "https://github.com/wengong-jin/icml18-jtnn", "description": "Junction tree variational autoencoder for molecular graph generation that guarantees chemical validity via a hierarchical tree decomposition.", "stars": "553"}]}, {"name": "LLM for Biology", "parent": "Machine Learning Tasks and Models", "entries": [{"name": "AI4Chem/ChemLLM-7B-Chat", "url": "https://huggingface.co/AI4Chem/ChemLLM-7B-Chat", "description": "LLM for chemical & molecular science."}, {"name": "BioGPT", "url": "https://github.com/microsoft/BioGPT", "description": "LLM for biomedical text generation.", "stars": "4.5k"}, {"name": "GeneGPT", "url": "https://github.com/ncbi/GeneGPT", "description": "LLM for biomedical information, integrated with various APIs.", "stars": "424"}, {"name": "GenePT", "url": "https://github.com/yiqunchen/GenePT", "description": "Foundation LLM for single-cell data.", "stars": "313"}, {"name": "scPRINT", "url": "https://github.com/cantinilab/scPRINT", "description": "Pretrained on 50M cells for scRNA-seq denoising & zero imputation.", "stars": "143"}, {"name": "ClawBio", "url": 
"https://github.com/ClawBio/ClawBio", "description": "Bioinformatics-native AI agent skill library with local-first pharmacogenomics, ancestry PCA, semantic similarity, nutrigenomics, and metagenomics skills.", "stars": "551"}, {"name": "BioMedLM", "url": "https://huggingface.co/stanford-crfm/BioMedLM", "description": "2.7B parameter GPT-2-style language model trained exclusively on biomedical literature from PubMed for biomedical question answering and text generation."}, {"name": "MolT5", "url": "https://github.com/blender-nlp/MolT5", "description": "Language model for molecular tasks bridging text and SMILES, enabling molecule captioning and text-driven molecule generation.", "stars": "192"}, {"name": "ChatDrug", "url": "https://github.com/chao1224/ChatDrug", "description": "LLM-based conversational pipeline for drug discovery, using natural language prompts for iterative drug editing and optimization.", "stars": "158"}]}, {"name": "Foundation Models", "parent": "Machine Learning Tasks and Models", "entries": [{"name": "scFoundation", "url": "https://github.com/biomap-research/scFoundation", "description": "Large-scale foundation model for single-cell gene expression, enabling multiple downstream tasks.", "stars": "399"}, {"name": "scGPT", "url": "https://github.com/bowang-lab/scGPT", "description": "Transformer-based foundation model pretrained on millions of single-cell profiles.", "stars": "1.5k"}, {"name": "Geneformer", "url": "https://huggingface.co/ctheodoris/Geneformer", "description": "Context-aware, attention-based deep learning model pretrained on a large corpus of single-cell transcriptomes."}, {"name": "BulkFormer", "url": "https://github.com/KangBoming/BulkFormer", "description": "Foundation model for bulk RNA-seq data; learns general transcriptomic representations.", "stars": "50"}, {"name": "scBERT", "url": "https://github.com/TencentAILabHealthcare/scBERT", "description": "BERT-based foundation model pretrained on large-scale scRNA-seq data for 
cell type annotation.", "stars": "352"}, {"name": "CellPLM", "url": "https://github.com/OmicsML/CellPLM", "description": "Cell pre-trained language model with inter-cell transformer architecture for diverse single-cell analysis tasks.", "stars": "101"}, {"name": "UCE", "url": "https://github.com/snap-stanford/UCE", "description": "Universal Cell Embeddings: zero-shot single-cell embedding model trained on 36M cells across species, tissues, and assays without fine-tuning.", "stars": "249"}, {"name": "GEARS", "url": "https://github.com/snap-stanford/GEARS", "description": "Graph-based model for predicting transcriptional responses to single and combinatorial genetic perturbations using biological priors.", "stars": "350"}, {"name": "GigaPath", "url": "https://github.com/prov-gigapath/prov-gigapath", "description": "Slide-level digital pathology foundation model pretrained on 1.3 billion pathology image tokens from whole-slide images.", "stars": "589"}, {"name": "UNI", "url": "https://github.com/mahmoodlab/UNI", "description": "General-purpose self-supervised pathology foundation model trained on 100K+ whole-slide images for diverse computational pathology tasks.", "stars": "703"}, {"name": "CONCH", "url": "https://github.com/mahmoodlab/CONCH", "description": "Vision-language foundation model for computational pathology trained with contrastive captioning on pathology image\u2013text pairs.", "stars": "487"}, {"name": "Phikon", "url": "https://huggingface.co/owkin/phikon", "description": "ViT-based pathology foundation model pretrained with iBOT self-supervision on TCGA whole-slide images."}, {"name": "scMulan", "url": "https://github.com/SuperBianC/scMulan", "description": "Single-cell multi-omic language model pretrained on ~10M cells spanning transcriptomics, epigenomics, and proteomics for cross-omics transfer tasks.", "stars": "61"}, {"name": "totalVI", "url": "https://github.com/scverse/scvi-tools", "description": "Probabilistic framework for joint analysis of 
paired scRNA-seq and protein (CITE-seq) data enabling multi-modal cell state representation across single-cell datasets.", "stars": "1.6k"}, {"name": "MultiVI", "url": "https://github.com/scverse/scvi-tools", "description": "Multi-modal variational autoencoder for integrating paired and unpaired single-cell RNA-seq and ATAC-seq measurements into a unified latent space.", "stars": "1.6k"}, {"name": "MIRA", "url": "https://github.com/cistrome/MIRA", "description": "Probabilistic multimodal topic model jointly modeling single-cell transcriptomics and chromatin accessibility for regulatory network inference.", "stars": "68"}, {"name": "GLUE", "url": "https://github.com/gao-lab/GLUE", "description": "Graph-Linked Unified Embedding framework for unpaired single-cell multi-omics data integration across RNA, ATAC, methylation, and protein modalities.", "stars": "458"}, {"name": "BABEL", "url": "https://github.com/wukevin/babel", "description": "Cross-modality translation model enabling prediction between scRNA-seq and scATAC-seq profiles without requiring paired single-cell measurements.", "stars": "48"}, {"name": "Multigrate", "url": "https://github.com/theislab/multigrate", "description": "Asymmetric multi-omics variational autoencoder for integrating single-cell data across RNA, ATAC, and protein modalities with missing-modality support.", "stars": "32"}, {"name": "MOFA+", "url": "https://github.com/bioFAM/MOFA2", "description": "Multi-Omics Factor Analysis framework identifying shared axes of variation across bulk and single-cell datasets including RNA, ATAC, proteomics, methylation, and copy number.", "stars": "391"}, {"name": "GeneCompass", "url": "https://github.com/xCompass-AI/GeneCompass", "description": "Large-scale foundation model integrating DNA regulatory sequences and single-cell transcriptomics from 120M+ cells across multiple species for gene regulation prediction.", "stars": "111"}, {"name": "UnitedNet", "url": 
"https://github.com/LiuLab-Bioelectronics-Harvard/UnitedNet", "description": "Interpretable multi-task deep neural network for single-cell multi-omics integration spanning transcriptomics, chromatin accessibility, and proteomics.", "stars": "52"}, {"name": "SpatialGlue", "url": "https://github.com/JinmiaoChenLab/SpatialGlue", "description": "Graph attention network for spatial multi-omics integration jointly embedding spatial transcriptomics with chromatin accessibility or proteomics."}, {"name": "MIDAS", "url": "https://github.com/labomics/midas", "description": "Mosaic integration and differential accessibility model for single-cell multi-omics data that handles arbitrary missing-modality combinations across transcriptomics, chromatin accessibility, and proteomics.", "stars": "63"}, {"name": "scArches", "url": "https://github.com/theislab/scarches", "description": "Transfer learning framework for mapping new single-cell datasets onto pre-trained reference atlases across batches, conditions, and modalities.", "stars": "401"}, {"name": "TOSICA", "url": "https://github.com/JackieHanLab/TOSICA", "description": "Transformer-based framework for one-stop interpretable cell-type annotation supporting cross-dataset and cross-species transfer."}, {"name": "Evolutionary Scale Modeling (ESM)", "url": "https://github.com/facebookresearch/esm", "description": "Protein embeddings.", "stars": "4k"}, {"name": "ChemBERTa-2", "url": "https://github.com/seyonechithrananda/bert-loves-chemistry", "description": "Chemical embeddings & prediction.", "stars": "488"}, {"name": "ProtTrans", "url": "https://github.com/agemagician/ProtTrans", "description": "Suite of protein language models (ProtBERT, ProtT5, ProtXLNet) trained on billions of protein sequences from UniRef and BFD.", "stars": "1.3k"}, {"name": "ProGen2", "url": "https://github.com/salesforce/progen", "description": "Protein language model trained on diverse protein families for sequence generation and fitness prediction.", 
"stars": "695"}, {"name": "Ankh", "url": "https://github.com/agemagician/Ankh", "description": "Efficient protein language model optimized for downstream prediction tasks including secondary structure, localization, and function annotation.", "stars": "244"}, {"name": "AlphaFold3", "url": "https://github.com/google-deepmind/alphafold3", "description": "Predicts structures of proteins, nucleic acids, small molecules, and their complexes.", "stars": "7.8k"}, {"name": "Boltz-1", "url": "https://github.com/jwohlwend/boltz", "description": "Open-source all-atom biomolecular structure prediction model for proteins, nucleic acids, small molecules, and their complexes achieving AlphaFold3-level accuracy.", "stars": "3.9k"}, {"name": "Chai-1", "url": "https://github.com/chaidiscovery/chai-lab", "description": "Unified molecular structure prediction model covering proteins, nucleic acids, small molecules, and complexes.", "stars": "1.9k"}, {"name": "ESM3", "url": "https://github.com/evolutionaryscale/esm", "description": "Multimodal protein language model that jointly reasons over sequence, structure, and function for generative protein design and engineering.", "stars": "2.3k"}, {"name": "ESMFold", "url": "https://github.com/facebookresearch/esm", "description": "Fast protein structure prediction using language model embeddings.", "stars": "4k"}, {"name": "RFdiffusion", "url": "https://github.com/RosettaCommons/RFdiffusion", "description": "Generative model for protein backbone design using diffusion.", "stars": "2.8k"}, {"name": "ProteinMPNN", "url": "https://github.com/dauparas/ProteinMPNN", "description": "Deep learning model for protein sequence design given backbone structure.", "stars": "1.7k"}, {"name": "OmegaFold", "url": "https://github.com/HeliXonProtein/OmegaFold", "description": "High-resolution de novo protein structure prediction from sequence.", "stars": "616"}, {"name": "RoseTTAFold", "url": "https://github.com/RosettaCommons/RoseTTAFold", "description": 
"Three-track neural network for protein structure prediction.", "stars": "2.2k"}, {"name": "OpenFold", "url": "https://github.com/aqlaboratory/openfold", "description": "Trainable, memory-efficient open-source reproduction of AlphaFold2 enabling custom protein structure prediction workflows.", "stars": "3.3k"}, {"name": "SaProt", "url": "https://github.com/westlake-repl/SaProt", "description": "Structure-aware protein language model whose tokens encode both sequence and backbone geometry for improved function prediction."}, {"name": "EvoDiff", "url": "https://github.com/microsoft/evodiff", "description": "Discrete diffusion framework for protein sequence generation trained on evolutionary-scale data, supporting unconditional generation, disordered region design, and functional motif scaffolding. Paper (2023): https://www.biorxiv.org/content/10.1101/2023.09.11.556673v1", "stars": "664"}, {"name": "CHIEF", "url": "https://github.com/hms-dbmi/CHIEF", "description": "Clinical Histopathology Imaging Evaluation Foundation model integrating histology images and clinical context for pan-cancer analysis.", "stars": "698"}, {"name": "BiomedCLIP", "url": "https://huggingface.co/microsoft/BiomedCLIP-PubMedBERT_256-vit_g_14", "description": "CLIP-based vision-language foundation model for biomedical images and text trained on PubMed figure\u2013caption pairs."}, {"name": "Nucleotide Transformer", "url": "https://github.com/instadeepai/nucleotide-transformer", "description": "Foundation model for genomic sequences across multiple species.", "stars": "847"}, {"name": "DNABERT", "url": "https://github.com/jerryji1993/DNABERT", "description": "Pre-trained bidirectional encoder for DNA sequence analysis.", "stars": "746"}, {"name": "DNABERT-2", "url": "https://github.com/Zhihan1996/DNABERT_2", "description": "Improved genome foundation model with efficient tokenization.", "stars": "469"}, {"name": "Enformer", "url": 
"https://github.com/deepmind/deepmind-research/tree/master/enformer", "description": "Transformer model predicting gene expression from DNA sequence.", "stars": "15k"}, {"name": "Basenji", "url": "https://github.com/calico/basenji", "description": "Sequential regulatory activity prediction from DNA sequences.", "stars": "467"}, {"name": "Caduceus", "url": "https://github.com/kuleshov-group/caduceus", "description": "Bidirectional equivariant long-range DNA sequence model based on Mamba.", "stars": "230"}, {"name": "Evo", "url": "https://github.com/evo-design/evo", "description": "Long-context genomic foundation model (up to 131k tokens).", "stars": "1.5k"}, {"name": "HyenaDNA", "url": "https://github.com/HazyResearch/hyena-dna", "description": "Long-range genomic foundation model handling sequences up to 1M tokens using sub-quadratic, attention-free Hyena operators.", "stars": "772"}, {"name": "Borzoi", "url": "https://github.com/calico/borzoi", "description": "Extended successor to Enformer for predicting RNA-seq coverage from long genomic sequence windows (524 kb) with improved resolution.", "stars": "234"}, {"name": "DeepSEA", "url": "http://deepsea.princeton.edu/", "description": "Deep learning framework for predicting chromatin effects of sequence alterations with single-nucleotide sensitivity across thousands of chromatin features."}, {"name": "Sei", "url": "https://github.com/FunctionLab/sei-framework", "description": "Sequence-to-function framework learning a genome-wide regulatory activity code from DNA sequences for variant effect prediction.", "stars": "112"}, {"name": "GPN (Genomic Pre-trained Network)", "url": "https://github.com/songlab-cal/gpn", "description": "Masked language model for DNA sequences enabling zero-shot variant effect prediction without requiring functional annotations.", "stars": "335"}]}], "name": ""}