jaeswift-website/api/data/awesomelist/SE-ML--awesome-seml.json

1 line
No EOL
15 KiB
JSON

{"slug": "SE-ML--awesome-seml", "title": "Seml", "description": "A curated list of articles that cover the software engineering best practices for building machine learning applications.", "github_url": "https://github.com/SE-ML/awesome-seml", "stars": "1K", "tag": "Computer Science", "entry_count": 91, "subcategory_count": 1, "subcategories": [{"name": "General", "parent": "", "entries": [{"name": "Broad Overviews", "url": "#broad-overviews", "description": ""}, {"name": "Data Management", "url": "#data-management", "description": ""}, {"name": "Model Training", "url": "#model-training", "description": ""}, {"name": "Deployment and Operation", "url": "#deployment-and-operation", "description": ""}, {"name": "Social Aspects", "url": "#social-aspects", "description": ""}, {"name": "Governance", "url": "#governance", "description": ""}, {"name": "Tooling", "url": "#tooling", "description": ""}, {"name": "AI Engineering: 11 Foundational Practices", "url": "https://resources.sei.cmu.edu/asset_files/WhitePaper/2019_019_001_634648.pdf", "description": ""}, {"name": "Best Practices for Machine Learning Applications", "url": "https://pdfs.semanticscholar.org/2869/6212a4a204783e9dd3953f06e103c02c6972.pdf", "description": ""}, {"name": "Engineering Best Practices for Machine Learning", "url": "https://se-ml.github.io/practices/", "description": ""}, {"name": "Hidden Technical Debt in Machine Learning Systems", "url": "https://papers.nips.cc/paper/5656-hidden-technical-debt-in-machine-learning-systems.pdf", "description": ""}, {"name": "Rules of Machine Learning: Best Practices for ML Engineering", "url": "https://developers.google.com/machine-learning/guides/rules-of-ml", "description": ""}, {"name": "Software Engineering for Machine Learning: A Case Study", "url": "https://www.microsoft.com/en-us/research/publication/software-engineering-for-machine-learning-a-case-study/", "description": ""}, {"name": "A Survey on Data Collection for Machine Learning A Big Data - AI 
Integration Perspective_2019", "url": "https://deepai.org/publication/a-survey-on-data-collection-for-machine-learning-a-big-data-ai-integration-perspective", "description": ""}, {"name": "Automating Large-Scale Data Quality Verification", "url": "http://www.vldb.org/pvldb/vol11/p1781-schelter.pdf", "description": ""}, {"name": "Data management challenges in production machine learning", "url": "https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/46178.pdf", "description": ""}, {"name": "Data Validation for Machine Learning", "url": "https://mlsys.org/Conferences/2019/doc/2019/167.pdf", "description": ""}, {"name": "How to organize data labelling for ML", "url": "https://www.altexsoft.com/blognp/datascience/how-to-organize-data-labeling-for-machine-learning-approaches-and-tools/", "description": ""}, {"name": "The curse of big data labeling and three ways to solve it", "url": "https://aws.amazon.com/blogs/apn/the-curse-of-big-data-labeling-and-three-ways-to-solve-it/", "description": ""}, {"name": "The Data Linter: Lightweight, Automated Sanity Checking for ML Data Sets", "url": "http://learningsys.org/nips17/assets/papers/paper_19.pdf", "description": ""}, {"name": "The ultimate guide to data labeling for ML", "url": "https://www.cloudfactory.com/data-labeling-guide", "description": ""}, {"name": "10 Best Practices for Deep Learning", "url": "https://nanonets.com/blog/10-best-practices-deep-learning/#track-model-experiments", "description": ""}, {"name": "Apples-to-apples in cross-validation studies: pitfalls in classifier performance measurement", "url": "https://dl.acm.org/doi/abs/10.1145/1882471.1882479", "description": ""}, {"name": "Fairness On The Ground: Applying Algorithmic Fairness Approaches To Production Systems", "url": 
"https://scontent-amt2-1.xx.fbcdn.net/v/t39.8562-6/159714417_1180893265647073_4215201353052552221_n.pdf?_nc_cat=111&ccb=1-3&_nc_sid=ae5e01&_nc_ohc=6WFnNMmyp68AX95bRHk&_nc_ht=scontent-amt2-1.xx&oh=7a548f822e659b7bb2f58a511c30ee19&oe=606F33AD", "description": ""}, {"name": "How do you manage your Machine Learning Experiments?", "url": "https://medium.com/@hadyelsahar/how-do-you-manage-your-machine-learning-experiments-ab87508348ac", "description": ""}, {"name": "Machine Learning Testing: Survey, Landscapes and Horizons", "url": "https://arxiv.org/pdf/1906.10742.pdf", "description": ""}, {"name": "Nitpicking Machine Learning Technical Debt", "url": "https://matthewmcateer.me/blog/machine-learning-technical-debt/", "description": ""}, {"name": "On Comparing Classifiers: Pitfalls to Avoid and a Recommended Approach", "url": "https://link.springer.com/article/10.1023/A:1009752403260", "description": ""}, {"name": "On human intellect and machine failures: Troubleshooting integrative machine learning systems", "url": "https://arxiv.org/pdf/1611.08309.pdf", "description": ""}, {"name": "Pitfalls and Best Practices in Algorithm Configuration", "url": "https://www.jair.org/index.php/jair/article/download/11420/26488/", "description": ""}, {"name": "Pitfalls of supervised feature selection", "url": "https://academic.oup.com/bioinformatics/article/26/3/440/213774", "description": ""}, {"name": "Preparing and Architecting for Machine Learning", "url": "https://www.gartner.com/en/documents/3889770/preparing-and-architecting-for-machine-learning-2018-upd", "description": ""}, {"name": "Preliminary Systematic Literature Review of Machine Learning System Development Process", "url": "https://arxiv.org/abs/1910.05528", "description": ""}, {"name": "Software development best practices in a deep learning environment", "url": "https://towardsdatascience.com/software-development-best-practices-in-a-deep-learning-environment-a1769e9859b1", "description": ""}, {"name": "Testing and 
Debugging in Machine Learning", "url": "https://developers.google.com/machine-learning/testing-debugging", "description": ""}, {"name": "What Went Wrong and Why? Diagnosing Situated Interaction Failures in the Wild", "url": "https://www.microsoft.com/en-us/research/publication/what-went-wrong-and-why-diagnosing-situated-interaction-failures-in-the-wild/", "description": ""}, {"name": "Best Practices in Machine Learning Infrastructure", "url": "https://algorithmia.com/blog/best-practices-in-machine-learning-infrastructure", "description": ""}, {"name": "Building Continuous Integration Services for Machine Learning", "url": "http://pages.cs.wisc.edu/~wentaowu/papers/kdd20-ci-for-ml.pdf", "description": ""}, {"name": "Continuous Delivery for Machine Learning", "url": "https://martinfowler.com/articles/cd4ml.html", "description": ""}, {"name": "Continuous Training for Production ML in the TensorFlow Extended (TFX) Platform", "url": "https://www.usenix.org/system/files/opml19papers-baylor.pdf", "description": ""}, {"name": "Fairness Indicators: Scalable Infrastructure for Fair ML Systems", "url": "https://ai.googleblog.com/2019/12/fairness-indicators-scalable.html", "description": ""}, {"name": "Machine Learning Logistics", "url": "https://mapr.com/ebook/machine-learning-logistics/", "description": ""}, {"name": "Machine learning: Moving from experiments to production", "url": "https://blog.codecentric.de/en/2019/03/machine-learning-experiments-production/", "description": ""}, {"name": "ML Ops: Machine Learning as an Engineering Discipline", "url": "https://towardsdatascience.com/ml-ops-machine-learning-as-an-engineering-discipline-b86ca4874a3f", "description": ""}, {"name": "Model Governance Reducing the Anarchy of Production", "url": "https://www.usenix.org/conference/atc18/presentation/sridhar", "description": ""}, {"name": "ModelOps: Cloud-based lifecycle management for reliable and trusted AI", "url": "http://hummer.io/docs/2019-ic2e-modelops.pdf", 
"description": ""}, {"name": "Operational Machine Learning", "url": "https://www.kdnuggets.com/2018/04/operational-machine-learning-successful-mlops.html", "description": ""}, {"name": "Scaling Machine Learning as a Service", "url": "http://proceedings.mlr.press/v67/li17a/li17a.pdf", "description": ""}, {"name": "TFX: A tensorflow-based Production-Scale ML Platform", "url": "https://dl.acm.org/doi/pdf/10.1145/3097983.3098021?download=true", "description": ""}, {"name": "The ML Test Score: A Rubric for ML Production Readiness and Technical Debt Reduction", "url": "https://research.google/pubs/pub46555/", "description": ""}, {"name": "Underspecification Presents Challenges for Credibility in Modern Machine Learning", "url": "https://arxiv.org/abs/2011.03395", "description": ""}, {"name": "Versioning for end-to-end machine learning pipelines", "url": "https://doi.org/10.1145/3076246.3076248", "description": ""}, {"name": "Data Scientists in Software Teams: State of the Art and Challenges", "url": "http://web.cs.ucla.edu/~miryung/Publications/tse2017-datascientists.pdf", "description": ""}, {"name": "Machine Learning Interviews", "url": "https://github.com/chiphuyen/machine-learning-systems-design/blob/master/build/build1/consolidated.pdf", "description": "", "stars": "7.4k"}, {"name": "Managing Machine Learning Projects", "url": "https://d1.awsstatic.com/whitepapers/aws-managing-ml-projects.pdf", "description": ""}, {"name": "Principled Machine Learning: Practices and Tools for Efficient Collaboration", "url": "https://dev.to/robogeek/principled-machine-learning-4eho", "description": ""}, {"name": "A Human-Centered Interpretability Framework Based on Weight of Evidence", "url": "https://arxiv.org/pdf/2104.13299.pdf", "description": ""}, {"name": "An Architectural Risk Analysis Of Machine Learning Systems", "url": "https://berryvilleiml.com/docs/ara.pdf", "description": ""}, {"name": "Beyond Debiasing", "url": 
"https://complexdiscovery.com/wp-content/uploads/2021/09/EDRi-Beyond-Debiasing-Report.pdf", "description": ""}, {"name": "Closing the AI Accountability Gap: Defining an End-to-End Framework for Internal Algorithmic Auditing", "url": "https://dl.acm.org/doi/pdf/10.1145/3351095.3372873", "description": ""}, {"name": "Inherent trade-offs in the fair determination of risk scores", "url": "https://arxiv.org/abs/1609.05807", "description": ""}, {"name": "Responsible AI practices", "url": "https://ai.google/responsibilities/responsible-ai-practices/", "description": ""}, {"name": "Toward Trustworthy AI Development: Mechanisms for Supporting Verifiable Claims", "url": "https://arxiv.org/abs/2004.07213", "description": ""}, {"name": "Understanding Software-2.0", "url": "https://dl.acm.org/doi/abs/10.1145/3453478", "description": ""}, {"name": "Aim", "url": "https://aimstack.io", "description": "Aim is an open source experiment tracking tool."}, {"name": "Airflow", "url": "https://airflow.apache.org/", "description": "Programmatically author, schedule and monitor workflows."}, {"name": "Alibi Detect", "url": "https://github.com/SeldonIO/alibi-detect", "description": "Python library focused on outlier, adversarial and drift detection.", "stars": "1.5k"}, {"name": "Archai", "url": "https://github.com/microsoft/archai", "description": "Neural architecture search.", "stars": "373"}, {"name": "Data Version Control (DVC)", "url": "https://dvc.org/", "description": "DVC is a data and ML experiments management tool."}, {"name": "Facets Overview / Facets Dive", "url": "https://pair-code.github.io/facets/", "description": "Robust visualizations to aid in understanding machine learning datasets."}, {"name": "FairLearn", "url": "https://fairlearn.github.io/", "description": "A toolkit to assess and improve the fairness of machine learning models."}, {"name": "Git Large File System (LFS)", "url": "https://git-lfs.github.com/", "description": "Replaces large files such as datasets with 
text pointers inside Git."}, {"name": "Great Expectations", "url": "https://github.com/great-expectations/great_expectations", "description": "Data validation and testing with integration in pipelines.", "stars": "7.4k"}, {"name": "HParams", "url": "https://github.com/PetrochukM/HParams", "description": "A thoughtful approach to configuration management for machine learning projects.", "stars": "126"}, {"name": "Kubeflow", "url": "https://www.kubeflow.org/", "description": "A platform for data scientists who want to build and experiment with ML pipelines."}, {"name": "Label Studio", "url": "https://github.com/heartexlabs/label-studio", "description": "A multi-type data labeling and annotation tool with standardized output format.", "stars": "11k"}, {"name": "LiFT", "url": "https://github.com/linkedin/LiFT", "description": "Linkedin fairness toolkit.", "stars": "159"}, {"name": "MLFlow", "url": "https://mlflow.org/", "description": "Manage the ML lifecycle, including experimentation, deployment, and a central model registry."}, {"name": "Model Card Toolkit", "url": "https://github.com/tensorflow/model-card-toolkit", "description": "Streamlines and automates the generation of model cards; for model documentation.", "stars": "314"}, {"name": "Neptune.ai", "url": "https://neptune.ai/", "description": "Experiment tracking tool bringing organization and collaboration to data science projects."}, {"name": "Neuraxle", "url": "https://github.com/Neuraxio/Neuraxle", "description": "Sklearn-like framework for hyperparameter tuning and AutoML in deep learning projects.", "stars": "543"}, {"name": "OpenML", "url": "https://www.openml.org", "description": "An inclusive movement to build an open, organized, online ecosystem for machine learning."}, {"name": "PyTorch Lightning", "url": "https://github.com/PyTorchLightning/pytorch-lightning", "description": "The lightweight PyTorch wrapper for high-performance AI research. 
Scale your models, not the boilerplate.", "stars": "20k"}, {"name": "REVISE: REvealing VIsual biaSEs", "url": "https://github.com/princetonvisualai/revise-tool", "description": "Automatically detect bias in visual data sets.", "stars": "91"}, {"name": "Robustness Metrics", "url": "https://github.com/google-research/robustness_metrics", "description": "Lightweight modules to evaluate the robustness of classification models.", "stars": "418"}, {"name": "Seldon Core", "url": "https://github.com/SeldonIO/seldon-core", "description": "An MLOps framework to package, deploy, monitor and manage thousands of production machine learning models on Kubernetes.", "stars": "3.4k"}, {"name": "Spark Machine Learning", "url": "https://spark.apache.org/mllib/", "description": "Spark\u2019s ML library consisting of common learning algorithms and utilities."}, {"name": "TensorBoard", "url": "https://www.tensorflow.org/tensorboard/", "description": "TensorFlow's Visualization Toolkit."}, {"name": "Tensorflow Extended (TFX)", "url": "https://www.tensorflow.org/tfx/", "description": "An end-to-end platform for deploying production ML pipelines."}, {"name": "Tensorflow Data Validation (TFDV)", "url": "https://github.com/tensorflow/data-validation", "description": "Library for exploring and validating machine learning data. Similar to Great Expectations, but for Tensorflow data.", "stars": "674"}, {"name": "Weights & Biases", "url": "https://www.wandb.com/", "description": "Experiment tracking, model optimization, and dataset versioning."}]}], "name": ""}