jaeswift-website/api/data/awesomelist/mmcgrana--services-engineering.json

1 line
No EOL
11 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{"slug": "mmcgrana--services-engineering", "title": "Services Engineering", "description": "A reading list for services engineering, with a focus on cloud infrastructure services", "github_url": "https://github.com/mmcgrana/services-engineering", "stars": "3.4K", "tag": "Miscellaneous", "entry_count": 71, "subcategory_count": 1, "subcategories": [{"name": "Services Engineering Reading List", "parent": "", "entries": [{"name": "Fault Injection in Production", "url": "http://queue.acm.org/detail.cfm?id=2353017", "description": ""}, {"name": "Making Reliable Distributed Systems in the Presence of Software Errors", "url": "http://www.erlang.org/download/armstrong_thesis_2003.pdf", "description": ""}, {"name": "Highly Available Transactions: Virtues and Limitations", "url": "http://www.bailis.org/papers/hat-vldb2014.pdf", "description": ""}, {"name": "The Incident Command System", "url": "http://www.high-reliability.org/files/The_Incident_Command_System.pdf", "description": ""}, {"name": "The Chubby Lock Service for Loosely Coupled Distributed Systems", "url": "http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en/us/archive/chubby-osdi06.pdf", "description": ""}, {"name": "Bigtable: a Distributed Storage System for Structured Data", "url": "http://www.read.seas.harvard.edu/~kohler/class/cs239-w08/chang06bigtable.pdf", "description": ""}, {"name": "Spanner: Google's Globally-Distributed Database", "url": "http://research.google.com/archive/spanner-osdi2012.pdf", "description": ""}, {"name": "Dynamo: Amazon's Highly Available Key-Value Store", "url": "http://www.read.seas.harvard.edu/~kohler/class/cs239-w08/decandia07dynamo.pdf", "description": ""}, {"name": "MapReduce: Simplified Data Processing on Large Clusters", "url": "http://research.google.com/archive/mapreduce-osdi04.pdf", "description": ""}, {"name": "The Google File System", "url": "http://research.google.com/archive/gfs-sosp2003.pdf", "description": ""}, {"name": "On 
Designing and Deploying Internet Scale Services", "url": "http://mvdirona.com/jrh/talksAndPapers/JamesRH_Lisa.pdf", "description": ""}, {"name": "Kafka: A Distributed Messaging System for Log Processing", "url": "http://research.microsoft.com/en-us/UM/people/srikanth/netdb11/netdb11papers/netdb11-final12.pdf", "description": ""}, {"name": "Weathering the Unexpected", "url": "http://queue.acm.org/detail.cfm?id=2371516", "description": ""}, {"name": "The Unified Logging Infrastructure for Data Analytics at Twitter", "url": "http://vldb.org/pvldb/vol5/p1771_georgelee_vldb2012.pdf", "description": ""}, {"name": "Automatic Management of Partitioned, Replicated Search Services", "url": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.222.1862&rep=rep1&type=pdf", "description": ""}, {"name": "Learning to Embrace Failure", "url": "http://best.dtu.dk/SC13/p20-casestudy.pdf", "description": ""}, {"name": "Scaling Big Data Mining Infrastructure: The Twitter Experience", "url": "http://www.kdd.org/sites/default/files/issues/14-2-2012-12/V14-02-02-Lin.pdf", "description": ""}, {"name": "Dremel: Interactive Analysis of Web-Scale Datasets", "url": "http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en/us/pubs/archive/36632.pdf", "description": ""}, {"name": "Out of the Tar Pit", "url": "http://shaffner.us/cs/papers/tarpit.pdf", "description": ""}, {"name": "The Log-Structured Merge-Tree", "url": "http://www.cs.umb.edu/~poneil/lsmtree.pdf", "description": ""}, {"name": "In Search of an Understandable Consensus Algorithm", "url": "https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf", "description": ""}, {"name": "Failure Trends in a Large Disk Drive Population", "url": "http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en/us/archive/disk_failures.pdf", "description": ""}, {"name": "Fallacies of Distributed Computing Explained", "url": 
"http://www.rgoarchitects.com/Files/fallacies.pdf", "description": ""}, {"name": "F1 - The Fault-Tolerant Distributed RDBMS Supporting Google's Ad Business", "url": "http://research.google.com/pubs/archive/38125.pdf", "description": ""}, {"name": "Dapper, A Large Scale Distributed Systems Tracing Infrastructure", "url": "http://research.google.com/pubs/archive/36356.pdf", "description": ""}, {"name": "Resilient Distributed Datasets: a Fault-Tolerant Abstraction for In-Memory Cluster Computing", "url": "https://www.usenix.org/system/files/conference/nsdi12/nsdi12-final138.pdf", "description": ""}, {"name": "The Human Side of Postmortems", "url": "https://docs.google.com/file/d/0Byl4UKRYLErDVlJMNDNjaThiR2M/edit", "description": ""}, {"name": "Crew Resource Management: a Positive Change for the Fire Service", "url": "http://www.iaff.org/06news/NearMissKit/6.%20Crew%20Resource%20Management/CRM.pdf", "description": ""}, {"name": "Resilience Engineering: Part I", "url": "http://www.kitchensoap.com/2011/04/07/resilience-engineering-part-i/", "description": ""}, {"name": "Systems Engineering: a Great Definition", "url": "http://www.kitchensoap.com/2011/07/18/systems-engineering-great-definition/", "description": ""}, {"name": "Chaos Monkey Released Into The Wild", "url": "http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html", "description": ""}, {"name": "Some Rules for Engineering and Operations", "url": "http://blog.b3k.us/2012/01/24/some-rules.html", "description": ""}, {"name": "Service Level Disagreements Part I", "url": "http://blog.b3k.us/2009/07/15/service-level-disagreements.html", "description": ""}, {"name": "Incuriosity Will Kill Your Infrastructure", "url": "http://yellerapp.com/posts/2015-03-16-incuriosity-killed-the-infrastructure.html", "description": ""}, {"name": "My Philosophy on Alerting", "url": "https://docs.google.com/document/d/199PqyG3UsyXlwieHaqbGiWVa8eMWi8zzAn0YfcApr8Q/edit#heading=h.whsaboyw21nk", "description": ""}, {"name": 
"You Can't Sacrifice Partition Tolerance", "url": "http://codahale.com/you-cant-sacrifice-partition-tolerance/", "description": ""}, {"name": "Customer Trust", "url": "http://perspectives.mvdirona.com/2013/01/15/CustomerTrust.aspx", "description": ""}, {"name": "Observations on Errors, Corrections, & Trust of Dependent Systems", "url": "http://perspectives.mvdirona.com/2012/02/26/ObservationsOnErrorsCorrectionsTrustOfDependentSystems.aspx", "description": ""}, {"name": "Game Day Exercises at Stripe: Learning from `kill -9`", "url": "https://stripe.com/blog/game-day-exercises-at-stripe", "description": ""}, {"name": "Life Beyond Distributed Transactions: An Apostate's Opinion", "url": "http://cs.brown.edu/courses/cs227/archives/2012/papers/weaker/cidr07p15.pdf", "description": ""}, {"name": "Notes on Distributed Systems for Young Bloods", "url": "http://www.somethingsimilar.com/2013/01/14/notes-on-distributed-systems-for-young-bloods/", "description": ""}, {"name": "The Network is Reliable", "url": "http://aphyr.com/posts/288-the-network-is-reliable", "description": ""}, {"name": "The Trouble with Clocks", "url": "http://aphyr.com/posts/299-the-trouble-with-timestamps", "description": ""}, {"name": "Call Me Maybe: Final Thoughts", "url": "http://aphyr.com/posts/286-call-me-maybe-final-thoughts", "description": ""}, {"name": "Getting Real About Distributed Systems Reliability", "url": "http://blog.empathybox.com/post/19574936361/getting-real-about-distributed-system-reliability", "description": ""}, {"name": "The Log: What every software engineer should know about real-time data's unifying abstraction", "url": "http://engineering.linkedin.com/distributed-systems/log-what-every-software-engineer-should-know-about-real-time-datas-unifying", "description": ""}, {"name": "Incident Response at Heroku", "url": "https://blog.heroku.com/archives/2014/5/9/incident-response-at-heroku", "description": ""}, {"name": "On HTTP Load Testing", "url": 
"http://www.mnot.net/blog/2011/05/18/http_benchmark_rules", "description": ""}, {"name": "Observability at Twitter", "url": "https://blog.twitter.com/2013/observability-at-twitter", "description": ""}, {"name": "Stevey's Google Platforms Rant", "url": "https://gist.github.com/chitchcock/1281611", "description": ""}, {"name": "Design, Lessons, and Advice from Building Distributed Systems at Google", "url": "http://www.cs.cornell.edu/projects/ladis2009/talks/dean-keynote-ladis2009.pdf", "description": ""}, {"name": "Service Design Best Practices", "url": "http://www.mvdirona.com/jrh/TalksAndPapers/JamesHamilton_POA20090226.pdf", "description": ""}, {"name": "The Field Guide To Understanding Human Error", "url": "http://www.amazon.com/Field-Guide-Understanding-Human-Error/dp/0754648265", "description": ""}, {"name": "Agile Retrospectives: Making Good Teams Great", "url": "http://www.amazon.com/Agile-Retrospectives-Making-Teams-Great/dp/0977616649", "description": ""}, {"name": "Better: A Surgeon's Notes on Performance", "url": "http://www.amazon.com/dp/0312427654", "description": ""}, {"name": "The Checklist Manifesto: How to Get Things Right", "url": "http://www.amazon.com/The-Checklist-Manifesto-ebook/dp/B0030V0PEW", "description": ""}, {"name": "High Performance Browser Networking", "url": "http://chimera.labs.oreilly.com/books/1230000000545/index.html", "description": ""}, {"name": "Resilience Engineering in Practice", "url": "http://www.amazon.com/Resilience-Engineering-Practice-Ashgate-Studies/dp/1409410358/", "description": ""}, {"name": "Effective Monitoring and Alerting", "url": "http://www.amazon.com/Effective-Monitoring-Alerting-For-Operations/dp/1449333524", "description": ""}, {"name": "Release It!: Design and Deploy Production-Ready Software", "url": "http://www.amazon.com/Release-It-Production-Ready-Pragmatic-Programmers/dp/0978739213", "description": ""}, {"name": "The Challenger Launch Decision", "url": 
"http://www.amazon.com/The-Challenger-Launch-Decision-Technology/dp/0226851761", "description": ""}, {"name": "Managing the Unexpected", "url": "http://www.amazon.com/gp/product/B004IK9U4U", "description": ""}, {"name": "Berkeley AMP Lab", "url": "https://amplab.cs.berkeley.edu/", "description": ""}, {"name": "Berkeley Database Group", "url": "http://db.cs.berkeley.edu/w/", "description": ""}, {"name": "Google Research", "url": "http://research.google.com/", "description": ""}, {"name": "Microsoft Systems Research", "url": "http://research.microsoft.com/en-US/groups/sr/default.aspx", "description": ""}, {"name": "Twitter Research", "url": "https://engineering.twitter.com/research", "description": ""}, {"name": "Monitorama", "url": "http://monitorama.com/", "description": ""}, {"name": "Ricon", "url": "http://ricon.io/", "description": ""}, {"name": "Surge", "url": "http://surge.omniti.com/", "description": ""}, {"name": "Velocity", "url": "http://velocityconf.com/", "description": ""}]}]}