1 line
No EOL
11 KiB
JSON
1 line
No EOL
11 KiB
JSON
{"slug": "mmcgrana--services-engineering", "title": "Services Engineering", "description": "A reading list for services engineering, with a focus on cloud infrastructure services", "github_url": "https://github.com/mmcgrana/services-engineering", "stars": "3.4K", "tag": "Miscellaneous", "entry_count": 71, "subcategory_count": 1, "subcategories": [{"name": "Services Engineering Reading List", "parent": "", "entries": [{"name": "Fault Injection in Production", "url": "http://queue.acm.org/detail.cfm?id=2353017", "description": ""}, {"name": "Making Reliable Distributed Systems in the Presence of Software Errors", "url": "http://www.erlang.org/download/armstrong_thesis_2003.pdf", "description": ""}, {"name": "Highly Available Transactions: Virtues and Limitations", "url": "http://www.bailis.org/papers/hat-vldb2014.pdf", "description": ""}, {"name": "The Incident Command System", "url": "http://www.high-reliability.org/files/The_Incident_Command_System.pdf", "description": ""}, {"name": "The Chubby Lock Service for Loosely Coupled Distributed Systems", "url": "http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en/us/archive/chubby-osdi06.pdf", "description": ""}, {"name": "Bigtable: a Distributed Storage System for Structured Data", "url": "http://www.read.seas.harvard.edu/~kohler/class/cs239-w08/chang06bigtable.pdf", "description": ""}, {"name": "Spanner: Google’s Globally-Distributed Database", "url": "http://research.google.com/archive/spanner-osdi2012.pdf", "description": ""}, {"name": "Dynamo: Amazon’s Highly Available Key-Value Store", "url": "http://www.read.seas.harvard.edu/~kohler/class/cs239-w08/decandia07dynamo.pdf", "description": ""}, {"name": "MapReduce: Simplified Data Processing on Large Clusters", "url": "http://research.google.com/archive/mapreduce-osdi04.pdf", "description": ""}, {"name": "The Google File System", "url": "http://research.google.com/archive/gfs-sosp2003.pdf", "description": ""},
{"name": "On Designing and Deploying Internet Scale Services", "url": "http://mvdirona.com/jrh/talksAndPapers/JamesRH_Lisa.pdf", "description": ""}, {"name": "Kafka: A Distributed Messaging System for Log Processing", "url": "http://research.microsoft.com/en-us/UM/people/srikanth/netdb11/netdb11papers/netdb11-final12.pdf", "description": ""}, {"name": "Weathering the Unexpected", "url": "http://queue.acm.org/detail.cfm?id=2371516", "description": ""}, {"name": "The Unified Logging Infrastructure for Data Analytics at Twitter", "url": "http://vldb.org/pvldb/vol5/p1771_georgelee_vldb2012.pdf", "description": ""}, {"name": "Automatic Management of Partitioned, Replicated Search Services", "url": "http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.222.1862&rep=rep1&type=pdf", "description": ""}, {"name": "Learning to Embrace Failure", "url": "http://best.dtu.dk/SC13/p20-casestudy.pdf", "description": ""}, {"name": "Scaling Big Data Mining Infrastructure: The Twitter Experience", "url": "http://www.kdd.org/sites/default/files/issues/14-2-2012-12/V14-02-02-Lin.pdf", "description": ""}, {"name": "Dremel: Interactive Analysis of Web-Scale Datasets", "url": "http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en/us/pubs/archive/36632.pdf", "description": ""}, {"name": "Out of the Tar Pit", "url": "http://shaffner.us/cs/papers/tarpit.pdf", "description": ""}, {"name": "The Log-Structured Merge-Tree", "url": "http://www.cs.umb.edu/~poneil/lsmtree.pdf", "description": ""}, {"name": "In Search of an Understandable Consensus Algorithm", "url": "https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf", "description": ""}, {"name": "Failure Trends in a Large Disk Drive Population", "url": "http://static.googleusercontent.com/external_content/untrusted_dlcp/research.google.com/en/us/archive/disk_failures.pdf", "description": ""}, {"name": "Fallacies of Distributed Computing Explained", "url": 
"http://www.rgoarchitects.com/Files/fallacies.pdf", "description": ""}, {"name": "F1 - The Fault-Tolerant Distributed RDBMS Supporting Google’s Ad Business", "url": "http://research.google.com/pubs/archive/38125.pdf", "description": ""}, {"name": "Dapper, A Large Scale Distributed Systems Tracing Infrastructure", "url": "http://research.google.com/pubs/archive/36356.pdf", "description": ""}, {"name": "Resilient Distributed Datasets: a Fault-Tolerant Abstraction for In-Memory Cluster Computing", "url": "https://www.usenix.org/system/files/conference/nsdi12/nsdi12-final138.pdf", "description": ""}, {"name": "The Human Side of Postmortems", "url": "https://docs.google.com/file/d/0Byl4UKRYLErDVlJMNDNjaThiR2M/edit", "description": ""}, {"name": "Crew Resource Management: a Positive Change for the Fire Service", "url": "http://www.iaff.org/06news/NearMissKit/6.%20Crew%20Resource%20Management/CRM.pdf", "description": ""}, {"name": "Resilience Engineering: Part I", "url": "http://www.kitchensoap.com/2011/04/07/resilience-engineering-part-i/", "description": ""}, {"name": "Systems Engineering: a Great Definition", "url": "http://www.kitchensoap.com/2011/07/18/systems-engineering-great-definition/", "description": ""}, {"name": "Chaos Monkey Released Into The Wild", "url": "http://techblog.netflix.com/2012/07/chaos-monkey-released-into-wild.html", "description": ""}, {"name": "Some Rules for Engineering and Operations", "url": "http://blog.b3k.us/2012/01/24/some-rules.html", "description": ""}, {"name": "Service Level Disagreements Part I", "url": "http://blog.b3k.us/2009/07/15/service-level-disagreements.html", "description": ""}, {"name": "Incuriosity Will Kill Your Infrastructure", "url": "http://yellerapp.com/posts/2015-03-16-incuriosity-killed-the-infrastructure.html", "description": ""}, {"name": "My Philosophy on Alerting", "url": "https://docs.google.com/document/d/199PqyG3UsyXlwieHaqbGiWVa8eMWi8zzAn0YfcApr8Q/edit#heading=h.whsaboyw21nk", "description": ""}, {"name": 
"You Can’t Sacrifice Partition Tolerance", "url": "http://codahale.com/you-cant-sacrifice-partition-tolerance/", "description": ""}, {"name": "Customer Trust", "url": "http://perspectives.mvdirona.com/2013/01/15/CustomerTrust.aspx", "description": ""}, {"name": "Observations on Errors, Corrections, & Trust of Dependent Systems", "url": "http://perspectives.mvdirona.com/2012/02/26/ObservationsOnErrorsCorrectionsTrustOfDependentSystems.aspx", "description": ""}, {"name": "Game Day Exercises at Stripe: Learning from `kill -9`", "url": "https://stripe.com/blog/game-day-exercises-at-stripe", "description": ""}, {"name": "Life Beyond Distributed Transactions: An Apostate’s Opinion", "url": "http://cs.brown.edu/courses/cs227/archives/2012/papers/weaker/cidr07p15.pdf", "description": ""}, {"name": "Notes on Distributed Systems for Young Bloods", "url": "http://www.somethingsimilar.com/2013/01/14/notes-on-distributed-systems-for-young-bloods/", "description": ""}, {"name": "The Network is Reliable", "url": "http://aphyr.com/posts/288-the-network-is-reliable", "description": ""}, {"name": "The Trouble with Clocks", "url": "http://aphyr.com/posts/299-the-trouble-with-timestamps", "description": ""}, {"name": "Call Me Maybe: Final Thoughts", "url": "http://aphyr.com/posts/286-call-me-maybe-final-thoughts", "description": ""}, {"name": "Getting Real About Distributed Systems Reliability", "url": "http://blog.empathybox.com/post/19574936361/getting-real-about-distributed-system-reliability", "description": ""}, {"name": "The Log: What every software engineer should know about real-time data's unifying abstraction", "url": "http://engineering.linkedin.com/distributed-systems/log-what-every-software-engineer-should-know-about-real-time-datas-unifying", "description": ""}, {"name": "Incident Response at Heroku", "url": "https://blog.heroku.com/archives/2014/5/9/incident-response-at-heroku", "description": ""}, {"name": "On HTTP Load Testing", "url": 
"http://www.mnot.net/blog/2011/05/18/http_benchmark_rules", "description": ""}, {"name": "Observability at Twitter", "url": "https://blog.twitter.com/2013/observability-at-twitter", "description": ""}, {"name": "Stevey’s Google Platforms Rant", "url": "https://gist.github.com/chitchcock/1281611", "description": ""}, {"name": "Design, Lessons, and Advice from Building Distributed Systems at Google", "url": "http://www.cs.cornell.edu/projects/ladis2009/talks/dean-keynote-ladis2009.pdf", "description": ""}, {"name": "Service Design Best Practices", "url": "http://www.mvdirona.com/jrh/TalksAndPapers/JamesHamilton_POA20090226.pdf", "description": ""}, {"name": "The Field Guide To Understanding Human Error", "url": "http://www.amazon.com/Field-Guide-Understanding-Human-Error/dp/0754648265", "description": ""}, {"name": "Agile Retrospectives: Making Good Teams Great", "url": "http://www.amazon.com/Agile-Retrospectives-Making-Teams-Great/dp/0977616649", "description": ""}, {"name": "Better: A Surgeon’s Notes on Performance", "url": "http://www.amazon.com/dp/0312427654", "description": ""}, {"name": "The Checklist Manifesto: How to Get Things Right", "url": "http://www.amazon.com/The-Checklist-Manifesto-ebook/dp/B0030V0PEW", "description": ""}, {"name": "High Performance Browser Networking", "url": "http://chimera.labs.oreilly.com/books/1230000000545/index.html", "description": ""}, {"name": "Resilience Engineering in Practice", "url": "http://www.amazon.com/Resilience-Engineering-Practice-Ashgate-Studies/dp/1409410358/", "description": ""}, {"name": "Effective Monitoring and Alerting", "url": "http://www.amazon.com/Effective-Monitoring-Alerting-For-Operations/dp/1449333524", "description": ""}, {"name": "Release It!: Design and Deploy Production-Ready Software", "url": "http://www.amazon.com/Release-It-Production-Ready-Pragmatic-Programmers/dp/0978739213", "description": ""}, {"name": "The Challenger Launch Decision", "url": 
"http://www.amazon.com/The-Challenger-Launch-Decision-Technology/dp/0226851761", "description": ""}, {"name": "Managing the Unexpected", "url": "http://www.amazon.com/gp/product/B004IK9U4U", "description": ""}, {"name": "Berkeley AMP Lab", "url": "https://amplab.cs.berkeley.edu/", "description": ""}, {"name": "Berkeley Database Group", "url": "http://db.cs.berkeley.edu/w/", "description": ""}, {"name": "Google Research", "url": "http://research.google.com/", "description": ""}, {"name": "Microsoft Systems Research", "url": "http://research.microsoft.com/en-US/groups/sr/default.aspx", "description": ""}, {"name": "Twitter Research", "url": "https://engineering.twitter.com/research", "description": ""}, {"name": "Monitorama", "url": "http://monitorama.com/", "description": ""}, {"name": "Ricon", "url": "http://ricon.io/", "description": ""}, {"name": "Surge", "url": "http://surge.omniti.com/", "description": ""}, {"name": "Velocity", "url": "http://velocityconf.com/", "description": ""}]}]}