1 line
No EOL
13 KiB
JSON
1 line
No EOL
13 KiB
JSON
{"slug": "theimpossibleastronaut--awesome-linguistics", "title": "Awesome Linguistics", "description": "A curated list of anything remotely related to linguistics", "github_url": "https://github.com/theimpossibleastronaut/awesome-linguistics", "stars": "390", "tag": "Computer Science", "entry_count": 84, "subcategory_count": 13, "subcategories": [{"name": "Awesome Linguistics", "parent": "", "entries": [{"name": "Programming", "url": "#programming", "description": ""}, {"name": "Resources", "url": "#resources", "description": ""}, {"name": "Standards", "url": "#standards", "description": ""}, {"name": "Lists", "url": "#lists", "description": ""}, {"name": "Communities", "url": "#communities", "description": ""}]}, {"name": "Programming", "parent": "", "entries": []}, {"name": "Platforms and toolkits", "parent": "", "entries": [{"name": "CLARIN-D web tools", "url": "https://www.clarin-d.net/en/analysing", "description": "Tools for Analysing Research Data"}, {"name": "CorpusExplorer", "url": "https://notes.jan-oliver-ruediger.de/software/corpusexplorer-overview/", "description": "Software for corpus linguists and text/data mining enthusiasts. 
The CorpusExplorer combines over 50 interactive visualizations under a user-friendly interface."}, {"name": "Haxe-linguistics", "url": "https://github.com/sexybiggetje/haxe-linguistics", "description": "Early linguistical analysis and natural language processing library for Haxe.", "stars": "26"}, {"name": "Natural", "url": "https://github.com/NaturalNode/natural", "description": "General natural language tools for Node.js.", "stars": "11k"}, {"name": "Natural Language ToolKit (NLTK)", "url": "http://www.nltk.org/", "description": "The most complete platform for building Python programs to work with human language data."}, {"name": "Snowball", "url": "https://snowballstem.org/", "description": "Snowball is a language in which stemming algorithms can be easily represented."}, {"name": "Spacy", "url": "https://spacy.io/", "description": "Industrial-strength Natural Language Processing in Python."}, {"name": "Mate Tools", "url": "http://hdl.handle.net/11022/1007-0000-0000-8E4E-A", "description": ""}, {"name": "UBIAI", "url": "https://ubiai.tools/", "description": "Easy-to-use text annotation tool for teams with most comprehensive auto-annotation features. Supports NER, relations and document classification as well as OCR annotation for invoice labeling."}, {"name": "textblob-de", "url": "https://github.com/markuskiller/textblob-de", "description": "Nice alternative for spacy (see above).", "stars": "103"}, {"name": "tyo", "url": "https://github.com/mongsvo/tyo", "description": "A utility for finding Typo-Bridges.", "stars": "0"}, {"name": "UralicNLP", "url": "https://github.com/mikahama/uralicNLP", "description": "An open source Python library for processing morphologically rich and, for the most part, endangered Uralic languages. 
It can do morphological analysis, generation, lemmatization, disambiguation and lexical lookup for a great many Uralic languages.", "stars": "74"}]}, {"name": "Algorithms", "parent": "", "entries": [{"name": "Stemming algorithms for various European languages", "url": "http://snowball.tartarus.org/texts/stemmersoverview.html", "description": "Various stemming algorithms from snowball."}, {"name": "The Porter Stemmer Algorithm", "url": "http://tartarus.org/martin/PorterStemmer/", "description": "The ‘official’ home page for distribution of the Porter Stemming Algorithm, written and maintained by its author, Martin Porter."}]}, {"name": "Data sets", "parent": "", "entries": [{"name": "EuroRomCom Data", "url": "https://github.com/kirkins/euroromcom", "description": "JSON formatted Pan-Romance word lists.", "stars": "22"}, {"name": "Araneum Germanicum", "url": "http://aranea.juls.savba.sk/aranea_about/_germanicum.html", "description": ""}, {"name": "CEHugeWebCorpus", "url": "https://lindat.mff.cuni.cz/repository/xmlui/handle/11372/LRT-2638", "description": "German corpus based on CommonCrawl"}, {"name": "Digitales Wörterbuch der deutschen Sprache (DWDS)", "url": "https://dwds.de", "description": ""}, {"name": "GC4 Corpus", "url": "https://german-nlp-group.github.io/projects/gc4-corpus.html", "description": ""}, {"name": "IDS Corpora", "url": "https://www1.ids-mannheim.de/kl/projekte/korpora", "description": "German Reference Corpus"}, {"name": "Leipzig Corpora Collection", "url": "https://wortschatz.uni-leipzig.de/en/download/", "description": "sampled sentences in different languages."}, {"name": "SdeWaC", "url": "https://www.ims.uni-stuttgart.de/forschung/ressourcen/korpora/sdewac.en.html", "description": "big german internet corpus"}, {"name": "C-WEP", "url": "http://lingured.info/linguistic-resources/cwep/", "description": ""}, {"name": "DysList (list of dyslexic errors)", "url": "https://github.com/Rauschii/DysListGerman", "description": "", "stars": "5"}, 
{"name": "Falko", "url": "https://www.linguistik.hu-berlin.de/de/institut/professuren/korpuslinguistik/forschung/falko", "description": ""}, {"name": "Litkey", "url": "https://www.linguistics.ruhr-uni-bochum.de/litkeycorpus/", "description": ""}, {"name": "OpinionSpam", "url": "https://github.com/hdaSprachtechnologie/OpinionSpam", "description": "", "stars": "2"}]}, {"name": "Resources", "parent": "", "entries": [{"name": "Low Resource Languages", "url": "https://github.com/RIchardLitt/low-resource-languages", "description": "A list of resources for conservation, development, and documentation of low resource (human) languages.", "stars": "405"}, {"name": "Language Science Press", "url": "https://langsci-press.org/", "description": "Language Science Press is a born-digital scholar-led open access publisher in linguistics."}]}, {"name": "Deep learning models and transformers", "parent": "", "entries": [{"name": "dbmdz BERT models", "url": "https://github.com/dbmdz/berts", "description": "", "stars": "156"}, {"name": "Deepset German BERT model", "url": "https://deepset.ai/german-bert", "description": ""}, {"name": "Evaluating German Transformer Language Models with Syntactic Agreement Tests", "url": "https://github.com/DFKI-NLP/gevalm", "description": "", "stars": "7"}, {"name": "German ELMo Model", "url": "https://github.com/t-systems-on-site-services-gmbh/german-elmo-model", "description": "", "stars": "28"}, {"name": "german-transformer-training", "url": "https://github.com/PhilipMay/german-transformer-training", "description": "", "stars": "23"}, {"name": "GermLM", "url": "https://github.com/tonianelope/Multilingual-BERT", "description": "", "stars": "14"}, {"name": "GerPT2", "url": "https://github.com/bminixhofer/gerpt2", "description": "", "stars": "20"}, {"name": "Sentence Transformers", "url": "https://github.com/UKPLab/sentence-transformers", "description": "", "stars": "16k"}]}, {"name": "On Wikipedia", "parent": "", "entries": [{"name": "Bag of words 
model", "url": "https://en.wikipedia.org/wiki/Bag-of-words_model", "description": ""}, {"name": "Document classification", "url": "https://en.wikipedia.org/wiki/Document_classification", "description": ""}, {"name": "Language models", "url": "https://en.wikipedia.org/wiki/Language_model", "description": ""}, {"name": "Naive Bayes classification", "url": "https://en.wikipedia.org/wiki/Naive_Bayes_classifier", "description": ""}, {"name": "Natural language processing", "url": "https://en.wikipedia.org/wiki/Natural_language_processing", "description": ""}, {"name": "Outline of natural language processing", "url": "https://en.wikipedia.org/wiki/Outline_of_natural_language_processing", "description": ""}, {"name": "Parts of speech tagging", "url": "https://en.wikipedia.org/wiki/Part-of-speech_tagging", "description": ""}, {"name": "Sentiment analysis", "url": "https://en.wikipedia.org/wiki/Sentiment_analysis", "description": ""}, {"name": "Term frequency - inverse document frequency", "url": "https://en.wikipedia.org/wiki/Tf%E2%80%93idf", "description": ""}, {"name": "Vector space model", "url": "https://en.wikipedia.org/wiki/Vector_space_model", "description": ""}]}, {"name": "On Youtube", "parent": "", "entries": [{"name": "Computational Linguistics Lecture Playlist (Youtube)", "url": "https://www.youtube.com/playlist?list=PLegWUnz91WfuPebLI97-WueAP90JO-15i", "description": "Lectures for University of Maryland class on computational linguistics."}, {"name": "The Virtual Linguistics Campus", "url": "https://www.youtube.com/channel/UCaMpov1PPVXGcKYgwHjXB3g", "description": "CC-licensed educational videos interconnected with Marburg University's e-learning platform of the same name."}]}, {"name": "Books", "parent": "", "entries": [{"name": "Essentials of Linguistics, 2nd edition", "url": "https://ecampusontario.pressbooks.pub/essentialsoflinguistics2/", "description": "An introductory book (2nd edition)."}, {"name": "Introduction to Linguistics", "url": 
"https://linguistics.ucla.edu/people/Kracht/courses/ling20-fall07/ling-intro.pdf", "description": ""}, {"name": "Natural Language Processing with Python", "url": "https://www.nltk.org/book/", "description": "The book from the NLTK package."}, {"name": "Text Mining with R", "url": "https://www.tidytextmining.com", "description": ""}, {"name": "Foundations of Computational Linguistics", "url": "https://books.google.com/books?id=o9iGAgAAQBAJ&dq=Foundations+of+Computational+Linguistics&hl=nl&source=gbs_navlinks_s", "description": ""}, {"name": "Foundations of Statistical Natural Language Processing", "url": "https://books.google.nl/books?id=YiFDxbEX3SUC", "description": ""}, {"name": "Semisupervised Learning for Computational Linguistics", "url": "https://books.google.com/books/about/Semisupervised_Learning_for_Computationa.html?id=VCd67cGB_rAC&redir_esc=y", "description": ""}, {"name": "Speech and Language Processing: An Introduction to Natural Language Processing, Computational Linguistics and Speech Recognition", "url": "https://books.google.nl/books?id=fZmj5UNK8AQC", "description": ""}, {"name": "The Oxford Handbook of Computational Linguistics", "url": "https://www.oxfordhandbooks.com/view/10.1093/oxfordhb/9780199276349.001.0001/oxfordhb-9780199276349", "description": ""}]}, {"name": "Standards", "parent": "", "entries": [{"name": "DTA Basisformat", "url": "https://www.deutschestextarchiv.de/doku/basisformat/", "description": ""}, {"name": "ISO TC 37 SC 4", "url": "https://www.iso.org/committee/297592.html", "description": ""}, {"name": "UIMA", "url": "https://docs.oasis-open.org/uima/v1.0/os/uima-spec-os.html", "description": ""}]}, {"name": "Lists", "parent": "", "entries": [{"name": "15 most popular books on good reads", "url": "https://www.goodreads.com/shelf/show/natural-language-processing", "description": ""}, {"name": "nlp-datasets", "url": "https://github.com/niderhoff/nlp-datasets", "description": "", "stars": "5.8k"}, {"name": "NLP-progress", 
"url": "https://github.com/sebastianruder/NLP-progress", "description": "", "stars": "23k"}, {"name": "/r/LanguageTechnology/", "url": "https://www.reddit.com/r/LanguageTechnology/", "description": ""}, {"name": "awesome-nlp", "url": "https://github.com/keon/awesome-nlp", "description": "", "stars": "17k"}, {"name": "Awesome Community-Curated NLP List", "url": "https://github.com/alvations/awesome-community-curated-nlp", "description": "", "stars": "198"}, {"name": "awesome-chinese-nlp", "url": "https://github.com/crownpku/Awesome-Chinese-NLP", "description": "", "stars": "7.8k"}, {"name": "awesome-danish", "url": "https://github.com/fnielsen/awesome-danish", "description": "", "stars": "172"}, {"name": "awesome-hungarian-nlp", "url": "https://github.com/oroszgy/awesome-hungarian-nlp", "description": "", "stars": "234"}, {"name": "awesome Information Retrieval", "url": "https://github.com/harpribot/awesome-information-retrieval", "description": "", "stars": "1.1k"}, {"name": "Indonesian NLP", "url": "https://github.com/kmkurn/id-nlp-resource", "description": "", "stars": "279"}, {"name": "Norwegian NLP resources", "url": "https://github.com/web64/norwegian-nlp-resources", "description": "", "stars": "181"}, {"name": "German NLP resources", "url": "https://github.com/adbar/German-NLP/", "description": "", "stars": "465"}, {"name": "awesome-nlp-polish", "url": "https://github.com/ksopyla/awesome-nlp-polish", "description": "", "stars": "297"}, {"name": "awesome-spanish-nlp", "url": "https://github.com/dav009/awesome-spanish-nlp", "description": "", "stars": "336"}, {"name": "M. 
Weisser's list of NLP/Computational Linguistics Resources", "url": "https://martinweisser.org/corpora_site/comp_ling_resources.html", "description": ""}]}, {"name": "Communities", "parent": "", "entries": [{"name": "Linguistics Stack Exchange", "url": "https://linguistics.stackexchange.com/", "description": ""}, {"name": "Untranslatable.co, Multilingual urban dictionary", "url": "https://untranslatable.co/", "description": ""}]}]} |