{"slug": "arbox--nlp-with-ruby", "title": "Nlp with Ruby", "description": "Curated List: Practical Natural Language Processing done in Ruby", "github_url": "https://github.com/arbox/nlp-with-ruby", "stars": "967", "tag": "Computer Science", "entry_count": 146, "subcategory_count": 17, "subcategories": [{"name": "General", "parent": "", "entries": [{"name": ":sparkles: Tutorials", "url": "#sparkles-tutorials", "description": ""}, {"name": "NLP Pipeline Subtasks", "url": "#nlp-pipeline-subtasks", "description": ""}, {"name": "High Level Tasks", "url": "#high-level-tasks", "description": ""}, {"name": "Dialog Agents, Assistants, and Chatbots", "url": "#dialog-agents-assistants-and-chatbots", "description": ""}, {"name": "Linguistic Resources", "url": "#linguistic-resources", "description": ""}, {"name": "Machine Learning Libraries", "url": "#machine-learning-libraries", "description": ""}, {"name": "Data Visualization", "url": "#data-visualization", "description": ""}, {"name": "Optical Character Recognition", "url": "#optical-character-recognition", "description": ""}, {"name": "Text Extraction", "url": "#text-extraction", "description": ""}, {"name": "Full Text Search, Information Retrieval, Indexing", "url": "#full-text-search-information-retrieval-indexing", "description": ""}, {"name": "Language Aware String Manipulation", "url": "#language-aware-string-manipulation", "description": ""}, {"name": "Articles, Posts, Talks, and Presentations", "url": "#articles-posts-talks-and-presentations", "description": ""}, {"name": "Projects and Code Examples", "url": "#projects-and-code-examples", "description": ""}, {"name": "Books", "url": "#books", "description": ""}, {"name": "Community", "url": "#community", "description": ""}, {"name": "Needs your Help!", "url": "#needs-your-help", "description": ""}, {"name": "Related Resources", "url": "#related-resources", "description": ""}, {"name": "License", "url": "#license", "description": ""}]}, {"name": "Pipeline Generation", 
"parent": "NLP Pipeline Subtasks", "entries": [{"name": "composable\\_operations", "url": "https://github.com/t6d/composable_operations", "description": "", "stars": "47"}, {"name": "ruby-spark", "url": "https://github.com/ondra-m/ruby-spark", "description": "", "stars": "224"}, {"name": "phobos", "url": "https://github.com/phobos/phobos", "description": "", "stars": "211"}, {"name": "parallel", "url": "https://github.com/grosser/parallel", "description": "", "stars": "3.9k"}, {"name": "pwrake", "url": "https://github.com/masa16/pwrake", "description": "", "stars": "57"}]}, {"name": "Multipurpose Engines", "parent": "NLP Pipeline Subtasks", "entries": [{"name": "open-nlp", "url": "https://github.com/louismullie/open-nlp", "description": "", "stars": "89"}, {"name": "stanford-core-nlp", "url": "https://github.com/louismullie/stanford-core-nlp", "description": "", "stars": "429"}, {"name": "treat", "url": "https://github.com/louismullie/treat", "description": "", "stars": "1.4k"}, {"name": "nlp\\_toolz", "url": "https://github.com/LeFnord/nlp_toolz", "description": "", "stars": "2"}, {"name": "open\\_nlp", "url": "https://github.com/hck/open_nlp", "description": "", "stars": "11"}, {"name": "ruby-spacy", "url": "https://github.com/yohasebe/ruby-spacy", "description": "", "stars": "34"}, {"name": "alchemyapi\\_ruby", "url": "https://github.com/alchemyapi/alchemyapi_ruby", "description": "", "stars": "36"}, {"name": "wit-ruby", "url": "https://github.com/wit-ai/wit-ruby", "description": "", "stars": "280"}, {"name": "wlapi", "url": "https://github.com/arbox/wlapi", "description": "Ruby client library for", "stars": "19"}, {"name": "monkeylearn-ruby", "url": "https://github.com/monkeylearn/monkeylearn-ruby", "description": "Sentiment", "stars": "79"}, {"name": "google-cloud-language", "url": "https://github.com/googleapis/google-cloud-ruby/tree/master/google-cloud-language", "description": "", "stars": "1.2k"}]}, {"name": "Language Identification", "parent": "NLP 
Pipeline Subtasks", "entries": [{"name": "scylla", "url": "https://github.com/hashwin/scylla", "description": "", "stars": "34"}]}, {"name": "Segmentation", "parent": "NLP Pipeline Subtasks", "entries": [{"name": "tokenizer", "url": "https://github.com/arbox/tokenizer", "description": "", "stars": "44"}, {"name": "pragmatic\\_tokenizer", "url": "https://github.com/diasks2/pragmatic_tokenizer", "description": "", "stars": "87"}, {"name": "nlp-pure", "url": "https://github.com/parhamr/nlp-pure", "description": "", "stars": "19"}, {"name": "textoken", "url": "https://github.com/manorie/textoken", "description": "", "stars": "31"}, {"name": "pragmatic\\_segmenter", "url": "https://github.com/diasks2/pragmatic_segmenter", "description": "", "stars": "503"}, {"name": "punkt-segmenter", "url": "https://github.com/lfcipriani/punkt-segmenter", "description": "", "stars": "89"}, {"name": "tactful\\_tokenizer", "url": "https://github.com/zencephalon/Tactful_Tokenizer", "description": "", "stars": "79"}, {"name": "scapel", "url": "https://github.com/louismullie/scalpel", "description": "", "stars": "53"}]}, {"name": "Lexical Processing", "parent": "NLP Pipeline Subtasks", "entries": [{"name": "ruby-stemmer", "url": "https://github.com/aurelian/ruby-stemmer", "description": "", "stars": "255"}, {"name": "uea-stemmer", "url": "https://github.com/ealdent/uea-stemmer", "description": "", "stars": "50"}, {"name": "lemmatizer", "url": "https://github.com/yohasebe/lemmatizer", "description": "", "stars": "102"}, {"name": "wc", "url": "https://github.com/thesp0nge/wc", "description": "", "stars": "6"}, {"name": "word\\_count", "url": "https://github.com/AtelierConvivialite/word_count", "description": "", "stars": "4"}, {"name": "words\\_counted", "url": "https://github.com/abitdodgy/words_counted", "description": "", "stars": "156"}, {"name": "stopwords-filter", "url": "https://github.com/brenes/stopwords-filter", "description": "Filter and", "stars": "68"}]}, {"name": "Phrasal Level 
Processing", "parent": "NLP Pipeline Subtasks", "entries": [{"name": "n\\_gram", "url": "https://github.com/reddavis/N-Gram", "description": "", "stars": "36"}, {"name": "ruby-ngram", "url": "https://github.com/tkellen/ruby-ngram", "description": "", "stars": "11"}, {"name": "raingrams", "url": "https://github.com/postmodern/raingrams", "description": "", "stars": "69"}]}, {"name": "Syntactic Processing", "parent": "NLP Pipeline Subtasks", "entries": [{"name": "stanfordparser", "url": "https://rubygems.org/gems/stanfordparser", "description": ""}, {"name": "rley", "url": "https://github.com/famished-tiger/Rley", "description": "", "stars": "34"}, {"name": "rsyntaxtree", "url": "https://github.com/yohasebe/rsyntaxtree", "description": "", "stars": "67"}]}, {"name": "Semantic Analysis", "parent": "NLP Pipeline Subtasks", "entries": [{"name": "amatch", "url": "https://github.com/flori/amatch", "description": "", "stars": "354"}, {"name": "damerau-levenshtein", "url": "https://github.com/GlobalNamesArchitecture/damerau-levenshtein", "description": "", "stars": "127"}, {"name": "hotwater", "url": "https://github.com/colinsurprenant/hotwater", "description": "", "stars": "80"}, {"name": "levenshtein-ffi", "url": "https://github.com/dbalatero/levenshtein-ffi", "description": "", "stars": "148"}, {"name": "tf\\_idf", "url": "https://github.com/reddavis/TF-IDF", "description": "", "stars": "36"}, {"name": "tf-idf-similarity", "url": "https://github.com/jpmckinney/tf-idf-similarity", "description": "", "stars": "652"}]}, {"name": "Pragmatical Analysis", "parent": "NLP Pipeline Subtasks", "entries": [{"name": "SentimentLib", "url": "https://github.com/nzaillian/sentiment_lib", "description": "", "stars": "13"}]}, {"name": "Spelling and Error Correction", "parent": "High Level Tasks", "entries": [{"name": "gingerice", "url": "https://github.com/subosito/gingerice", "description": "", "stars": "479"}, {"name": "hunspell-i18n", "url": "https://github.com/romanbsd/hunspell", 
"description": "", "stars": "4"}, {"name": "ffi-hunspell", "url": "https://github.com/postmodern/ffi-hunspell", "description": "", "stars": "49"}, {"name": "hunspell", "url": "https://github.com/segabor/Hunspell", "description": "", "stars": "34"}]}, {"name": "Text Alignment", "parent": "High Level Tasks", "entries": [{"name": "alignment", "url": "https://github.com/povilasjurcys/alignment", "description": "", "stars": "1"}]}, {"name": "Machine Translation", "parent": "High Level Tasks", "entries": [{"name": "google-api-client", "url": "https://github.com/googleapis/google-api-ruby-client", "description": "", "stars": "2.6k"}, {"name": "microsoft\\_translator", "url": "https://github.com/ikayzo/microsoft_translator", "description": "", "stars": "21"}, {"name": "termit", "url": "https://github.com/pawurb/termit", "description": "", "stars": "507"}, {"name": "zipf", "url": "https://github.com/pks/zipf", "description": "", "stars": "2"}]}, {"name": "Sentiment Analysis", "parent": "High Level Tasks", "entries": [{"name": "stimmung", "url": "https://github.com/pachacamac/stimmung", "description": "", "stars": "20"}]}, {"name": "Numbers, Dates, and Time Parsing", "parent": "High Level Tasks", "entries": [{"name": "chronic", "url": "https://github.com/mojombo/chronic", "description": "", "stars": "3.1k"}, {"name": "chronic\\_between", "url": "https://github.com/jrobertson/chronic_between", "description": "", "stars": "27"}, {"name": "chronic\\_duration", "url": "https://github.com/henrypoydar/chronic_duration", "description": "", "stars": "347"}, {"name": "kronic", "url": "https://github.com/xaviershay/kronic", "description": "", "stars": "151"}, {"name": "nickel", "url": "https://github.com/iainbeeston/nickel", "description": "", "stars": "107"}, {"name": "tickle", "url": "https://github.com/yb66/tickle", "description": "", "stars": "75"}, {"name": "numerizer", "url": "https://github.com/jduff/numerizer", "description": "", "stars": "34"}]}, {"name": "Named Entity 
Recognition", "parent": "High Level Tasks", "entries": [{"name": "ruby-ner", "url": "https://github.com/mblongii/ruby-ner", "description": "", "stars": "17"}, {"name": "ruby-nlp", "url": "https://github.com/tiendung/ruby-nlp", "description": "", "stars": "87"}]}, {"name": "Text-to-Speech-to-Text", "parent": "High Level Tasks", "entries": [{"name": "espeak-ruby", "url": "https://github.com/dejan/espeak-ruby", "description": "", "stars": "186"}, {"name": "tts", "url": "https://github.com/c2h2/tts", "description": "", "stars": "89"}, {"name": "att\\_speech", "url": "https://github.com/adhearsion/att_speech", "description": "", "stars": "20"}, {"name": "pocketsphinx-ruby", "url": "https://github.com/watsonbox/pocketsphinx-ruby", "description": "", "stars": "253"}, {"name": "chatterbot", "url": "https://github.com/muffinista/chatterbot", "description": "", "stars": "493"}, {"name": "lita", "url": "https://github.com/litaio/lita", "description": "", "stars": "1.7k"}, {"name": "rwordnet", "url": "https://github.com/doches/rwordnet", "description": "", "stars": "88"}, {"name": "wordnet", "url": "https://github.com/ged/ruby-wordnet/blob/master/README.rdoc", "description": "", "stars": "134"}, {"name": "rb-libsvm", "url": "https://github.com/febeling/rb-libsvm", "description": "", "stars": "276"}, {"name": "weka", "url": "https://github.com/paulgoetze/weka-jruby", "description": "", "stars": "67"}, {"name": "decisiontree", "url": "https://github.com/igrigorik/decisiontree", "description": "", "stars": "1.4k"}, {"name": "rtimbl", "url": "https://github.com/maspwr/rtimbl", "description": "", "stars": "5"}, {"name": "classifier-reborn", "url": "https://github.com/jekyll/classifier-reborn", "description": "", "stars": "531"}, {"name": "lda-ruby", "url": "https://github.com/ealdent/lda-ruby", "description": "", "stars": "132"}, {"name": "liblinear-ruby-swig", "url": "https://github.com/tomz/liblinear-ruby-swig", "description": "", "stars": "82"}, {"name": "linnaeus", "url": 
"https://github.com/djcp/linnaeus", "description": "", "stars": "37"}, {"name": "maxent\\_string\\_classifier", "url": "https://github.com/mccraigmccraig/maxent_string_classifier", "description": "", "stars": "9"}, {"name": "naive\\_bayes", "url": "https://github.com/reddavis/Naive-Bayes", "description": "", "stars": "46"}, {"name": "nbayes", "url": "https://github.com/oasic/nbayes", "description": "", "stars": "149"}, {"name": "omnicat", "url": "https://github.com/mustafaturan/omnicat", "description": "", "stars": "11"}, {"name": "omnicat-bayes", "url": "https://github.com/mustafaturan/omnicat-bayes", "description": "", "stars": "32"}, {"name": "ruby-fann", "url": "https://github.com/tangledpath/ruby-fann", "description": "", "stars": "451"}, {"name": "rblearn", "url": "https://github.com/himkt/rblearn", "description": "Feature Extraction and Crossvalidation library.", "stars": "1"}, {"name": "tesseract-ocr", "url": "https://github.com/meh/ruby-tesseract-ocr", "description": "", "stars": "605"}, {"name": "yomu", "url": "https://github.com/yomurb/yomu", "description": "", "stars": "478"}, {"name": "rsolr", "url": "https://github.com/rsolr/rsolr", "description": "", "stars": "416"}, {"name": "sunspot", "url": "https://github.com/sunspot/sunspot", "description": "", "stars": "3k"}, {"name": "thinking-sphinx", "url": "https://github.com/pat/thinking-sphinx", "description": "", "stars": "1.6k"}, {"name": "elasticsearch", "url": "https://github.com/elastic/elasticsearch-ruby/tree/master/elasticsearch", "description": "", "stars": "1.9k"}, {"name": "elasticsearch-rails", "url": "https://github.com/elastic/elasticsearch-rails", "description": "", "stars": "3k"}, {"name": "google-api-client", "url": "https://github.com/googleapis/google-api-ruby-client", "description": "", "stars": "2.6k"}, {"name": "fuzzy\\_match", "url": "https://github.com/seamusabshere/fuzzy_match", "description": "", "stars": "650"}, {"name": "fuzzy-string-match", "url": 
"https://github.com/kiyoka/fuzzy-string-match", "description": "", "stars": "268"}, {"name": "active\\_support", "url": "https://github.com/rails/rails/tree/master/activesupport/lib/active_support", "description": "", "stars": "52k"}, {"name": "fuzzy\\_tools", "url": "https://github.com/brianhempel/fuzzy_tools", "description": "", "stars": "22"}, {"name": "u", "url": "http://disu.se/software/u-1.0/", "description": ""}, {"name": "unicode", "url": "https://github.com/blackwinter/unicode", "description": "", "stars": "82"}, {"name": "CommonRegexRuby", "url": "https://github.com/talyssonoc/CommonRegexRuby", "description": "", "stars": "78"}, {"name": "regexp-examples", "url": "https://github.com/tom-lord/regexp-examples", "description": "", "stars": "508"}, {"name": "verbal\\_expressions", "url": "https://github.com/ryan-endacott/verbal_expressions", "description": "", "stars": "570"}, {"name": "translit\\_kit", "url": "https://github.com/AnalyzePlatypus/TranslitKit", "description": "", "stars": "5"}, {"name": "re2", "url": "https://github.com/mudge/re2", "description": "", "stars": "87"}, {"name": "regex\\_sample", "url": "https://github.com/mochizukikotaro/regex_sample", "description": "", "stars": "1"}, {"name": "iuliia", "url": "https://github.com/adnikiforov/iuliia-rb", "description": "", "stars": "8"}, {"name": "Going the Distance", "url": "https://github.com/schneems/going_the_distance", "description": "", "stars": "60"}, {"name": "Named entity recognition with Stanford NER and Ruby", "url": "https://github.com/mblongii/ruby-ner", "description": "", "stars": "17"}, {"name": "Words Counted", "url": "http://rubywordcount.com/", "description": ""}, {"name": "RSyntaxTree", "url": "https://yohasebe.com/rsyntaxtree/", "description": ""}, {"name": "Miller, Rob", "url": "https://twitter.com/robmil/", "description": ""}, {"name": "Watson, Mark", "url": "https://twitter.com/mark_l_watson", "description": ""}, {"name": "Watson, Mark", "url": 
"https://twitter.com/mark_l_watson", "description": ""}, {"name": "Reddit", "url": "https://www.reddit.com/r/LanguageTechnology/search?q=ruby&restrict_sr=on", "description": ""}, {"name": "Stack Overflow", "url": "https://stackoverflow.com/search?q=%5Bnlp%5D+and+%5Bruby%5D", "description": ""}, {"name": "Twitter", "url": "https://twitter.com/search?q=Ruby%20NLP%20%23ruby%20OR%20%23nlproc%20OR%20%23rubynlp%20OR%20%23nlp&src=typd&lang=en", "description": ""}, {"name": "ferret", "url": "https://github.com/dbalmain/ferret", "description": "", "stars": "280"}, {"name": "summarize", "url": "https://github.com/ssoper/summarize", "description": "", "stars": "205"}, {"name": "Neural Machine Translation Implementations", "url": "https://github.com/jonsafari/nmt-list", "description": "", "stars": "364"}, {"name": "Awesome Ruby", "url": "https://github.com/markets/awesome-ruby#natural-language-processing", "description": "", "stars": "12k"}, {"name": "Ruby NLP", "url": "https://github.com/diasks2/ruby-nlp", "description": "", "stars": "1.2k"}, {"name": "Speech and Natural Language Processing", "url": "https://github.com/edobashira/speech-language-processing", "description": "", "stars": "2.1k"}, {"name": "Scientific Ruby", "url": "http://sciruby.com/", "description": ""}, {"name": "iRuby", "url": "https://github.com/SciRuby/iruby", "description": "IRuby kernel for Jupyter (formerly IPython).", "stars": "728"}, {"name": "Awesome OCR", "url": "https://github.com/kba/awesome-ocr", "description": "", "stars": "2.2k"}, {"name": "Awesome TensorFlow", "url": "https://github.com/jtoy/awesome-tensorflow", "description": "", "stars": "17k"}]}], "name": ""}