nltk-data: add all downloadables

This commit is contained in:
Benjamin Sparks 2025-05-21 16:46:18 +02:00
parent f2f2eab113
commit 547dc4736e

View file

@ -54,41 +54,212 @@ let
'';
}
);
# makeChunker: pname -> package
# Wraps makeNltkDataPackage for data stored under the "chunkers" location;
# the caller supplies only the package name, location and hash are fixed here.
makeChunker = pname: makeNltkDataPackage {
  location = "chunkers";
  hash = "sha256-kemjqaCM9hlKAdMw8oVJnp62EAC9rMQ50dKg7wlAwEc=";
  inherit pname;
};
# makeCorpus: pname -> package
# Wraps makeNltkDataPackage for data stored under the "corpora" location;
# the caller supplies only the package name, location and hash are fixed here.
makeCorpus = pname: makeNltkDataPackage {
  location = "corpora";
  hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";
  inherit pname;
};
# makeGrammar: pname -> package
# Wraps makeNltkDataPackage for data stored under the "grammars" location;
# the caller supplies only the package name, location and hash are fixed here.
makeGrammar = pname: makeNltkDataPackage {
  location = "grammars";
  hash = "sha256-pyLEcX3Azv8j1kCGvVYonuiNgVJxtWt7veU0S/yNbIM=";
  inherit pname;
};
# makeHelp: pname -> package
# Wraps makeNltkDataPackage for data stored under the "help" location;
# the caller supplies only the package name, location and hash are fixed here.
makeHelp = pname: makeNltkDataPackage {
  location = "help";
  hash = "sha256-97mYLNES5WujLF5gD8Ul4cJ6LqSzz+jDzclUsdBeHNE=";
  inherit pname;
};
# makeMisc: pname -> package
# Wraps makeNltkDataPackage for data stored under the "misc" location;
# the caller supplies only the package name, location and hash are fixed here.
makeMisc = pname: makeNltkDataPackage {
  location = "misc";
  hash = "sha256-XtizfEsc8TYWqvvC/eSFdha2ClC5/ZiJM8nue0vXLb4=";
  inherit pname;
};
# makeModel: pname -> package
# Wraps makeNltkDataPackage for data stored under the "models" location;
# the caller supplies only the package name, location and hash are fixed here.
makeModel = pname: makeNltkDataPackage {
  location = "models";
  hash = "sha256-iq3weEgCci6rgLW2j28F2eRLprJtInGXKe/awJPSVG4=";
  inherit pname;
};
# makeTagger: pname -> package
# Wraps makeNltkDataPackage for data stored under the "taggers" location;
# the caller supplies only the package name, location and hash are fixed here.
makeTagger = pname: makeNltkDataPackage {
  location = "taggers";
  hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
  inherit pname;
};
# makeTokenizer: pname -> package
# Wraps makeNltkDataPackage for data stored under the "tokenizers" location;
# the caller supplies only the package name, location and hash are fixed here.
makeTokenizer = pname: makeNltkDataPackage {
  location = "tokenizers";
  hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
  inherit pname;
};
# makeStemmer: pname -> package
# Wraps makeNltkDataPackage for data stored under the "stemmers" location;
# the caller supplies only the package name, location and hash are fixed here.
makeStemmer = pname: makeNltkDataPackage {
  location = "stemmers";
  hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk=";
  inherit pname;
};
in
# Package scope exposing one attribute per NLTK data package.
#
# Every attribute is built through the per-location helper (makeChunker,
# makeCorpus, ...) so that `location` and `hash` are stated once per category.
# Attribute names are hyphenated; the string passed to the helper is the
# package name handed to makeNltkDataPackage (note "nombank.1.0" and
# "omw-1.4", which are not plain underscore forms of their attribute names).
#
# Each name is defined exactly once: Nix rejects an attribute set literal that
# defines the same attribute twice, so the former long-form definitions of
# punkt, punkt-tab, averaged-perceptron-tagger, averaged-perceptron-tagger-eng,
# snowball-data, stopwords and wordnet are expressed only via the helpers
# below (same location and hash as before).
lib.makeScope newScope (self: {
  ## Chunkers
  maxent-ne-chunker = makeChunker "maxent_ne_chunker";
  maxent-ne-chunker-tab = makeChunker "maxent_ne_chunker_tab";

  ## Corpora
  abc = makeCorpus "abc";
  alpino = makeCorpus "alpino";
  bcp47 = makeCorpus "bcp47";
  biocreative-ppi = makeCorpus "biocreative_ppi";
  brown = makeCorpus "brown";
  brown-tei = makeCorpus "brown_tei";
  cess-cat = makeCorpus "cess_cat";
  cess-esp = makeCorpus "cess_esp";
  chat80 = makeCorpus "chat80";
  city-database = makeCorpus "city_database";
  cmudict = makeCorpus "cmudict";
  comparative-sentences = makeCorpus "comparative_sentences";
  comtrans = makeCorpus "comtrans";
  conll2000 = makeCorpus "conll2000";
  conll2002 = makeCorpus "conll2002";
  conll2007 = makeCorpus "conll2007";
  crubadan = makeCorpus "crubadan";
  dependency-treebank = makeCorpus "dependency_treebank";
  dolch = makeCorpus "dolch";
  europarl-raw = makeCorpus "europarl_raw";
  extended-omw = makeCorpus "extended_omw";
  floresta = makeCorpus "floresta";
  framenet-v15 = makeCorpus "framenet_v15";
  framenet-v17 = makeCorpus "framenet_v17";
  gazetteers = makeCorpus "gazetteers";
  genesis = makeCorpus "genesis";
  gutenberg = makeCorpus "gutenberg";
  ieer = makeCorpus "ieer";
  inaugural = makeCorpus "inaugural";
  indian = makeCorpus "indian";
  jeita = makeCorpus "jeita";
  kimmo = makeCorpus "kimmo";
  knbc = makeCorpus "knbc";
  lin-thesaurus = makeCorpus "lin_thesaurus";
  mac-morpho = makeCorpus "mac_morpho";
  machado = makeCorpus "machado";
  masc-tagged = makeCorpus "masc_tagged";
  movie-reviews = makeCorpus "movie_reviews";
  mte-teip5 = makeCorpus "mte_teip5";
  names = makeCorpus "names";
  nombank-1-0 = makeCorpus "nombank.1.0";
  nonbreaking-prefixes = makeCorpus "nonbreaking_prefixes";
  nps-chat = makeCorpus "nps_chat";
  omw = makeCorpus "omw";
  omw-1-4 = makeCorpus "omw-1.4";
  opinion-lexicon = makeCorpus "opinion_lexicon";
  panlex-swadesh = makeCorpus "panlex_swadesh";
  paradigms = makeCorpus "paradigms";
  pe08 = makeCorpus "pe08";
  pil = makeCorpus "pil";
  pl196x = makeCorpus "pl196x";
  ppattach = makeCorpus "ppattach";
  problem-reports = makeCorpus "problem_reports";
  product-reviews-1 = makeCorpus "product_reviews_1";
  product-reviews-2 = makeCorpus "product_reviews_2";
  propbank = makeCorpus "propbank";
  pros-cons = makeCorpus "pros_cons";
  ptb = makeCorpus "ptb";
  qc = makeCorpus "qc";
  reuters = makeCorpus "reuters";
  rte = makeCorpus "rte";
  semcor = makeCorpus "semcor";
  senseval = makeCorpus "senseval";
  sentence-polarity = makeCorpus "sentence_polarity";
  sentiwordnet = makeCorpus "sentiwordnet";
  shakespeare = makeCorpus "shakespeare";
  sinica-treebank = makeCorpus "sinica_treebank";
  smultron = makeCorpus "smultron";
  state-union = makeCorpus "state_union";
  stopwords = makeCorpus "stopwords";
  subjectivity = makeCorpus "subjectivity";
  swadesh = makeCorpus "swadesh";
  switchboard = makeCorpus "switchboard";
  timit = makeCorpus "timit";
  toolbox = makeCorpus "toolbox";
  treebank = makeCorpus "treebank";
  twitter-samples = makeCorpus "twitter_samples";
  udhr = makeCorpus "udhr";
  udhr2 = makeCorpus "udhr2";
  unicode-samples = makeCorpus "unicode_samples";
  universal-treebanks-v20 = makeCorpus "universal_treebanks_v20";
  verbnet = makeCorpus "verbnet";
  verbnet3 = makeCorpus "verbnet3";
  webtext = makeCorpus "webtext";
  wordnet = makeCorpus "wordnet";
  wordnet-ic = makeCorpus "wordnet_ic";
  wordnet2021 = makeCorpus "wordnet2021";
  wordnet2022 = makeCorpus "wordnet2022";
  wordnet31 = makeCorpus "wordnet31";
  words = makeCorpus "words";
  ycoe = makeCorpus "ycoe";

  ## Grammars
  basque-grammars = makeGrammar "basque_grammars";
  book-grammars = makeGrammar "book_grammars";
  large-grammars = makeGrammar "large_grammars";
  sample-grammars = makeGrammar "sample_grammars";
  spanish-grammars = makeGrammar "spanish_grammars";

  ## Help
  tagsets-json = makeHelp "tagsets_json";

  ## Misc
  mwa-ppdb = makeMisc "mwa_ppdb";
  perluniprops = makeMisc "perluniprops";

  ## Models
  bllip-wsj-no-aux = makeModel "bllip_wsj_no_aux";
  moses-sample = makeModel "moses_sample";
  wmt15-eval = makeModel "wmt15_eval";
  word2vec-sample = makeModel "word2vec_sample";

  ## Taggers
  averaged-perceptron-tagger = makeTagger "averaged_perceptron_tagger";
  averaged-perceptron-tagger-eng = makeTagger "averaged_perceptron_tagger_eng";
  averaged-perceptron-tagger-ru = makeTagger "averaged_perceptron_tagger_ru";
  averaged-perceptron-tagger-rus = makeTagger "averaged_perceptron_tagger_rus";
  maxent-treebank-pos-tagger = makeTagger "maxent_treebank_pos_tagger";
  maxent-treebank-pos-tagger-tab = makeTagger "maxent_treebank_pos_tagger_tab";
  universal-tagset = makeTagger "universal_tagset";

  ## Tokenizers
  punkt = makeTokenizer "punkt";
  punkt-tab = makeTokenizer "punkt_tab";

  ## Stemmers
  porter-test = makeStemmer "porter_test";
  rslp = makeStemmer "rslp";
  snowball-data = makeStemmer "snowball_data";
})