mirror of
https://github.com/NixOS/nixpkgs.git
synced 2025-06-10 03:23:29 +03:00
Revert "nltk-data: make searchable, add all downloadables" (#409843)
This commit is contained in:
commit
34fc0d11eb
10 changed files with 45 additions and 224 deletions
|
@ -76,7 +76,7 @@ in
|
||||||
API_PORT = toString cfg.port;
|
API_PORT = toString cfg.port;
|
||||||
BASE_URL = "http://localhost:${toString cfg.port}";
|
BASE_URL = "http://localhost:${toString cfg.port}";
|
||||||
DATA_DIR = "/var/lib/mealie";
|
DATA_DIR = "/var/lib/mealie";
|
||||||
NLTK_DATA = pkgs.nltk-data.averaged-perceptron-tagger-eng;
|
NLTK_DATA = pkgs.nltk-data.averaged_perceptron_tagger_eng;
|
||||||
} // (builtins.mapAttrs (_: val: toString val) cfg.settings);
|
} // (builtins.mapAttrs (_: val: toString val) cfg.settings);
|
||||||
|
|
||||||
serviceConfig = {
|
serviceConfig = {
|
||||||
|
|
|
@ -109,7 +109,7 @@ pythonpkgs.buildPythonApplication rec {
|
||||||
|
|
||||||
# Needed for tests
|
# Needed for tests
|
||||||
preCheck = ''
|
preCheck = ''
|
||||||
export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng}
|
export NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng}
|
||||||
'';
|
'';
|
||||||
|
|
||||||
disabledTestPaths = [
|
disabledTestPaths = [
|
||||||
|
|
|
@ -305,8 +305,8 @@ python.pkgs.buildPythonApplication rec {
|
||||||
tesseract5
|
tesseract5
|
||||||
;
|
;
|
||||||
nltkData = with nltk-data; [
|
nltkData = with nltk-data; [
|
||||||
punkt-tab
|
punkt_tab
|
||||||
snowball-data
|
snowball_data
|
||||||
stopwords
|
stopwords
|
||||||
];
|
];
|
||||||
tests = { inherit (nixosTests) paperless; };
|
tests = { inherit (nixosTests) paperless; };
|
||||||
|
|
|
@ -152,7 +152,7 @@ let
|
||||||
|
|
||||||
paths = [
|
paths = [
|
||||||
nltk-data.punkt
|
nltk-data.punkt
|
||||||
nltk-data.averaged-perceptron-tagger
|
nltk-data.averaged_perceptron_tagger
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
in
|
in
|
||||||
|
|
|
@ -125,7 +125,7 @@ let
|
||||||
aider-nltk-data = symlinkJoin {
|
aider-nltk-data = symlinkJoin {
|
||||||
name = "aider-nltk-data";
|
name = "aider-nltk-data";
|
||||||
paths = [
|
paths = [
|
||||||
nltk-data.punkt-tab
|
nltk-data.punkt_tab
|
||||||
nltk-data.stopwords
|
nltk-data.stopwords
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
|
@ -44,7 +44,7 @@ buildPythonPackage rec {
|
||||||
|
|
||||||
# Needed for tests
|
# Needed for tests
|
||||||
preCheck = ''
|
preCheck = ''
|
||||||
export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng}
|
export NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng}
|
||||||
'';
|
'';
|
||||||
|
|
||||||
meta = {
|
meta = {
|
||||||
|
|
|
@ -24,7 +24,7 @@ let
|
||||||
name = "nltk-test-data";
|
name = "nltk-test-data";
|
||||||
paths = [
|
paths = [
|
||||||
nltk-data.punkt
|
nltk-data.punkt
|
||||||
nltk-data.punkt-tab
|
nltk-data.punkt_tab
|
||||||
nltk-data.stopwords
|
nltk-data.stopwords
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
|
@ -10,16 +10,12 @@ let
|
||||||
version = "0-unstable-2024-07-29";
|
version = "0-unstable-2024-07-29";
|
||||||
nativeBuildInputs = [ unzip ];
|
nativeBuildInputs = [ unzip ];
|
||||||
dontBuild = true;
|
dontBuild = true;
|
||||||
dontFixup = true;
|
|
||||||
meta = with lib; {
|
meta = with lib; {
|
||||||
description = "NLTK Data";
|
description = "NLTK Data";
|
||||||
homepage = "https://github.com/nltk/nltk_data";
|
homepage = "https://github.com/nltk/nltk_data";
|
||||||
license = licenses.asl20;
|
license = licenses.asl20;
|
||||||
platforms = platforms.all;
|
platforms = platforms.all;
|
||||||
maintainers = with maintainers; [
|
maintainers = with maintainers; [ happysalada ];
|
||||||
bengsparks
|
|
||||||
happysalada
|
|
||||||
];
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
makeNltkDataPackage =
|
makeNltkDataPackage =
|
||||||
|
@ -54,212 +50,41 @@ let
|
||||||
'';
|
'';
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
makeChunker =
|
|
||||||
pname:
|
|
||||||
makeNltkDataPackage {
|
|
||||||
inherit pname;
|
|
||||||
location = "chunkers";
|
|
||||||
hash = "sha256-kemjqaCM9hlKAdMw8oVJnp62EAC9rMQ50dKg7wlAwEc=";
|
|
||||||
};
|
|
||||||
|
|
||||||
makeCorpus =
|
|
||||||
pname:
|
|
||||||
makeNltkDataPackage {
|
|
||||||
inherit pname;
|
|
||||||
location = "corpora";
|
|
||||||
hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";
|
|
||||||
};
|
|
||||||
|
|
||||||
makeGrammar =
|
|
||||||
pname:
|
|
||||||
makeNltkDataPackage {
|
|
||||||
inherit pname;
|
|
||||||
location = "grammars";
|
|
||||||
hash = "sha256-pyLEcX3Azv8j1kCGvVYonuiNgVJxtWt7veU0S/yNbIM=";
|
|
||||||
};
|
|
||||||
|
|
||||||
makeHelp =
|
|
||||||
pname:
|
|
||||||
makeNltkDataPackage {
|
|
||||||
inherit pname;
|
|
||||||
location = "help";
|
|
||||||
hash = "sha256-97mYLNES5WujLF5gD8Ul4cJ6LqSzz+jDzclUsdBeHNE=";
|
|
||||||
};
|
|
||||||
|
|
||||||
makeMisc =
|
|
||||||
pname:
|
|
||||||
makeNltkDataPackage {
|
|
||||||
inherit pname;
|
|
||||||
location = "misc";
|
|
||||||
hash = "sha256-XtizfEsc8TYWqvvC/eSFdha2ClC5/ZiJM8nue0vXLb4=";
|
|
||||||
};
|
|
||||||
|
|
||||||
makeModel =
|
|
||||||
pname:
|
|
||||||
makeNltkDataPackage {
|
|
||||||
inherit pname;
|
|
||||||
location = "models";
|
|
||||||
hash = "sha256-iq3weEgCci6rgLW2j28F2eRLprJtInGXKe/awJPSVG4=";
|
|
||||||
};
|
|
||||||
|
|
||||||
makeTagger =
|
|
||||||
pname:
|
|
||||||
makeNltkDataPackage {
|
|
||||||
inherit pname;
|
|
||||||
location = "taggers";
|
|
||||||
hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
|
|
||||||
};
|
|
||||||
|
|
||||||
makeTokenizer =
|
|
||||||
pname:
|
|
||||||
makeNltkDataPackage {
|
|
||||||
inherit pname;
|
|
||||||
location = "tokenizers";
|
|
||||||
hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
|
|
||||||
};
|
|
||||||
|
|
||||||
makeStemmer =
|
|
||||||
pname:
|
|
||||||
makeNltkDataPackage {
|
|
||||||
inherit pname;
|
|
||||||
location = "stemmers";
|
|
||||||
hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk=";
|
|
||||||
};
|
|
||||||
in
|
in
|
||||||
lib.makeScope newScope (self: {
|
lib.makeScope newScope (self: {
|
||||||
## Chunkers
|
punkt = makeNltkDataPackage {
|
||||||
maxent-ne-chunker = makeChunker "maxent_ne_chunker";
|
pname = "punkt";
|
||||||
maxent-ne-chunker-tab = makeChunker "maxent_ne_chunker_tab";
|
location = "tokenizers";
|
||||||
|
hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
|
||||||
## Corpora
|
};
|
||||||
abc = makeCorpus "abc";
|
punkt_tab = makeNltkDataPackage {
|
||||||
alpino = makeCorpus "alpino";
|
pname = "punkt_tab";
|
||||||
bcp47 = makeCorpus "bcp47";
|
location = "tokenizers";
|
||||||
biocreative-ppi = makeCorpus "biocreative_ppi";
|
hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
|
||||||
brown = makeCorpus "brown";
|
};
|
||||||
brown-tei = makeCorpus "brown_tei";
|
averaged_perceptron_tagger = makeNltkDataPackage {
|
||||||
cess-cat = makeCorpus "cess_cat";
|
pname = "averaged_perceptron_tagger";
|
||||||
cess-esp = makeCorpus "cess_esp";
|
location = "taggers";
|
||||||
chat80 = makeCorpus "chat80";
|
hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
|
||||||
city-database = makeCorpus "city_database";
|
};
|
||||||
cmudict = makeCorpus "cmudict";
|
averaged_perceptron_tagger_eng = makeNltkDataPackage {
|
||||||
comparative-sentences = makeCorpus "comparative_sentences";
|
pname = "averaged_perceptron_tagger_eng";
|
||||||
comtrans = makeCorpus "comtrans";
|
location = "taggers";
|
||||||
conll2000 = makeCorpus "conll2000";
|
hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
|
||||||
conll2002 = makeCorpus "conll2002";
|
};
|
||||||
conll2007 = makeCorpus "conll2007";
|
snowball_data = makeNltkDataPackage {
|
||||||
crubadan = makeCorpus "crubadan";
|
pname = "snowball_data";
|
||||||
dependency-treebank = makeCorpus "dependency_treebank";
|
location = "stemmers";
|
||||||
dolch = makeCorpus "dolch";
|
hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk=";
|
||||||
europarl-raw = makeCorpus "europarl_raw";
|
};
|
||||||
extended-omw = makeCorpus "extended_omw";
|
stopwords = makeNltkDataPackage {
|
||||||
floresta = makeCorpus "floresta";
|
pname = "stopwords";
|
||||||
framenet-v15 = makeCorpus "framenet_v15";
|
location = "corpora";
|
||||||
framenet-v17 = makeCorpus "framenet_v17";
|
hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";
|
||||||
gazetteers = makeCorpus "gazetteers";
|
};
|
||||||
genesis = makeCorpus "genesis";
|
wordnet = makeNltkDataPackage {
|
||||||
gutenberg = makeCorpus "gutenberg";
|
pname = "wordnet";
|
||||||
ieer = makeCorpus "ieer";
|
location = "corpora";
|
||||||
inaugural = makeCorpus "inaugural";
|
hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";
|
||||||
indian = makeCorpus "indian";
|
};
|
||||||
jeita = makeCorpus "jeita";
|
|
||||||
kimmo = makeCorpus "kimmo";
|
|
||||||
knbc = makeCorpus "knbc";
|
|
||||||
lin-thesaurus = makeCorpus "lin_thesaurus";
|
|
||||||
mac-morpho = makeCorpus "mac_morpho";
|
|
||||||
machado = makeCorpus "machado";
|
|
||||||
masc-tagged = makeCorpus "masc_tagged";
|
|
||||||
movie-reviews = makeCorpus "movie_reviews";
|
|
||||||
mte-teip5 = makeCorpus "mte_teip5";
|
|
||||||
names = makeCorpus "names";
|
|
||||||
nombank-1-0 = makeCorpus "nombank.1.0";
|
|
||||||
nonbreaking-prefixes = makeCorpus "nonbreaking_prefixes";
|
|
||||||
nps-chat = makeCorpus "nps_chat";
|
|
||||||
omw = makeCorpus "omw";
|
|
||||||
omw-1-4 = makeCorpus "omw-1.4";
|
|
||||||
opinion-lexicon = makeCorpus "opinion_lexicon";
|
|
||||||
panlex-swadesh = makeCorpus "panlex_swadesh";
|
|
||||||
paradigms = makeCorpus "paradigms";
|
|
||||||
pe08 = makeCorpus "pe08";
|
|
||||||
pil = makeCorpus "pil";
|
|
||||||
pl196x = makeCorpus "pl196x";
|
|
||||||
ppattach = makeCorpus "ppattach";
|
|
||||||
problem-reports = makeCorpus "problem_reports";
|
|
||||||
product-reviews-1 = makeCorpus "product_reviews_1";
|
|
||||||
product-reviews-2 = makeCorpus "product_reviews_2";
|
|
||||||
propbank = makeCorpus "propbank";
|
|
||||||
pros-cons = makeCorpus "pros_cons";
|
|
||||||
ptb = makeCorpus "ptb";
|
|
||||||
qc = makeCorpus "qc";
|
|
||||||
reuters = makeCorpus "reuters";
|
|
||||||
rte = makeCorpus "rte";
|
|
||||||
semcor = makeCorpus "semcor";
|
|
||||||
senseval = makeCorpus "senseval";
|
|
||||||
sentence-polarity = makeCorpus "sentence_polarity";
|
|
||||||
sentiwordnet = makeCorpus "sentiwordnet";
|
|
||||||
shakespeare = makeCorpus "shakespeare";
|
|
||||||
sinica-treebank = makeCorpus "sinica_treebank";
|
|
||||||
smultron = makeCorpus "smultron";
|
|
||||||
state-union = makeCorpus "state_union";
|
|
||||||
stopwords = makeCorpus "stopwords";
|
|
||||||
subjectivity = makeCorpus "subjectivity";
|
|
||||||
swadesh = makeCorpus "swadesh";
|
|
||||||
switchboard = makeCorpus "switchboard";
|
|
||||||
timit = makeCorpus "timit";
|
|
||||||
toolbox = makeCorpus "toolbox";
|
|
||||||
treebank = makeCorpus "treebank";
|
|
||||||
twitter-samples = makeCorpus "twitter_samples";
|
|
||||||
udhr = makeCorpus "udhr";
|
|
||||||
udhr2 = makeCorpus "udhr2";
|
|
||||||
unicode-samples = makeCorpus "unicode_samples";
|
|
||||||
universal-treebanks-v20 = makeCorpus "universal_treebanks_v20";
|
|
||||||
verbnet = makeCorpus "verbnet";
|
|
||||||
verbnet3 = makeCorpus "verbnet3";
|
|
||||||
webtext = makeCorpus "webtext";
|
|
||||||
wordnet = makeCorpus "wordnet";
|
|
||||||
wordnet-ic = makeCorpus "wordnet_ic";
|
|
||||||
wordnet2021 = makeCorpus "wordnet2021";
|
|
||||||
wordnet2022 = makeCorpus "wordnet2022";
|
|
||||||
wordnet31 = makeCorpus "wordnet31";
|
|
||||||
words = makeCorpus "words";
|
|
||||||
ycoe = makeCorpus "ycoe";
|
|
||||||
|
|
||||||
## Grammars
|
|
||||||
basque-grammars = makeGrammar "basque_grammars";
|
|
||||||
book-grammars = makeGrammar "book_grammars";
|
|
||||||
large-grammars = makeGrammar "large_grammars";
|
|
||||||
sample-grammars = makeGrammar "sample_grammars";
|
|
||||||
spanish-grammars = makeGrammar "spanish_grammars";
|
|
||||||
|
|
||||||
## Help
|
|
||||||
tagsets-json = makeHelp "tagsets_json";
|
|
||||||
|
|
||||||
## Misc
|
|
||||||
mwa-ppdb = makeMisc "mwa_ppdb";
|
|
||||||
perluniprops = makeMisc "perluniprops";
|
|
||||||
|
|
||||||
## Models
|
|
||||||
bllip-wsj-no-aux = makeModel "bllip_wsj_no_aux";
|
|
||||||
moses-sample = makeModel "moses_sample";
|
|
||||||
wmt15-eval = makeModel "wmt15_eval";
|
|
||||||
word2vec-sample = makeModel "word2vec_sample";
|
|
||||||
|
|
||||||
## Taggers
|
|
||||||
averaged-perceptron-tagger = makeTagger "averaged_perceptron_tagger";
|
|
||||||
averaged-perceptron-tagger-eng = makeTagger "averaged_perceptron_tagger_eng";
|
|
||||||
averaged-perceptron-tagger-ru = makeTagger "averaged_perceptron_tagger_ru";
|
|
||||||
averaged-perceptron-tagger-rus = makeTagger "averaged_perceptron_tagger_rus";
|
|
||||||
maxent-treebank-pos-tagger = makeTagger "maxent_treebank_pos_tagger";
|
|
||||||
maxent-treebank-pos-tagger-tab = makeTagger "maxent_treebank_pos_tagger_tab";
|
|
||||||
universal-tagset = makeTagger "universal_tagset";
|
|
||||||
|
|
||||||
## Tokenizers
|
|
||||||
punkt = makeTokenizer "punkt";
|
|
||||||
punkt-tab = makeTokenizer "punkt_tab";
|
|
||||||
|
|
||||||
## Stemmers
|
|
||||||
porter-test = makeStemmer "porter_test";
|
|
||||||
rslp = makeStemmer "rslp";
|
|
||||||
snowball-data = makeStemmer "snowball_data";
|
|
||||||
})
|
})
|
||||||
|
|
|
@ -1354,10 +1354,6 @@ mapAliases {
|
||||||
# When the nixops_unstable alias is removed, nixops_unstable_minimal can be renamed to nixops_unstable.
|
# When the nixops_unstable alias is removed, nixops_unstable_minimal can be renamed to nixops_unstable.
|
||||||
|
|
||||||
nixosTest = testers.nixosTest; # Added 2022-05-05
|
nixosTest = testers.nixosTest; # Added 2022-05-05
|
||||||
nltk-data.averaged_perceptron_tagger = nltk-data.averaged-perceptron-tagger; # Added 2025-05-21
|
|
||||||
nltk-data.averaged_perceptron_tagger_eng = nltk-data.averaged-perceptron-tagger-eng; # Added 2025-05-21
|
|
||||||
nltk-data.punkt_tab = nltk-data.punkt-tab; # Added 2025-05-21
|
|
||||||
nltk-data.snowball_data = nltk-data.snowball-data; # Added 2025-05-21
|
|
||||||
nmap-unfree = throw "'nmap-unfree' has been renamed to/replaced by 'nmap'"; # Converted to throw 2024-10-17
|
nmap-unfree = throw "'nmap-unfree' has been renamed to/replaced by 'nmap'"; # Converted to throw 2024-10-17
|
||||||
noah = throw "'noah' has been removed because it was broken and its upstream archived"; # Added 2025-05-10
|
noah = throw "'noah' has been removed because it was broken and its upstream archived"; # Added 2025-05-10
|
||||||
nodejs_18 = throw "Node.js 18.x has reached End-Of-Life and has been removed"; # Added 2025-04-23
|
nodejs_18 = throw "Node.js 18.x has reached End-Of-Life and has been removed"; # Added 2025-04-23
|
||||||
|
|
|
@ -2344,7 +2344,7 @@ with pkgs;
|
||||||
|
|
||||||
mpd-sima = python3Packages.callPackage ../tools/audio/mpd-sima { };
|
mpd-sima = python3Packages.callPackage ../tools/audio/mpd-sima { };
|
||||||
|
|
||||||
nltk-data = lib.recurseIntoAttrs (callPackage ../tools/text/nltk-data { });
|
nltk-data = callPackage ../tools/text/nltk-data { };
|
||||||
|
|
||||||
seabios-coreboot = seabios.override { ___build-type = "coreboot"; };
|
seabios-coreboot = seabios.override { ___build-type = "coreboot"; };
|
||||||
seabios-csm = seabios.override { ___build-type = "csm"; };
|
seabios-csm = seabios.override { ___build-type = "csm"; };
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue