diff --git a/nixos/modules/services/web-apps/mealie.nix b/nixos/modules/services/web-apps/mealie.nix index b8f65b1fb98e..46571fea4617 100644 --- a/nixos/modules/services/web-apps/mealie.nix +++ b/nixos/modules/services/web-apps/mealie.nix @@ -76,7 +76,7 @@ in API_PORT = toString cfg.port; BASE_URL = "http://localhost:${toString cfg.port}"; DATA_DIR = "/var/lib/mealie"; - NLTK_DATA = pkgs.nltk-data.averaged-perceptron-tagger-eng; + NLTK_DATA = pkgs.nltk-data.averaged_perceptron_tagger_eng; } // (builtins.mapAttrs (_: val: toString val) cfg.settings); serviceConfig = { diff --git a/pkgs/by-name/me/mealie/package.nix b/pkgs/by-name/me/mealie/package.nix index 6a7897f9a79f..0092eab6b5f3 100644 --- a/pkgs/by-name/me/mealie/package.nix +++ b/pkgs/by-name/me/mealie/package.nix @@ -109,7 +109,7 @@ pythonpkgs.buildPythonApplication rec { # Needed for tests preCheck = '' - export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng} + export NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng} ''; disabledTestPaths = [ diff --git a/pkgs/by-name/pa/paperless-ngx/package.nix b/pkgs/by-name/pa/paperless-ngx/package.nix index 4ca8929f10be..cc3e97393ca7 100644 --- a/pkgs/by-name/pa/paperless-ngx/package.nix +++ b/pkgs/by-name/pa/paperless-ngx/package.nix @@ -305,8 +305,8 @@ python.pkgs.buildPythonApplication rec { tesseract5 ; nltkData = with nltk-data; [ - punkt-tab - snowball-data + punkt_tab + snowball_data stopwords ]; tests = { inherit (nixosTests) paperless; }; diff --git a/pkgs/by-name/un/unstructured-api/package.nix b/pkgs/by-name/un/unstructured-api/package.nix index adf9fa83dd96..96a5a15584d8 100644 --- a/pkgs/by-name/un/unstructured-api/package.nix +++ b/pkgs/by-name/un/unstructured-api/package.nix @@ -152,7 +152,7 @@ let paths = [ nltk-data.punkt - nltk-data.averaged-perceptron-tagger + nltk-data.averaged_perceptron_tagger ]; }; in diff --git a/pkgs/development/python-modules/aider-chat/default.nix b/pkgs/development/python-modules/aider-chat/default.nix index bc4534b8da61..4aa6ae731311 100644 --- a/pkgs/development/python-modules/aider-chat/default.nix +++ b/pkgs/development/python-modules/aider-chat/default.nix @@ -125,7 +125,7 @@ let aider-nltk-data = symlinkJoin { name = "aider-nltk-data"; paths = [ - nltk-data.punkt-tab + nltk-data.punkt_tab nltk-data.stopwords ]; }; diff --git a/pkgs/development/python-modules/ingredient-parser-nlp/default.nix b/pkgs/development/python-modules/ingredient-parser-nlp/default.nix index ee07bd34acd3..88d03d362361 100644 --- a/pkgs/development/python-modules/ingredient-parser-nlp/default.nix +++ b/pkgs/development/python-modules/ingredient-parser-nlp/default.nix @@ -44,7 +44,7 @@ buildPythonPackage rec { # Needed for tests preCheck = '' - export NLTK_DATA=${nltk-data.averaged-perceptron-tagger-eng} + export NLTK_DATA=${nltk-data.averaged_perceptron_tagger_eng} ''; meta = { diff --git a/pkgs/development/python-modules/type-infer/default.nix b/pkgs/development/python-modules/type-infer/default.nix index 4b8ef9e99d5e..4ce88d3b049f 100644 --- a/pkgs/development/python-modules/type-infer/default.nix +++ b/pkgs/development/python-modules/type-infer/default.nix @@ -24,7 +24,7 @@ let name = "nltk-test-data"; paths = [ nltk-data.punkt - nltk-data.punkt-tab + nltk-data.punkt_tab nltk-data.stopwords ]; }; diff --git a/pkgs/tools/text/nltk-data/default.nix b/pkgs/tools/text/nltk-data/default.nix index e4aa7f06c355..d9613b86637a 100644 --- a/pkgs/tools/text/nltk-data/default.nix +++ b/pkgs/tools/text/nltk-data/default.nix @@ -10,16 +10,12 @@ let version = "0-unstable-2024-07-29"; nativeBuildInputs = [ unzip ]; dontBuild = true; - dontFixup = true; meta = with lib; { description = "NLTK Data"; homepage = "https://github.com/nltk/nltk_data"; license = licenses.asl20; platforms = platforms.all; - maintainers = with maintainers; [ - bengsparks - happysalada - ]; + maintainers = with maintainers; [ happysalada ]; }; }; makeNltkDataPackage = @@ -54,212 +50,41 @@ let ''; } ); - - makeChunker = - pname: - makeNltkDataPackage { - inherit pname; - location = "chunkers"; - hash = "sha256-kemjqaCM9hlKAdMw8oVJnp62EAC9rMQ50dKg7wlAwEc="; - }; - - makeCorpus = - pname: - makeNltkDataPackage { - inherit pname; - location = "corpora"; - hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; - }; - - makeGrammar = - pname: - makeNltkDataPackage { - inherit pname; - location = "grammars"; - hash = "sha256-pyLEcX3Azv8j1kCGvVYonuiNgVJxtWt7veU0S/yNbIM="; - }; - - makeHelp = - pname: - makeNltkDataPackage { - inherit pname; - location = "help"; - hash = "sha256-97mYLNES5WujLF5gD8Ul4cJ6LqSzz+jDzclUsdBeHNE="; - }; - - makeMisc = - pname: - makeNltkDataPackage { - inherit pname; - location = "misc"; - hash = "sha256-XtizfEsc8TYWqvvC/eSFdha2ClC5/ZiJM8nue0vXLb4="; - }; - - makeModel = - pname: - makeNltkDataPackage { - inherit pname; - location = "models"; - hash = "sha256-iq3weEgCci6rgLW2j28F2eRLprJtInGXKe/awJPSVG4="; - }; - - makeTagger = - pname: - makeNltkDataPackage { - inherit pname; - location = "taggers"; - hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; - }; - - makeTokenizer = - pname: - makeNltkDataPackage { - inherit pname; - location = "tokenizers"; - hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; - }; - - makeStemmer = - pname: - makeNltkDataPackage { - inherit pname; - location = "stemmers"; - hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk="; - }; in lib.makeScope newScope (self: { - ## Chunkers - maxent-ne-chunker = makeChunker "maxent_ne_chunker"; - maxent-ne-chunker-tab = makeChunker "maxent_ne_chunker_tab"; - - ## Corpora - abc = makeCorpus "abc"; - alpino = makeCorpus "alpino"; - bcp47 = makeCorpus "bcp47"; - biocreative-ppi = makeCorpus "biocreative_ppi"; - brown = makeCorpus "brown"; - brown-tei = makeCorpus "brown_tei"; - cess-cat = makeCorpus "cess_cat"; - cess-esp = makeCorpus "cess_esp"; - chat80 = makeCorpus "chat80"; - city-database = makeCorpus "city_database"; - cmudict = makeCorpus "cmudict"; - comparative-sentences = makeCorpus "comparative_sentences"; - comtrans = makeCorpus "comtrans"; - conll2000 = makeCorpus "conll2000"; - conll2002 = makeCorpus "conll2002"; - conll2007 = makeCorpus "conll2007"; - crubadan = makeCorpus "crubadan"; - dependency-treebank = makeCorpus "dependency_treebank"; - dolch = makeCorpus "dolch"; - europarl-raw = makeCorpus "europarl_raw"; - extended-omw = makeCorpus "extended_omw"; - floresta = makeCorpus "floresta"; - framenet-v15 = makeCorpus "framenet_v15"; - framenet-v17 = makeCorpus "framenet_v17"; - gazetteers = makeCorpus "gazetteers"; - genesis = makeCorpus "genesis"; - gutenberg = makeCorpus "gutenberg"; - ieer = makeCorpus "ieer"; - inaugural = makeCorpus "inaugural"; - indian = makeCorpus "indian"; - jeita = makeCorpus "jeita"; - kimmo = makeCorpus "kimmo"; - knbc = makeCorpus "knbc"; - lin-thesaurus = makeCorpus "lin_thesaurus"; - mac-morpho = makeCorpus "mac_morpho"; - machado = makeCorpus "machado"; - masc-tagged = makeCorpus "masc_tagged"; - movie-reviews = makeCorpus "movie_reviews"; - mte-teip5 = makeCorpus "mte_teip5"; - names = makeCorpus "names"; - nombank-1-0 = makeCorpus "nombank.1.0"; - nonbreaking-prefixes = makeCorpus "nonbreaking_prefixes"; - nps-chat = makeCorpus "nps_chat"; - omw = makeCorpus "omw"; - omw-1-4 = makeCorpus "omw-1.4"; - opinion-lexicon = makeCorpus "opinion_lexicon"; - panlex-swadesh = makeCorpus "panlex_swadesh"; - paradigms = makeCorpus "paradigms"; - pe08 = makeCorpus "pe08"; - pil = makeCorpus "pil"; - pl196x = makeCorpus "pl196x"; - ppattach = makeCorpus "ppattach"; - problem-reports = makeCorpus "problem_reports"; - product-reviews-1 = makeCorpus "product_reviews_1"; - product-reviews-2 = makeCorpus "product_reviews_2"; - propbank = makeCorpus "propbank"; - pros-cons = makeCorpus "pros_cons"; - ptb = makeCorpus "ptb"; - qc = makeCorpus "qc"; - reuters = makeCorpus "reuters"; - rte = makeCorpus "rte"; - semcor = makeCorpus "semcor"; - senseval = makeCorpus "senseval"; - sentence-polarity = makeCorpus "sentence_polarity"; - sentiwordnet = makeCorpus "sentiwordnet"; - shakespeare = makeCorpus "shakespeare"; - sinica-treebank = makeCorpus "sinica_treebank"; - smultron = makeCorpus "smultron"; - state-union = makeCorpus "state_union"; - stopwords = makeCorpus "stopwords"; - subjectivity = makeCorpus "subjectivity"; - swadesh = makeCorpus "swadesh"; - switchboard = makeCorpus "switchboard"; - timit = makeCorpus "timit"; - toolbox = makeCorpus "toolbox"; - treebank = makeCorpus "treebank"; - twitter-samples = makeCorpus "twitter_samples"; - udhr = makeCorpus "udhr"; - udhr2 = makeCorpus "udhr2"; - unicode-samples = makeCorpus "unicode_samples"; - universal-treebanks-v20 = makeCorpus "universal_treebanks_v20"; - verbnet = makeCorpus "verbnet"; - verbnet3 = makeCorpus "verbnet3"; - webtext = makeCorpus "webtext"; - wordnet = makeCorpus "wordnet"; - wordnet-ic = makeCorpus "wordnet_ic"; - wordnet2021 = makeCorpus "wordnet2021"; - wordnet2022 = makeCorpus "wordnet2022"; - wordnet31 = makeCorpus "wordnet31"; - words = makeCorpus "words"; - ycoe = makeCorpus "ycoe"; - - ## Grammars - basque-grammars = makeGrammar "basque_grammars"; - book-grammars = makeGrammar "book_grammars"; - large-grammars = makeGrammar "large_grammars"; - sample-grammars = makeGrammar "sample_grammars"; - spanish-grammars = makeGrammar "spanish_grammars"; - - ## Help - tagsets-json = makeHelp "tagsets_json"; - - ## Misc - mwa-ppdb = makeMisc "mwa_ppdb"; - perluniprops = makeMisc "perluniprops"; - - ## Models - bllip-wsj-no-aux = makeModel "bllip_wsj_no_aux"; - moses-sample = makeModel "moses_sample"; - wmt15-eval = makeModel "wmt15_eval"; - word2vec-sample = makeModel "word2vec_sample"; - - ## Taggers - averaged-perceptron-tagger = makeTagger "averaged_perceptron_tagger"; - averaged-perceptron-tagger-eng = makeTagger "averaged_perceptron_tagger_eng"; - averaged-perceptron-tagger-ru = makeTagger "averaged_perceptron_tagger_ru"; - averaged-perceptron-tagger-rus = makeTagger "averaged_perceptron_tagger_rus"; - maxent-treebank-pos-tagger = makeTagger "maxent_treebank_pos_tagger"; - maxent-treebank-pos-tagger-tab = makeTagger "maxent_treebank_pos_tagger_tab"; - universal-tagset = makeTagger "universal_tagset"; - - ## Tokenizers - punkt = makeTokenizer "punkt"; - punkt-tab = makeTokenizer "punkt_tab"; - - ## Stemmers - porter-test = makeStemmer "porter_test"; - rslp = makeStemmer "rslp"; - snowball-data = makeStemmer "snowball_data"; + punkt = makeNltkDataPackage { + pname = "punkt"; + location = "tokenizers"; + hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; + }; + punkt_tab = makeNltkDataPackage { + pname = "punkt_tab"; + location = "tokenizers"; + hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg="; + }; + averaged_perceptron_tagger = makeNltkDataPackage { + pname = "averaged_perceptron_tagger"; + location = "taggers"; + hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; + }; + averaged_perceptron_tagger_eng = makeNltkDataPackage { + pname = "averaged_perceptron_tagger_eng"; + location = "taggers"; + hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M="; + }; + snowball_data = makeNltkDataPackage { + pname = "snowball_data"; + location = "stemmers"; + hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk="; + }; + stopwords = makeNltkDataPackage { + pname = "stopwords"; + location = "corpora"; + hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; + }; + wordnet = makeNltkDataPackage { + pname = "wordnet"; + location = "corpora"; + hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk="; + }; }) diff --git a/pkgs/top-level/aliases.nix b/pkgs/top-level/aliases.nix index ad609df10c42..6282093db163 100644 --- a/pkgs/top-level/aliases.nix +++ b/pkgs/top-level/aliases.nix @@ -1354,10 +1354,6 @@ mapAliases { # When the nixops_unstable alias is removed, nixops_unstable_minimal can be renamed to nixops_unstable. nixosTest = testers.nixosTest; # Added 2022-05-05 - nltk-data.averaged_perceptron_tagger = nltk-data.averaged-perceptron-tagger; # Added 2025-05-21 - nltk-data.averaged_perceptron_tagger_eng = nltk-data.averaged-perceptron-tagger-eng; # Added 2025-05-21 - nltk-data.punkt_tab = nltk-data.punkt-tab; # Added 2025-05-21 - nltk-data.snowball_data = nltk-data.snowball-data; # Added 2025-05-21 nmap-unfree = throw "'nmap-unfree' has been renamed to/replaced by 'nmap'"; # Converted to throw 2024-10-17 noah = throw "'noah' has been removed because it was broken and its upstream archived"; # Added 2025-05-10 nodejs_18 = throw "Node.js 18.x has reached End-Of-Life and has been removed"; # Added 2025-04-23 diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index d196b6c1f469..d8ed42c1930a 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -2344,7 +2344,7 @@ with pkgs; mpd-sima = python3Packages.callPackage ../tools/audio/mpd-sima { }; - nltk-data = lib.recurseIntoAttrs (callPackage ../tools/text/nltk-data { }); + nltk-data = callPackage ../tools/text/nltk-data { }; seabios-coreboot = seabios.override { ___build-type = "coreboot"; }; seabios-csm = seabios.override { ___build-type = "csm"; };