0
0
Fork 0
mirror of https://github.com/NixOS/nixpkgs.git synced 2025-07-13 21:50:33 +03:00

pg-dump-anon: init at 1.3.1

This is a Go program inside the sources of `postgresql_anonymizer` that
allows to perform database dumps, but with anonymized data. I figured
that it's a little awkward to have a client program to be part of the
extension package.

So I decided to create a second package called `pg-dump-anon`. Since
it's one repository, both share `version` & `src`.

Also extended the VM test to make sure we're getting properly anonymized
data when dumping with `pg_dump_anon`.
This commit is contained in:
Maximilian Bosch 2024-03-13 14:36:20 +01:00
parent 8d0e5a3402
commit 0cdaede144
No known key found for this signature in database
GPG key ID: 797C8FD26F815B0E
3 changed files with 90 additions and 20 deletions

View file

@ -2,7 +2,8 @@ import ./make-test-python.nix ({ pkgs, lib, ... }: {
name = "pg_anonymizer";
meta.maintainers = lib.teams.flyingcircus.members;
nodes.machine = {
nodes.machine = { pkgs, ... }: {
environment.systemPackages = [ pkgs.pg-dump-anon ];
services.postgresql = {
enable = true;
extraPlugins = ps: [ ps.anonymizer ];
@ -39,16 +40,55 @@ import ./make-test-python.nix ({ pkgs, lib, ... }: {
assert row[1] != original_name, f"Expected first row to have a name other than {original_name}"
assert not bool(row[2]), "Expected points to be NULL in first row"
with subtest("Check initial state"):
output = get_player_table_contents()
def find_xsv_in_dump(dump, sep=','):
"""
Expecting to find a CSV (for pg_dump_anon) or TSV (for pg_dump) structure, looking like
COPY public.player ...
1,Shields,
2,Salazar,
\.
in the given dump (the commas are tabs in case of pg_dump).
Extract the CSV lines and split by `sep`.
"""
try:
from itertools import dropwhile, takewhile
return [x.split(sep) for x in list(takewhile(
lambda x: x != "\\.",
dropwhile(
lambda x: not x.startswith("COPY public.player"),
dump.splitlines()
)
))[1:]]
except:
print(f"Dump to process: {dump}")
raise
def check_original_data(output):
assert output[0] == ['1','Foo','23'], f"Expected first row from player table to be 1,Foo,23; got {output[0]}"
assert output[1] == ['2','Bar','42'], f"Expected first row from player table to be 2,Bar,42; got {output[1]}"
with subtest("Anonymize"):
machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'")
output = get_player_table_contents()
def check_anonymized_rows(output):
check_anonymized_row(output[0], '1', 'Foo')
check_anonymized_row(output[1], '2', 'Bar')
with subtest("Check initial state"):
check_original_data(get_player_table_contents())
with subtest("Anonymous dumps"):
check_original_data(find_xsv_in_dump(
machine.succeed("sudo -u postgres pg_dump demo"),
sep='\t'
))
check_anonymized_rows(find_xsv_in_dump(
machine.succeed("sudo -u postgres pg_dump_anon -U postgres -h /run/postgresql -d demo"),
sep=','
))
with subtest("Anonymize"):
machine.succeed("sudo -u postgres psql -d demo --command 'select anon.anonymize_database();'")
check_anonymized_rows(get_player_table_contents())
'';
})