diff --git a/.docs/system-services-storage.md b/.docs/system-services-storage.md index c82a78e27ff78c4e38b7f43d05abe1458492df5f..e7767cbc4e09583147acb3becc6de931c1710c7e 100644 --- a/.docs/system-services-storage.md +++ b/.docs/system-services-storage.md @@ -36,14 +36,19 @@ The default configuration creates two buckets `dbrepo-upload`, `dbrepo-download` Upload a CSV-file into the `dbrepo-upload` bucket with the AWS CLI: ```console -$ aws --endpoint-url http://<hostname>:9000 s3 cp /path/to/file.csv s3://dbrepo-upload/ +$ aws --endpoint-url http://<hostname>:9000 \ + s3 \ + cp /path/to/file.csv \ + s3://dbrepo-upload/ upload: /path/to/file.csv to s3://dbrepo-upload/file.csv ``` You can list the buckets: ```console -$ aws --endpoint-url http://<hostname>:9000 s3 ls +$ aws --endpoint-url http://<hostname>:9000 \ + s3 \ + ls 2023-12-03 16:23:15 dbrepo-download 2023-12-03 16:28:05 dbrepo-upload ``` @@ -51,7 +56,10 @@ $ aws --endpoint-url http://<hostname>:9000 s3 ls And list the files in the bucket `dbrepo-upload` with: ```console -$ aws --endpoint-url http://<hostname>:9000 s3 ls dbrepo-upload +$ aws --endpoint-url http://<hostname>:9000 \ + s3 \ + ls \ + dbrepo-upload 2023-12-03 16:28:05 535219 file.csv ``` diff --git a/dbrepo-analyse-service/test.sh b/dbrepo-analyse-service/test.sh index 41806ed7bd0050033adb3884628f642be890df82..f77b0f294c88e510cb0373da9a6f3bab69c2f8f7 100755 --- a/dbrepo-analyse-service/test.sh +++ b/dbrepo-analyse-service/test.sh @@ -1,3 +1,3 @@ #!/bin/bash source ./dbrepo-analyse-service/venv/bin/activate -cd ./dbrepo-analyse-service/ && coverage run -m pytest test/test_determine_dt.py test/test_determine_pk.py test/test_minio_client.py --junitxml=report.xml && coverage html --omit="test/*" && coverage report --omit="test/*" > ./coverage.txt \ No newline at end of file +cd ./dbrepo-analyse-service/ && coverage run -m pytest test/test_determine_dt.py test/test_determine_pk.py test/test_s3_client.py --junitxml=report.xml && coverage html --omit="test/*" && coverage report --omit="test/*" > ./coverage.txt \ No newline at end of file diff --git a/dbrepo-analyse-service/test/conftest.py b/dbrepo-analyse-service/test/conftest.py index 505ac71d135bb16cdbee9bf2433b70d6149ece55..c062a1b9d4a57c1eeb9da77faae0a023fe2a419c 100644 --- a/dbrepo-analyse-service/test/conftest.py +++ b/dbrepo-analyse-service/test/conftest.py @@ -6,8 +6,6 @@ import logging from minio.deleteobjects import DeleteObject from testcontainers.minio import MinioContainer -from clients.minio_client import MinioClient - @pytest.fixture(scope="session") def session(request): @@ -16,9 +14,9 @@ def session(request): :param request: / :return: The minIO container """ - logging.debug("[fixture] creating minIO container") - container = MinioContainer(access_key="minioadmin", secret_key="minioadmin") - logging.debug("[fixture] starting minIO container") + logging.debug("[fixture] creating container") + container = MinioContainer(access_key="seaweedfsadmin", secret_key="seaweedfsadmin") + logging.debug("[fixture] starting container") container.start() # set the environment for the client endpoint = 'http://' + container.get_container_host_ip() + ':' + container.get_exposed_port(9000) diff --git a/dbrepo-analyse-service/test/s3_config.json b/dbrepo-analyse-service/test/s3_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f270753cdc96278a039e483966ea864a16781cfe --- /dev/null +++ b/dbrepo-analyse-service/test/s3_config.json @@ -0,0 +1,20 @@ +{ + "identities": [ + { + "name": "admin", + "credentials": [ + { + "accessKey": "seaweedfsadmin", + "secretKey": "seaweedfsadmin" + } + ], + "actions": [ + "Read", + "Write", + "List", + "Tagging", + "Admin" + ] + } + ] +} \ No newline at end of file diff --git a/dbrepo-analyse-service/test/test_determine_dt.py b/dbrepo-analyse-service/test/test_determine_dt.py index 0101b48a7a8874cd7a4228acaf6d06e60cbe293b..2acf97aacd300b57bf6e9a5a6a214b7ed9b4bd75 100644 --- a/dbrepo-analyse-service/test/test_determine_dt.py +++ b/dbrepo-analyse-service/test/test_determine_dt.py @@ -9,7 +9,7 @@ import unittest import json -from clients.minio_client import MinioClient +from clients.s3_client import S3Client from botocore.exceptions import ClientError from determine_dt import determine_datatypes @@ -32,7 +32,7 @@ class DetermineDatatypesTest(unittest.TestCase): } # mock - MinioClient().upload_file("datetime.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("datetime.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="datetime.csv", separator=",") @@ -54,7 +54,7 @@ class DetermineDatatypesTest(unittest.TestCase): } # mock - MinioClient().upload_file("datetime_tz.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("datetime_tz.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="datetime_tz.csv", separator=",") @@ -76,7 +76,7 @@ class DetermineDatatypesTest(unittest.TestCase): } # mock - MinioClient().upload_file("datetime_t.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("datetime_t.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="datetime_t.csv", separator=",") @@ -98,7 +98,7 @@ class DetermineDatatypesTest(unittest.TestCase): } # mock - MinioClient().upload_file("datatypes.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("datatypes.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="datatypes.csv", separator=",") @@ -121,7 +121,7 @@ class DetermineDatatypesTest(unittest.TestCase): def test_determine_datatypes_fileEmpty_succeeds(self): # mock - MinioClient().upload_file("empty.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("empty.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes("empty.csv") @@ -133,7 +133,7 @@ class DetermineDatatypesTest(unittest.TestCase): def test_determine_datatypes_separatorSemicolon_succeeds(self): # mock - MinioClient().upload_file("separator.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("separator.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="separator.csv", separator=";") @@ -144,7 +144,7 @@ class DetermineDatatypesTest(unittest.TestCase): def test_determine_datatypes_separatorGuess_succeeds(self): # mock - MinioClient().upload_file("separator.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("separator.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="separator.csv") @@ -155,7 +155,7 @@ class DetermineDatatypesTest(unittest.TestCase): def test_determine_datatypes_separatorGuessLargeDataset_succeeds(self): # mock - MinioClient().upload_file("large.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("large.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="large.csv") diff --git a/dbrepo-analyse-service/test/test_determine_pk.py b/dbrepo-analyse-service/test/test_determine_pk.py index 81d9fe29ec88fe0fc6054ee97af155a1a74f3873..4fffda96c524d551efe3d8728713547fa7179a8f 100644 --- a/dbrepo-analyse-service/test/test_determine_pk.py +++ b/dbrepo-analyse-service/test/test_determine_pk.py @@ -9,127 +9,99 @@ import unittest import os import json -from clients.minio_client import MinioClient -from testcontainers.minio import MinioContainer +from clients.s3_client import S3Client from determine_pk import determine_pk - -def before(): - container = MinioContainer(access_key="minioadmin", secret_key="minioadmin").start() - endpoint = 'http://' + container.get_container_host_ip() + ':' + container.get_exposed_port(9000) - os.environ['S3_STORAGE_ENDPOINT'] = endpoint - client = container.get_client() - # create buckets - client.make_bucket('dbrepo-upload') - client.make_bucket('dbrepo-download') - return container - class DeterminePrimaryKeyTest(unittest.TestCase): # @Test def test_determine_pk_largeFileIdFirst_succeeds(self): - with before() as minio: - - # mock - MinioClient().upload_file("largefile_idfirst.csv", './data/test_pk/', 'dbrepo-upload') + # mock + S3Client().upload_file("largefile_idfirst.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('largefile_idfirst.csv') - data = json.loads(response) - self.assertEqual(1, int(data['id'])) + # test + response = determine_pk('largefile_idfirst.csv') + data = json.loads(response) + self.assertEqual(1, int(data['id'])) # @Test def test_determine_pk_largeFileIdInBetween_succeeds(self): - with before() as minio: + # mock + S3Client().upload_file("largefile_idinbtw.csv", './data/test_pk/', 'dbrepo-upload') - # mock - MinioClient().upload_file("largefile_idinbtw.csv", './data/test_pk/', 'dbrepo-upload') - - # test - response = determine_pk('largefile_idinbtw.csv') - data = json.loads(response) - self.assertEqual(1, int(data['id'])) + # test + response = determine_pk('largefile_idinbtw.csv') + data = json.loads(response) + self.assertEqual(1, int(data['id'])) # @Test def test_determine_pk_largeFileNoPrimaryKey_fails(self): - with before() as minio: - - # mock - MinioClient().upload_file("largefile_no_pk.csv", './data/test_pk/', 'dbrepo-upload') + # mock + S3Client().upload_file("largefile_no_pk.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('largefile_no_pk.csv') - data = json.loads(response) - self.assertEqual({}, data) + # test + response = determine_pk('largefile_no_pk.csv') + data = json.loads(response) + self.assertEqual({}, data) # @Test def test_determine_pk_largeFileNullInUnique_fails(self): - with before() as minio: + # mock + S3Client().upload_file("largefile_nullinunique.csv", './data/test_pk/', 'dbrepo-upload') - # mock - MinioClient().upload_file("largefile_nullinunique.csv", './data/test_pk/', 'dbrepo-upload') - - # test - response = determine_pk('largefile_nullinunique.csv') - data = json.loads(response) - self.assertFalse('uniquestr' in data) + # test + response = determine_pk('largefile_nullinunique.csv') + data = json.loads(response) + self.assertFalse('uniquestr' in data) # @Test def test_determine_pk_smallFileIdFirst_fails(self): - with before() as minio: - - # mock - MinioClient().upload_file("smallfile_idfirst.csv", './data/test_pk/', 'dbrepo-upload') + # mock + S3Client().upload_file("smallfile_idfirst.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('smallfile_idfirst.csv') - data = json.loads(response) - self.assertEqual(1, int(data['id'])) + # test + response = determine_pk('smallfile_idfirst.csv') + data = json.loads(response) + self.assertEqual(1, int(data['id'])) # @Test def test_determine_pk_smallFileIdIntBetween_fails(self): - with before() as minio: - - # mock - MinioClient().upload_file("smallfile_idinbtw.csv", './data/test_pk/', 'dbrepo-upload') + # mock + S3Client().upload_file("smallfile_idinbtw.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('smallfile_idinbtw.csv') - data = json.loads(response) - self.assertEqual(1, int(data['id'])) + # test + response = determine_pk('smallfile_idinbtw.csv') + data = json.loads(response) + self.assertEqual(1, int(data['id'])) # @Test def test_determine_pk_smallFileNoPrimaryKey_fails(self): - with before() as minio: + # mock + S3Client().upload_file("smallfile_no_pk.csv", './data/test_pk/', 'dbrepo-upload') - # mock - MinioClient().upload_file("smallfile_no_pk.csv", './data/test_pk/', 'dbrepo-upload') - - # test - response = determine_pk('smallfile_no_pk.csv') - data = json.loads(response) - self.assertEqual({}, data) + # test + response = determine_pk('smallfile_no_pk.csv') + data = json.loads(response) + self.assertEqual({}, data) # @Test def test_determine_pk_smallFileNullInUnique_fails(self): - with before() as minio: - - # mock - MinioClient().upload_file("smallfile_nullinunique.csv", './data/test_pk/', 'dbrepo-upload') + # mock + S3Client().upload_file("smallfile_nullinunique.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('smallfile_nullinunique.csv') - data = json.loads(response) - self.assertFalse('uniquestr' in data) + # test + response = determine_pk('smallfile_nullinunique.csv') + data = json.loads(response) + self.assertFalse('uniquestr' in data) if __name__ == '__main__':