diff --git a/.docs/system-services-storage.md b/.docs/system-services-storage.md index 37c469898af6043f676d4e683ae65f9ffd4bc8ef..e7767cbc4e09583147acb3becc6de931c1710c7e 100644 --- a/.docs/system-services-storage.md +++ b/.docs/system-services-storage.md @@ -8,19 +8,21 @@ author: Martin Weise !!! debug "Debug Information" - Image: [`bitnami/minio:2023-debian-11`](https://hub.docker.com/r/bitnami/minio) + Image: [`chrislusf/seaweedfs:3.59`](https://hub.docker.com/r/chrislusf/seaweedfs) - * Ports: 9000/tcp, 9001/tcp - * Console: `http://<hostname>/admin/storage` + * Ports: 9000/tcp + * Prometheus: `http://<hostname>:9091/metrics` ## Overview -We use [minIO](https://min.io) as a high-performance, S3 compatible object store packaged by Bitnami (VMware) for easy -cloud-ready deployments that by default support replication and monitoring. +We use [SeaweedFS](https://seaweedfs.github.io/) as a high-performance, S3-compatible object store for easy, cloud-ready +deployments that by default support replication and monitoring. No graphical user interface is provided out of the box; +administrators can access the S3 storage via S3-compatible clients, +e.g. the [AWS CLI](https://docs.aws.amazon.com/cli/latest/reference/s3/) (see below). ### Users -The default configuration creates one user `minioadmin` with password `minioadmin`. +The default configuration creates one user `seaweedfsadmin` with password `seaweedfsadmin`. ### Buckets @@ -29,42 +31,48 @@ The default configuration creates two buckets `dbrepo-upload`, `dbrepo-download` * `dbrepo-upload` for CSV-file upload (for import of data, analysis, etc.) from the User Interface * `dbrepo-download` for CSV-file download (exporting data, metadata, etc.) -### Metrics Collection +### Examples -By default, Prometheus metrics are not enabled as they require a running Prometheus server in the background. You can -enable the metrics endpoint by setting the following environment variables in the `docker-compose.yml` (deployment with -[Docker Compose](../deployment-docker-compose)) or `values.yml` (deployment with [Helm](../deployment-helm/)) according -to the [minIO documentation](https://min.io/docs/minio/linux/operations/monitoring/collect-minio-metrics-using-prometheus.html). +Upload a CSV-file into the `dbrepo-upload` bucket with the AWS CLI: -### Examples +```console
+$ aws --endpoint-url http://<hostname>:9000 \
+    s3 \
+    cp /path/to/file.csv \
+    s3://dbrepo-upload/
+upload: /path/to/file.csv to s3://dbrepo-upload/file.csv
+```
 -Upload a CSV-file into the `dbrepo-upload` bucket with the console -via `http://<hostname>/admin/storage/browser/dbrepo-upload`. +You can list the buckets: -<figure markdown> - { .img-border } - <figcaption>Uploading a file with the minIO console storage browser.</figcaption> -</figure> +```console
+$ aws --endpoint-url http://<hostname>:9000 \
+    s3 \
+    ls
+2023-12-03 16:23:15 dbrepo-download
+2023-12-03 16:28:05 dbrepo-upload
+```
 -Alternatively, you can use the middleware of the [User Interface](../system-other-ui/) to upload files. +And list the files in the bucket `dbrepo-upload` with: -Download a CSV-file from the `dbrepo-download` bucket with the console -via `http://<hostname>/admin/storage/browser/dbrepo-download`. 
+```console
+$ aws --endpoint-url http://<hostname>:9000 \
+    s3 \
+    ls \
+    dbrepo-upload
+2023-12-03 16:28:05     535219 file.csv
+```
 -<figure markdown> - { .img-border } - <figcaption>Downloading a file with the minIO console storage browser.</figcaption> -</figure> +You can also use the middleware of the [User Interface](../system-other-ui/) to upload files. Alternatively, you can use an S3-compatible client: -* [minIO Client](https://min.io/docs/minio/linux/reference/minio-mc.html) (most generic implementation of S3) * [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) (generic Python implementation of S3) * AWS SDK (tailored towards Amazon S3) ## Limitations -* Prometheus metrics are not enabled by default (they require a running Prometheus server). +* No support for multiple regions. !!! question "Do you miss functionality? Do these limitations affect you?" diff --git a/dbrepo-analyse-service/Dockerfile b/dbrepo-analyse-service/Dockerfile index 52912bdcebd58f7d51e7f8f8a59f1195a42e76ff..bc0f52276a4f4160e04ec75a6832092670684455 100644 --- a/dbrepo-analyse-service/Dockerfile +++ b/dbrepo-analyse-service/Dockerfile @@ -15,8 +15,8 @@ ENV FLASK_ENV=production ENV HOSTNAME=analyse-service ENV LOG_LEVEL=INFO ENV S3_STORAGE_ENDPOINT="http://storage-service:9000" -ENV S3_ACCESS_KEY_ID="minioadmin" -ENV S3_SECRET_ACCESS_KEY="minioadmin" +ENV S3_ACCESS_KEY_ID="seaweedfsadmin" +ENV S3_SECRET_ACCESS_KEY="seaweedfsadmin" COPY ./as-yml ./as-yml COPY ./clients ./clients diff --git a/dbrepo-analyse-service/clients/minio_client.py b/dbrepo-analyse-service/clients/s3_client.py similarity index 97% rename from dbrepo-analyse-service/clients/minio_client.py rename to dbrepo-analyse-service/clients/s3_client.py index d88da1b983652c1cd1ef3530001f7edf994aba52..c0df983fcce4e927d4ceaf351ad49bbcbb70d704 100644 --- a/dbrepo-analyse-service/clients/minio_client.py +++ b/dbrepo-analyse-service/clients/s3_client.py @@ -5,12 +5,12 @@ import logging from botocore.exceptions import ClientError -class MinioClient: +class S3Client: def __init__(self): endpoint_url = os.getenv('S3_STORAGE_ENDPOINT', 'http://localhost:9000') - aws_access_key_id = os.getenv('S3_ACCESS_KEY_ID', 'minioadmin') - aws_secret_access_key = os.getenv('S3_SECRET_ACCESS_KEY', 'minioadmin') + aws_access_key_id = os.getenv('S3_ACCESS_KEY_ID', 'seaweedfsadmin') + aws_secret_access_key = os.getenv('S3_SECRET_ACCESS_KEY', 'seaweedfsadmin') logging.info("retrieve file from S3, endpoint_url=%s, aws_access_key_id=%s, aws_secret_access_key=(hidden)", endpoint_url, aws_access_key_id) self.client = boto3.client(service_name='s3', endpoint_url=endpoint_url, aws_access_key_id=aws_access_key_id, diff --git a/dbrepo-analyse-service/determine_dt.py b/dbrepo-analyse-service/determine_dt.py index 9584bd0351c05f088c5c29436c5a345bfaeab5e9..8ac23898dba4237fb0a8d47595c54733aa3a6708 100644 --- a/dbrepo-analyse-service/determine_dt.py +++ b/dbrepo-analyse-service/determine_dt.py @@ -6,7 +6,7 @@ import json import csv import logging import io -from clients.minio_client import MinioClient +from clients.s3_client import S3Client import messytables, pandas as pd from messytables import CSVTableSet, type_guess, \ @@ -17,9 +17,9 @@ def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=None) - # Use option enum=True for searching Postgres ENUM Types in CSV file. Remark # Enum is not SQL standard, hence, it might not be supported by all db-engines. # However, it can be used in Postgres and MySQL. 
- minio_client = MinioClient() - minio_client.file_exists('dbrepo-upload', filename) - response = minio_client.get_file('dbrepo-upload', filename) + s3_client = S3Client() + s3_client.file_exists('dbrepo-upload', filename) + response = s3_client.get_file('dbrepo-upload', filename) stream = response['Body'] if response['ContentLength'] == 0: logging.warning(f'Failed to determine data types: file {filename} has empty body') diff --git a/dbrepo-analyse-service/determine_pk.py b/dbrepo-analyse-service/determine_pk.py index 8431e96d30ef525d6d5dd8ffbbd37effacebb746..cc182052891d8bae34c5e8a57217f0843c5dfa1f 100644 --- a/dbrepo-analyse-service/determine_pk.py +++ b/dbrepo-analyse-service/determine_pk.py @@ -5,7 +5,7 @@ import random import numpy as np import math from determine_dt import determine_datatypes -from clients.minio_client import MinioClient +from clients.s3_client import S3Client def determine_pk(filename, separator=','): @@ -15,9 +15,9 @@ def determine_pk(filename, separator=','): colnames = dt.keys() colindex = list(range(0, len(colnames))) - minio_client = MinioClient() - minio_client.file_exists('dbrepo-upload', filename) - response = minio_client.get_file('dbrepo-upload', filename) + s3_client = S3Client() + s3_client.file_exists('dbrepo-upload', filename) + response = s3_client.get_file('dbrepo-upload', filename) stream = response['Body'] if response['ContentLength'] == 0: logging.warning(f'Failed to determine primary key: file {filename} has empty body') diff --git a/dbrepo-analyse-service/test.sh b/dbrepo-analyse-service/test.sh index 41806ed7bd0050033adb3884628f642be890df82..f77b0f294c88e510cb0373da9a6f3bab69c2f8f7 100755 --- a/dbrepo-analyse-service/test.sh +++ b/dbrepo-analyse-service/test.sh @@ -1,3 +1,3 @@ #!/bin/bash source ./dbrepo-analyse-service/venv/bin/activate -cd ./dbrepo-analyse-service/ && coverage run -m pytest test/test_determine_dt.py test/test_determine_pk.py test/test_minio_client.py --junitxml=report.xml && coverage html --omit="test/*" && coverage report --omit="test/*" > ./coverage.txt \ No newline at end of file +cd ./dbrepo-analyse-service/ && coverage run -m pytest test/test_determine_dt.py test/test_determine_pk.py test/test_s3_client.py --junitxml=report.xml && coverage html --omit="test/*" && coverage report --omit="test/*" > ./coverage.txt \ No newline at end of file diff --git a/dbrepo-analyse-service/test/conftest.py b/dbrepo-analyse-service/test/conftest.py index 505ac71d135bb16cdbee9bf2433b70d6149ece55..c062a1b9d4a57c1eeb9da77faae0a023fe2a419c 100644 --- a/dbrepo-analyse-service/test/conftest.py +++ b/dbrepo-analyse-service/test/conftest.py @@ -6,8 +6,6 @@ import logging from minio.deleteobjects import DeleteObject from testcontainers.minio import MinioContainer -from clients.minio_client import MinioClient - @pytest.fixture(scope="session") def session(request): @@ -16,9 +14,9 @@ def session(request): :param request: / :return: The minIO container """ - logging.debug("[fixture] creating minIO container") - container = MinioContainer(access_key="minioadmin", secret_key="minioadmin") - logging.debug("[fixture] starting minIO container") + logging.debug("[fixture] creating container") + container = MinioContainer(access_key="seaweedfsadmin", secret_key="seaweedfsadmin") + logging.debug("[fixture] starting container") container.start() # set the environment for the client endpoint = 'http://' + container.get_container_host_ip() + ':' + container.get_exposed_port(9000) diff --git a/dbrepo-analyse-service/test/s3_config.json 
b/dbrepo-analyse-service/test/s3_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f270753cdc96278a039e483966ea864a16781cfe --- /dev/null +++ b/dbrepo-analyse-service/test/s3_config.json @@ -0,0 +1,20 @@ +{ + "identities": [ + { + "name": "admin", + "credentials": [ + { + "accessKey": "seaweedfsadmin", + "secretKey": "seaweedfsadmin" + } + ], + "actions": [ + "Read", + "Write", + "List", + "Tagging", + "Admin" + ] + } + ] +} \ No newline at end of file diff --git a/dbrepo-analyse-service/test/test_determine_dt.py b/dbrepo-analyse-service/test/test_determine_dt.py index 0101b48a7a8874cd7a4228acaf6d06e60cbe293b..2acf97aacd300b57bf6e9a5a6a214b7ed9b4bd75 100644 --- a/dbrepo-analyse-service/test/test_determine_dt.py +++ b/dbrepo-analyse-service/test/test_determine_dt.py @@ -9,7 +9,7 @@ import unittest import json -from clients.minio_client import MinioClient +from clients.s3_client import S3Client from botocore.exceptions import ClientError from determine_dt import determine_datatypes @@ -32,7 +32,7 @@ class DetermineDatatypesTest(unittest.TestCase): } # mock - MinioClient().upload_file("datetime.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("datetime.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="datetime.csv", separator=",") @@ -54,7 +54,7 @@ class DetermineDatatypesTest(unittest.TestCase): } # mock - MinioClient().upload_file("datetime_tz.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("datetime_tz.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="datetime_tz.csv", separator=",") @@ -76,7 +76,7 @@ class DetermineDatatypesTest(unittest.TestCase): } # mock - MinioClient().upload_file("datetime_t.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("datetime_t.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="datetime_t.csv", separator=",") @@ -98,7 +98,7 @@ class DetermineDatatypesTest(unittest.TestCase): } # mock - MinioClient().upload_file("datatypes.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("datatypes.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="datatypes.csv", separator=",") @@ -121,7 +121,7 @@ class DetermineDatatypesTest(unittest.TestCase): def test_determine_datatypes_fileEmpty_succeeds(self): # mock - MinioClient().upload_file("empty.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("empty.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes("empty.csv") @@ -133,7 +133,7 @@ class DetermineDatatypesTest(unittest.TestCase): def test_determine_datatypes_separatorSemicolon_succeeds(self): # mock - MinioClient().upload_file("separator.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("separator.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="separator.csv", separator=";") @@ -144,7 +144,7 @@ class DetermineDatatypesTest(unittest.TestCase): def test_determine_datatypes_separatorGuess_succeeds(self): # mock - MinioClient().upload_file("separator.csv", './data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("separator.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="separator.csv") @@ -155,7 +155,7 @@ class DetermineDatatypesTest(unittest.TestCase): def test_determine_datatypes_separatorGuessLargeDataset_succeeds(self): # mock - MinioClient().upload_file("large.csv", 
'./data/test_dt/', 'dbrepo-upload') + S3Client().upload_file("large.csv", './data/test_dt/', 'dbrepo-upload') # test response = determine_datatypes(filename="large.csv") diff --git a/dbrepo-analyse-service/test/test_determine_pk.py b/dbrepo-analyse-service/test/test_determine_pk.py index 81d9fe29ec88fe0fc6054ee97af155a1a74f3873..4fffda96c524d551efe3d8728713547fa7179a8f 100644 --- a/dbrepo-analyse-service/test/test_determine_pk.py +++ b/dbrepo-analyse-service/test/test_determine_pk.py @@ -9,127 +9,99 @@ import unittest import os import json -from clients.minio_client import MinioClient -from testcontainers.minio import MinioContainer +from clients.s3_client import S3Client from determine_pk import determine_pk - -def before(): - container = MinioContainer(access_key="minioadmin", secret_key="minioadmin").start() - endpoint = 'http://' + container.get_container_host_ip() + ':' + container.get_exposed_port(9000) - os.environ['S3_STORAGE_ENDPOINT'] = endpoint - client = container.get_client() - # create buckets - client.make_bucket('dbrepo-upload') - client.make_bucket('dbrepo-download') - return container - class DeterminePrimaryKeyTest(unittest.TestCase): # @Test def test_determine_pk_largeFileIdFirst_succeeds(self): - with before() as minio: - - # mock - MinioClient().upload_file("largefile_idfirst.csv", './data/test_pk/', 'dbrepo-upload') + # mock + S3Client().upload_file("largefile_idfirst.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('largefile_idfirst.csv') - data = json.loads(response) - self.assertEqual(1, int(data['id'])) + # test + response = determine_pk('largefile_idfirst.csv') + data = json.loads(response) + self.assertEqual(1, int(data['id'])) # @Test def test_determine_pk_largeFileIdInBetween_succeeds(self): - with before() as minio: + # mock + S3Client().upload_file("largefile_idinbtw.csv", './data/test_pk/', 'dbrepo-upload') - # mock - MinioClient().upload_file("largefile_idinbtw.csv", './data/test_pk/', 'dbrepo-upload') - - # test - response = determine_pk('largefile_idinbtw.csv') - data = json.loads(response) - self.assertEqual(1, int(data['id'])) + # test + response = determine_pk('largefile_idinbtw.csv') + data = json.loads(response) + self.assertEqual(1, int(data['id'])) # @Test def test_determine_pk_largeFileNoPrimaryKey_fails(self): - with before() as minio: - - # mock - MinioClient().upload_file("largefile_no_pk.csv", './data/test_pk/', 'dbrepo-upload') + # mock + S3Client().upload_file("largefile_no_pk.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('largefile_no_pk.csv') - data = json.loads(response) - self.assertEqual({}, data) + # test + response = determine_pk('largefile_no_pk.csv') + data = json.loads(response) + self.assertEqual({}, data) # @Test def test_determine_pk_largeFileNullInUnique_fails(self): - with before() as minio: + # mock + S3Client().upload_file("largefile_nullinunique.csv", './data/test_pk/', 'dbrepo-upload') - # mock - MinioClient().upload_file("largefile_nullinunique.csv", './data/test_pk/', 'dbrepo-upload') - - # test - response = determine_pk('largefile_nullinunique.csv') - data = json.loads(response) - self.assertFalse('uniquestr' in data) + # test + response = determine_pk('largefile_nullinunique.csv') + data = json.loads(response) + self.assertFalse('uniquestr' in data) # @Test def test_determine_pk_smallFileIdFirst_fails(self): - with before() as minio: - - # mock - MinioClient().upload_file("smallfile_idfirst.csv", './data/test_pk/', 'dbrepo-upload') + # mock + 
S3Client().upload_file("smallfile_idfirst.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('smallfile_idfirst.csv') - data = json.loads(response) - self.assertEqual(1, int(data['id'])) + # test + response = determine_pk('smallfile_idfirst.csv') + data = json.loads(response) + self.assertEqual(1, int(data['id'])) # @Test def test_determine_pk_smallFileIdIntBetween_fails(self): - with before() as minio: - - # mock - MinioClient().upload_file("smallfile_idinbtw.csv", './data/test_pk/', 'dbrepo-upload') + # mock + S3Client().upload_file("smallfile_idinbtw.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('smallfile_idinbtw.csv') - data = json.loads(response) - self.assertEqual(1, int(data['id'])) + # test + response = determine_pk('smallfile_idinbtw.csv') + data = json.loads(response) + self.assertEqual(1, int(data['id'])) # @Test def test_determine_pk_smallFileNoPrimaryKey_fails(self): - with before() as minio: + # mock + S3Client().upload_file("smallfile_no_pk.csv", './data/test_pk/', 'dbrepo-upload') - # mock - MinioClient().upload_file("smallfile_no_pk.csv", './data/test_pk/', 'dbrepo-upload') - - # test - response = determine_pk('smallfile_no_pk.csv') - data = json.loads(response) - self.assertEqual({}, data) + # test + response = determine_pk('smallfile_no_pk.csv') + data = json.loads(response) + self.assertEqual({}, data) # @Test def test_determine_pk_smallFileNullInUnique_fails(self): - with before() as minio: - - # mock - MinioClient().upload_file("smallfile_nullinunique.csv", './data/test_pk/', 'dbrepo-upload') + # mock + S3Client().upload_file("smallfile_nullinunique.csv", './data/test_pk/', 'dbrepo-upload') - # test - response = determine_pk('smallfile_nullinunique.csv') - data = json.loads(response) - self.assertFalse('uniquestr' in data) + # test + response = determine_pk('smallfile_nullinunique.csv') + data = json.loads(response) + self.assertFalse('uniquestr' in data) if __name__ == '__main__': diff --git a/dbrepo-analyse-service/test/test_minio_client.py b/dbrepo-analyse-service/test/test_s3_client.py similarity index 68% rename from dbrepo-analyse-service/test/test_minio_client.py rename to dbrepo-analyse-service/test/test_s3_client.py index 9c125a3522b14984c3fa3cf1cfcb423c29bee0c0..8fc6affbaf2ec43ac4c50ebdf2f9e5afad1c6dcf 100644 --- a/dbrepo-analyse-service/test/test_minio_client.py +++ b/dbrepo-analyse-service/test/test_s3_client.py @@ -9,16 +9,16 @@ import unittest from botocore.exceptions import ClientError -from clients.minio_client import MinioClient +from clients.s3_client import S3Client -class MinioClientTest(unittest.TestCase): +class S3ClientTest(unittest.TestCase): # @Test def test_upload_file_succeeds(self): # test - response = MinioClient().upload_file(filename="testdt01.csv", path="./data/") + response = S3Client().upload_file(filename="testdt01.csv", path="./data/") self.assertTrue(response) # @Test @@ -26,7 +26,7 @@ class MinioClientTest(unittest.TestCase): # test try: - MinioClient().upload_file(filename="testdt06.csv", path="./data/") + S3Client().upload_file(filename="testdt06.csv", path="./data/") except FileNotFoundError: pass except Exception: @@ -38,10 +38,10 @@ class MinioClientTest(unittest.TestCase): def test_download_file_succeeds(self): # mock - MinioClient().upload_file(filename="testdt01.csv", path="./data/", bucket="dbrepo-upload") + S3Client().upload_file(filename="testdt01.csv", path="./data/", bucket="dbrepo-upload") # test - response = 
MinioClient().download_file(filename="testdt01.csv") + response = S3Client().download_file(filename="testdt01.csv") self.assertTrue(response) # @Test @@ -49,7 +49,7 @@ class MinioClientTest(unittest.TestCase): # test try: - MinioClient().download_file(filename="testdt01.csv") + S3Client().download_file(filename="testdt01.csv") except ClientError: pass except Exception: @@ -61,10 +61,10 @@ class MinioClientTest(unittest.TestCase): def test_get_file_succeeds(self): # mock - MinioClient().upload_file(filename="testdt01.csv", path="./data/", bucket="dbrepo-upload") + S3Client().upload_file(filename="testdt01.csv", path="./data/", bucket="dbrepo-upload") # test - response = MinioClient().get_file(bucket="dbrepo-upload", filename="testdt01.csv") + response = S3Client().get_file(bucket="dbrepo-upload", filename="testdt01.csv") self.assertIsNotNone(response) # @Test @@ -72,7 +72,7 @@ class MinioClientTest(unittest.TestCase): # test try: - MinioClient().get_file(bucket="dbrepo-upload", filename="idonotexist.csv") + S3Client().get_file(bucket="dbrepo-upload", filename="idonotexist.csv") except ClientError: pass except Exception: @@ -84,7 +84,7 @@ class MinioClientTest(unittest.TestCase): def test_bucket_exists_succeeds(self): # test - response = MinioClient().bucket_exists_or_exit("dbrepo-upload") + response = S3Client().bucket_exists_or_exit("dbrepo-upload") self.assertIsNotNone(response) # @Test @@ -92,7 +92,7 @@ class MinioClientTest(unittest.TestCase): # test try: - MinioClient().bucket_exists_or_exit("idnonotexist") + S3Client().bucket_exists_or_exit("idnonotexist") except FileNotFoundError: pass except Exception: @@ -105,7 +105,7 @@ class MinioClientTest(unittest.TestCase): # test try: - MinioClient().bucket_exists_or_exit("idnonotexist") + S3Client().bucket_exists_or_exit("idnonotexist") except FileNotFoundError: pass except Exception: diff --git a/dbrepo-data-db/sidecar/Dockerfile b/dbrepo-data-db/sidecar/Dockerfile index 755ee168eba7f9d1005319385f24b11055121f0a..de0f2d76f2ee5d4788083fba56e22f719a93636a 100644 --- a/dbrepo-data-db/sidecar/Dockerfile +++ b/dbrepo-data-db/sidecar/Dockerfile @@ -19,8 +19,8 @@ COPY --chown=1001 ./ds-yml ./ds-yml COPY --chown=1001 ./app.py ./app.py ENV S3_STORAGE_ENDPOINT="http://storage-service:9000" -ENV S3_ACCESS_KEY_ID="minioadmin" -ENV S3_SECRET_ACCESS_KEY="minioadmin" +ENV S3_ACCESS_KEY_ID="seaweedfsadmin" +ENV S3_SECRET_ACCESS_KEY="seaweedfsadmin" EXPOSE 3305 diff --git a/dbrepo-data-db/sidecar/app.py b/dbrepo-data-db/sidecar/app.py index 105f3f03f2a7d0c0796ce975b51fd088c6999391..1bcf3a4a0f041d70d796d2c89319b40201583e13 100644 --- a/dbrepo-data-db/sidecar/app.py +++ b/dbrepo-data-db/sidecar/app.py @@ -5,7 +5,7 @@ import logging from flasgger import LazyJSONEncoder, Swagger from flask import Flask, request, Response from flasgger.utils import swag_from -from clients.minio_client import MinioClient +from clients.s3_client import S3Client from prometheus_flask_exporter import PrometheusMetrics logging.basicConfig(level=logging.DEBUG) @@ -106,8 +106,8 @@ def health(): @swag_from("ds-yml/import.yml") def import_csv(filename): logging.debug('endpoint import csv, filename=%s, body=%s', filename, request) - minio_client = MinioClient() - response = minio_client.download_file(filename) + s3_client = S3Client() + response = s3_client.download_file(filename) if response is False: return Response(), 400 return Response(json.dumps(response)), 202 @@ -117,8 +117,8 @@ def import_csv(filename): @swag_from("ds-yml/export.yml") def import_csv(filename): 
logging.debug('endpoint export csv, filename=%s, body=%s', filename, request) - minio_client = MinioClient() - response = minio_client.upload_file(filename) + s3_client = S3Client() + response = s3_client.upload_file(filename) if response is False: return Response(), 400 return Response(), 202 diff --git a/dbrepo-data-db/sidecar/clients/minio_client.py b/dbrepo-data-db/sidecar/clients/s3_client.py similarity index 72% rename from dbrepo-data-db/sidecar/clients/minio_client.py rename to dbrepo-data-db/sidecar/clients/s3_client.py index 9f38a83b497d5ebd4faadcd859a44dd244f4705e..65766cc02f248f93ad5fe74070552ee93dd9d176 100644 --- a/dbrepo-data-db/sidecar/clients/minio_client.py +++ b/dbrepo-data-db/sidecar/clients/s3_client.py @@ -6,14 +6,14 @@ import sys from botocore.exceptions import ClientError -class MinioClient: +class S3Client: def __init__(self): endpoint_url = os.getenv('S3_STORAGE_ENDPOINT', 'http://localhost:9000') - aws_access_key_id = os.getenv('S3_ACCESS_KEY_ID', 'minioadmin') - aws_secret_access_key = os.getenv('S3_SECRET_ACCESS_KEY', 'minioadmin') - logging.info("retrieve file from S3, endpoint_url=%s, aws_access_key_id=%s, aws_secret_access_key=(hidden)", - endpoint_url, aws_access_key_id) + aws_access_key_id = os.getenv('S3_ACCESS_KEY_ID', 'seaweedfsadmin') + aws_secret_access_key = os.getenv('S3_SECRET_ACCESS_KEY', 'seaweedfsadmin') + logging.info( + f"retrieve file from S3, endpoint_url={endpoint_url}, aws_access_key_id={aws_access_key_id}, aws_secret_access_key=(hidden)") self.client = boto3.client(service_name='s3', endpoint_url=endpoint_url, aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key) self.bucket_exists_or_exit("dbrepo-upload") @@ -29,7 +29,7 @@ class MinioClient: filepath = os.path.join("/tmp/", filename) try: self.client.upload_file(filepath, "dbrepo-download", filename) - logging.info("Uploaded .csv %s with key %s", filepath, filename) + logging.info(f"Uploaded .csv {filepath} with key {filename} into bucket dbrepo-download") return True except ClientError as e: logging.error(e) @@ -46,7 +46,7 @@ class MinioClient: filepath = os.path.join("/tmp/", filename) try: self.client.download_file("dbrepo-upload", filename, filepath) - logging.info("Downloaded .csv with key %s into %s", filename, filepath) + logging.info(f"Downloaded .csv with key {filename} into {filepath} from bucket dbrepo-upload") return True except ClientError as e: logging.error(e) @@ -58,10 +58,10 @@ class MinioClient: logging.debug(f"file with name {filename} exists in bucket with name {bucket}") except ClientError as e: if e.response["Error"]["Code"] == "404": - logging.error("Failed to find key %s in bucket %s", filename, bucket) + logging.error(f"Failed to find key {filename} in bucket {bucket}") else: - logging.error("Unexpected error when finding key %s in bucket %s: %s", filename, bucket, - e.response["Error"]["Code"]) + logging.error( + f"Unexpected error when finding key {filename} in bucket {bucket}: {e.response['Error']['Code']}") raise e def bucket_exists_or_exit(self, bucket): @@ -70,8 +70,7 @@ class MinioClient: logging.debug(f"bucket {bucket} exists.") except ClientError as e: if e.response["Error"]["Code"] == "404": - logging.error("Failed to find bucket %s", bucket) + logging.error(f"Failed to find bucket {bucket}") else: - logging.error("Unexpected error when finding bucket %s: %s", bucket, - e.response["Error"]["Code"]) + logging.error(f"Unexpected error when finding bucket {bucket}: {e.response['Error']['Code']}") sys.exit(1) diff --git 
a/dbrepo-gateway-service/dbrepo.conf b/dbrepo-gateway-service/dbrepo.conf index 38f846d0c67eeff3e0f9c885d0a4287bf33d00ed..d05efc2ef902923b1b6ea5de57c7d2bbfc1a6014 100644 --- a/dbrepo-gateway-service/dbrepo.conf +++ b/dbrepo-gateway-service/dbrepo.conf @@ -30,10 +30,6 @@ upstream search-db-dashboard { server search-db-dashboard:5601; } -upstream storage-service { - server storage-service:9001; - } - upstream upload { server upload-service:1080; } @@ -60,22 +56,6 @@ server { proxy_read_timeout 90; } - location /admin/storage/ { - rewrite /admin/storage/(.*) /$1 break; - # http - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - chunked_transfer_encoding off; - # proxy - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_pass http://storage-service; - proxy_read_timeout 90; - } - location /api/broker { rewrite /api/broker/(.*) /admin/broker/api/$1 break; proxy_set_header Host $host; diff --git a/dbrepo-metadata-service/Dockerfile b/dbrepo-metadata-service/Dockerfile index f3cebd4f4e6caa3e271d2ba4b2fa5967fb3d0fed..4a7bd0a5ba6726678a5894f6d04ef44fa82f078f 100644 --- a/dbrepo-metadata-service/Dockerfile +++ b/dbrepo-metadata-service/Dockerfile @@ -75,8 +75,8 @@ ENV DATACITE_PREFIX="" ENV DATACITE_USERNAME="" ENV DATACITE_PASSWORD="" ENV S3_STORAGE_ENDPOINT="http://storage-service:9000" -ENV S3_ACCESS_KEY_ID="minioadmin" -ENV S3_SECRET_ACCESS_KEY="minioadmin" +ENV S3_ACCESS_KEY_ID="seaweedfsadmin" +ENV S3_SECRET_ACCESS_KEY="seaweedfsadmin" WORKDIR /app diff --git a/dbrepo-storage-service/Dockerfile b/dbrepo-storage-service/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..39eaae4603993fe24f9ff0b8d113c0d7149301bc --- /dev/null +++ b/dbrepo-storage-service/Dockerfile @@ -0,0 +1,10 @@ +FROM chrislusf/seaweedfs:3.59 AS runtime + +RUN apk add curl + +WORKDIR /app + +COPY ./create-buckets.sh ./create-buckets.sh +COPY ./docker-entrypoint.sh ./docker-entrypoint.sh + +ENTRYPOINT [ "/bin/sh", "./docker-entrypoint.sh" ] \ No newline at end of file diff --git a/dbrepo-storage-service/create-buckets.sh b/dbrepo-storage-service/create-buckets.sh new file mode 100644 index 0000000000000000000000000000000000000000..bc57fdf5cc90edef167bc9b849bb48d8e3a29ebd --- /dev/null +++ b/dbrepo-storage-service/create-buckets.sh @@ -0,0 +1,18 @@ +#!/bin/sh +log() { + echo "$(date '+%Y-%m-%d %H:%M:%S') $1" +} + +log "Sleep 15s to start S3 API" +sleep 15 +log "Start polling" +until curl -sSL 127.0.0.1:9000 +do + log "S3 API not ready on port 9000, wait 5s ..." 
+ sleep 5 +done +log "Ready" +echo "s3.bucket.create -name dbrepo-upload" | weed shell +log "Created bucket dbrepo-upload" +echo "s3.bucket.create -name dbrepo-download" | weed shell +log "Created bucket dbrepo-download" \ No newline at end of file diff --git a/dbrepo-storage-service/docker-entrypoint.sh b/dbrepo-storage-service/docker-entrypoint.sh new file mode 100644 index 0000000000000000000000000000000000000000..a1121f5443b2ba26b3e56b4f50da2b6b0e4f7b8d --- /dev/null +++ b/dbrepo-storage-service/docker-entrypoint.sh @@ -0,0 +1,3 @@ +#!/bin/sh +/bin/sh ./create-buckets.sh & +/entrypoint.sh server -dir=/data -s3 -s3.port=9000 -s3.config=/app/s3_config.json -metricsPort=9091 \ No newline at end of file diff --git a/dbrepo-storage-service/s3_config.json b/dbrepo-storage-service/s3_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f270753cdc96278a039e483966ea864a16781cfe --- /dev/null +++ b/dbrepo-storage-service/s3_config.json @@ -0,0 +1,20 @@ +{ + "identities": [ + { + "name": "admin", + "credentials": [ + { + "accessKey": "seaweedfsadmin", + "secretKey": "seaweedfsadmin" + } + ], + "actions": [ + "Read", + "Write", + "List", + "Tagging", + "Admin" + ] + } + ] +} \ No newline at end of file diff --git a/dbrepo-ui/dbrepo.config.json b/dbrepo-ui/dbrepo.config.json index 7ae9d88cc06a78597dba85547be2a1103a953227..ba2286af337a8ebc5a46afecb09aa42f160aef15 100644 --- a/dbrepo-ui/dbrepo.config.json +++ b/dbrepo-ui/dbrepo.config.json @@ -61,11 +61,6 @@ "blank": true, "href": "http://localhost/admin/dashboard/" }, - { - "text": "Storage Admin", - "blank": true, - "href": "http://localhost/admin/storage/" - }, { "text": "RabbitMQ Admin", "blank": true, diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index f27ba6337fd003beca1e20f50d440db588efc734..463fa97f2dc8615ae3226879a597752e530b2200 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -39,7 +39,7 @@ services: restart: "no" container_name: dbrepo-data-db hostname: data-db - image: docker.io/bitnami/mariadb:11.1.3 + image: docker.io/bitnami/mariadb-galera:11.1.3-debian-11-r0 volumes: - data-db-data:/bitnami/mariadb - "${SHARED_FILESYSTEM:-/tmp}:/tmp" @@ -47,6 +47,7 @@ services: - "3307:3306" environment: MARIADB_ROOT_PASSWORD: "${USER_DB_PASSWORD:-dbrepo}" + MARIADB_GALERA_MARIABACKUP_PASSWORD: "${USER_DB_BACKUP_PASSWORD:-dbrepo}" healthcheck: test: mysqladmin ping --user="${USER_DB_USERNAME:-root}" --password="${USER_DB_PASSWORD:-dbrepo}" --silent interval: 10s @@ -178,8 +179,8 @@ services: - "5000:5000" environment: S3_STORAGE_ENDPOINT: "${STORAGE_ENDPOINT:-http://storage-service:9000}" - S3_ACCESS_KEY_ID: "${STORAGE_USERNAME:-minioadmin}" - S3_SECRET_ACCESS_KEY: "${STORAGE_PASSWORD:-minioadmin}" + S3_ACCESS_KEY_ID: "${STORAGE_USERNAME:-seaweedfsadmin}" + S3_SECRET_ACCESS_KEY: "${STORAGE_PASSWORD:-seaweedfsadmin}" volumes: - "${SHARED_FILESYSTEM:-/tmp}:/tmp" healthcheck: @@ -258,8 +259,8 @@ services: environment: FLASK_DEBUG: ${SEARCH_DEBUG_MODE:-true} S3_STORAGE_ENDPOINT: "${STORAGE_ENDPOINT:-http://storage-service:9000}" - S3_ACCESS_KEY_ID: "${STORAGE_USERNAME:-minioadmin}" - S3_SECRET_ACCESS_KEY: ${STORAGE_PASSWORD:-minioadmin} + S3_ACCESS_KEY_ID: "${STORAGE_USERNAME:-seaweedfsadmin}" + S3_SECRET_ACCESS_KEY: ${STORAGE_PASSWORD:-seaweedfsadmin} volumes: - "${SHARED_FILESYSTEM:-/tmp}:/tmp" healthcheck: @@ -342,22 +343,18 @@ services: restart: "no" container_name: dbrepo-storage-service hostname: storage-service - image: docker.io/bitnami/minio:2023-debian-11 - environment: - 
MINIO_ROOT_USER: "${STORAGE_USERNAME:-minioadmin}" - MINIO_ROOT_PASSWORD: "${STORAGE_PASSWORD:-minioadmin}" - MINIO_DEFAULT_BUCKETS: "${STORAGE_DBREPO_BUCKET:-dbrepo-upload:upload,dbrepo-download:download}" - MINIO_REGION_NAME: "${STORAGE_REGION_NAME:-eu-west-1}" - MINIO_BROWSER_REDIRECT_URL: "${STORAGE_BASE_URL:-http://localhost/admin/storage/}" + image: docker.io/dbrepo/storage-service:latest + build: ./dbrepo-storage-service ports: - 9000:9000 + volumes: + - ./dist/s3_config.json:/app/s3_config.json + - storage-service-data:/data healthcheck: - test: [ "CMD", "mc", "ready", "local" ] - interval: 5s + test: curl -sSL 127.0.0.1:9000 || exit 1 + interval: 10s timeout: 5s - retries: 5 - volumes: - - storage-service-data:/bitnami/minio/data + retries: 12 logging: driver: json-file @@ -373,9 +370,9 @@ services: - "-s3-endpoint=${STORAGE_ENDPOINT:-http://storage-service:9000}" - "-s3-bucket=dbrepo-upload" environment: - AWS_ACCESS_KEY_ID: "${STORAGE_USERNAME:-minioadmin}" - AWS_SECRET_ACCESS_KEY: "${STORAGE_PASSWORD:-minioadmin}" - AWS_REGION: "${STORAGE_REGION_NAME:-eu-west-1}" + AWS_ACCESS_KEY_ID: "${STORAGE_USERNAME:-seaweedfsadmin}" + AWS_SECRET_ACCESS_KEY: "${STORAGE_PASSWORD:-seaweedfsadmin}" + AWS_REGION: "${STORAGE_REGION_NAME:-default}" depends_on: dbrepo-storage-service: condition: service_healthy diff --git a/docker-compose.yml b/docker-compose.yml index 6431cb7a414b62fffdac3abe41222b1ce13cf800..711bea696e40dc3b073eea11a31c52b47bc54d66 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -183,8 +183,8 @@ services: - "5000:5000" environment: S3_STORAGE_ENDPOINT: "${STORAGE_ENDPOINT:-http://storage-service:9000}" - S3_ACCESS_KEY_ID: "${STORAGE_USERNAME:-minioadmin}" - S3_SECRET_ACCESS_KEY: "${STORAGE_PASSWORD:-minioadmin}" + S3_ACCESS_KEY_ID: "${STORAGE_USERNAME:-seaweedfsadmin}" + S3_SECRET_ACCESS_KEY: "${STORAGE_PASSWORD:-seaweedfsadmin}" volumes: - "${SHARED_FILESYSTEM:-/tmp}:/tmp" healthcheck: @@ -266,8 +266,8 @@ services: environment: FLASK_DEBUG: ${SEARCH_DEBUG_MODE:-true} S3_STORAGE_ENDPOINT: "${STORAGE_ENDPOINT:-http://storage-service:9000}" - S3_ACCESS_KEY_ID: "${STORAGE_USERNAME:-minioadmin}" - S3_SECRET_ACCESS_KEY: ${STORAGE_PASSWORD:-minioadmin} + S3_ACCESS_KEY_ID: "${STORAGE_USERNAME:-seaweedfsadmin}" + S3_SECRET_ACCESS_KEY: ${STORAGE_PASSWORD:-seaweedfsadmin} volumes: - "${SHARED_FILESYSTEM:-/tmp}:/tmp" healthcheck: @@ -352,22 +352,18 @@ services: restart: "no" container_name: dbrepo-storage-service hostname: storage-service - image: docker.io/bitnami/minio:2023-debian-11 - environment: - MINIO_ROOT_USER: "${STORAGE_USERNAME:-minioadmin}" - MINIO_ROOT_PASSWORD: "${STORAGE_PASSWORD:-minioadmin}" - MINIO_DEFAULT_BUCKETS: "${STORAGE_DBREPO_BUCKET:-dbrepo-upload:upload,dbrepo-download:download}" - MINIO_REGION_NAME: "${STORAGE_REGION_NAME:-eu-west-1}" - MINIO_BROWSER_REDIRECT_URL: "${STORAGE_BASE_URL:-http://localhost/admin/storage/}" + image: dbrepo-storage-service:latest + build: ./dbrepo-storage-service ports: - 9000:9000 + volumes: + - ./dbrepo-storage-service/s3_config.json:/app/s3_config.json + - storage-service-data:/data healthcheck: - test: [ "CMD", "mc", "ready", "local" ] - interval: 5s + test: curl -sSL 127.0.0.1:9000 || exit 1 + interval: 10s timeout: 5s - retries: 5 - volumes: - - storage-service-data:/bitnami/minio/data + retries: 12 logging: driver: json-file @@ -383,9 +379,9 @@ services: - "-s3-endpoint=${STORAGE_ENDPOINT:-http://storage-service:9000}" - "-s3-bucket=dbrepo-upload" environment: - AWS_ACCESS_KEY_ID: "${STORAGE_USERNAME:-minioadmin}" - 
AWS_SECRET_ACCESS_KEY: "${STORAGE_PASSWORD:-minioadmin}" - AWS_REGION: "${STORAGE_REGION_NAME:-eu-west-1}" + AWS_ACCESS_KEY_ID: "${STORAGE_USERNAME:-seaweedfsadmin}" + AWS_SECRET_ACCESS_KEY: "${STORAGE_PASSWORD:-seaweedfsadmin}" + AWS_REGION: "${STORAGE_REGION_NAME:-default}" depends_on: dbrepo-storage-service: condition: service_healthy diff --git a/install.sh b/install.sh index 0a2941913fc5772aad5d79ee48448ba9c30fe408..da07dd9482681299a47356e55bbd1097ea338729 100644 --- a/install.sh +++ b/install.sh @@ -23,6 +23,7 @@ curl -sSL -o ./dist/definitions.json https://gitlab.phaidra.org/fair-data-austri curl -sSL -o ./dist/dbrepo.conf https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/raw/dev/dbrepo-gateway-service/dbrepo.conf curl -sSL -o ./dist/opensearch_dashboards.yml https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/raw/dev/dbrepo-search-db/opensearch_dashboards.yml curl -sSL -o ./dist/dbrepo.config.json https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/raw/dev/dbrepo-ui/dbrepo.config.json +curl -sSL -o ./dist/s3_config.json https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/raw/dev/dbrepo-storage-service/s3_config.json echo "[📦] Pulling images ..." docker compose pull
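
Since every service in this patch talks to the storage through boto3, a minimal smoke test of the new SeaweedFS S3 endpoint also documents the download path that the reworked Examples section no longer shows. This is a sketch only: the `seaweedfsadmin` credentials and the two buckets come from `s3_config.json` and `create-buckets.sh` in this patch, while the endpoint URL and local file paths are hypothetical placeholders.

```python
import boto3

# Endpoint and default credentials as configured in this patch; production
# deployments override them via STORAGE_USERNAME / STORAGE_PASSWORD.
client = boto3.client(service_name='s3',
                      endpoint_url='http://localhost:9000',
                      aws_access_key_id='seaweedfsadmin',
                      aws_secret_access_key='seaweedfsadmin')

# Upload a local CSV file (hypothetical path) into the upload bucket.
client.upload_file('/path/to/file.csv', 'dbrepo-upload', 'file.csv')

# List the keys in the bucket to confirm the object landed.
for obj in client.list_objects_v2(Bucket='dbrepo-upload').get('Contents', []):
    print(obj['Key'], obj['Size'])

# Download an exported file from the download bucket, symmetrically to the
# AWS CLI examples in the docs (the key must already exist, e.g. after an export).
client.download_file('dbrepo-download', 'file.csv', '/path/to/file.csv')
```

If the listing prints `file.csv` with a non-zero size, the S3 identity from `s3_config.json` and the bucket creation performed by `create-buckets.sh` are both in place.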