Skip to content
Snippets Groups Projects
Commit 5ce4cf94 authored by Martin Weise's avatar Martin Weise
Browse files

Merge branch 'master' into 'release-1.4.4'

Master

See merge request !288
parents c60a2885 94ab6205
No related branches found
No related tags found
4 merge requests!296Dev,!293Dev,!289Dev,!288Master
Showing
with 271 additions and 486 deletions
......@@ -14,10 +14,11 @@ services:
restart: "no"
container_name: dbrepo-metadata-db
hostname: metadata-db
image: docker.io/dbrepo/metadata-db:1.4.4
image: docker.io/bitnami/mariadb:11.1.3-debian-11-r6
volumes:
- metadata-db-data:/bitnami/mariadb
- ./dist/2_setup-data.sql:/docker-entrypoint-initdb.d/2_setup-data.sql
- ./dbrepo-metadata-db/setup-schema.sql:/docker-entrypoint-initdb.d/1_setup-schema.sql
- ./dbrepo-metadata-db/setup-data.sql:/docker-entrypoint-initdb.d/2_setup-data.sql
ports:
- "3306:3306"
environment:
......@@ -35,7 +36,7 @@ services:
restart: "no"
container_name: dbrepo-data-db
hostname: data-db
image: docker.io/bitnami/mariadb-galera:11.2.2-debian-11-r0
image: docker.io/bitnami/mariadb:11.1.3-debian-11-r6
volumes:
- data-db-data:/bitnami/mariadb
- "${SHARED_VOLUME:-/tmp}:/tmp"
......@@ -43,7 +44,6 @@ services:
- "3307:3306"
environment:
MARIADB_ROOT_PASSWORD: "${USER_DB_PASSWORD:-dbrepo}"
MARIADB_GALERA_MARIABACKUP_PASSWORD: "${USER_DB_BACKUP_PASSWORD:-dbrepo}"
healthcheck:
test: mysqladmin ping --user="${USER_DB_USERNAME:-root}" --password="${USER_DB_PASSWORD:-dbrepo}" --silent
interval: 10s
......@@ -56,7 +56,7 @@ services:
restart: "no"
container_name: dbrepo-auth-db
hostname: auth-db
image: docker.io/bitnami/mariadb:11.2.2-debian-11-r0
image: docker.io/bitnami/mariadb:11.1.3-debian-11-r6
volumes:
- auth-db-data:/bitnami/mariadb
ports:
......@@ -76,7 +76,7 @@ services:
restart: "no"
container_name: dbrepo-auth-service
hostname: auth-service
image: docker.io/dbrepo/auth-service:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/auth-service:1.4.4
healthcheck:
test: curl -sSL 'http://0.0.0.0:8080/realms/dbrepo' | grep "dbrepo" || exit 1
interval: 10s
......@@ -98,7 +98,7 @@ services:
restart: "no"
container_name: dbrepo-metadata-service
hostname: metadata-service
image: docker.io/dbrepo/metadata-service:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/metadata-service:1.4.4
volumes:
- "${SHARED_VOLUME:-/tmp}:/tmp"
environment:
......@@ -124,7 +124,7 @@ services:
DELETED_RECORD: "${DELETED_RECORD:-persistent}"
GRANULARITY: "${GRANULARITY:-YYYY-MM-DDThh:mm:ssZ}"
JWT_PUBKEY: "${JWT_PUBKEY:-MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqqnHQ2BWWW9vDNLRCcxD++xZg/16oqMo/c1l+lcFEjjAIJjJp/HqrPYU/U9GvquGE6PbVFtTzW1KcKawOW+FJNOA3CGo8Q1TFEfz43B8rZpKsFbJKvQGVv1Z4HaKPvLUm7iMm8Hv91cLduuoWx6Q3DPe2vg13GKKEZe7UFghF+0T9u8EKzA/XqQ0OiICmsmYPbwvf9N3bCKsB/Y10EYmZRb8IhCoV9mmO5TxgWgiuNeCTtNCv2ePYqL/U0WvyGFW0reasIK8eg3KrAUj8DpyOgPOVBn3lBGf+3KFSYi+0bwZbJZWqbC/Xlk20Go1YfeJPRIt7ImxD27R/lNjgDO/MwIDAQAB}"
LOG_LEVEL: "${LOG_LEVEL:-info}"
LOG_LEVEL: ${LOG_LEVEL:-info}
METADATA_DB: "${METADATA_DB:-dbrepo}"
METADATA_HOST: "${METADATA_HOST:-metadata-db}"
METADATA_JDBC_EXTRA_ARGS: "${METADATA_JDBC_EXTRA_ARGS:-}"
......@@ -134,13 +134,13 @@ services:
REPOSITORY_NAME: "${REPOSITORY_NAME:-Database Repository}"
SEARCH_SERVICE_ENDPOINT: "${SEARCH_SERVICE_ENDPOINT:-http://gateway-service}"
S3_ACCESS_KEY_ID: "${S3_ACCESS_KEY_ID:-seaweedfsadmin}"
S3_ENDPOINT: "${S3_ENDPOINT:-http://gateway-service/api/storage}"
S3_ENDPOINT: "${S3_ENDPOINT:-http://storage-service:9000}"
S3_EXPORT_BUCKET: "${S3_EXPORT_BUCKET:-dbrepo-download}"
S3_IMPORT_BUCKET: "${S3_IMPORT_BUCKET:-dbrepo-upload}"
S3_SECRET_ACCESS_KEY: "${S3_SECRET_ACCESS_KEY:-seaweedfsadmin}"
SPARQL_CONNECTION_TIMEOUT: "${SPARQL_CONNECTION_TIMEOUT:-10000}"
healthcheck:
test: wget -qO- localhost:8080/actuator/health/readiness | grep -q "UP" || exit 1
test: curl -sSL localhost:8080/actuator/health/liveness | grep 'UP' || exit 1
interval: 10s
timeout: 5s
retries: 12
......@@ -160,7 +160,7 @@ services:
restart: "no"
container_name: dbrepo-analyse-service
hostname: analyse-service
image: docker.io/dbrepo/analyse-service:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/analyse-service:1.4.4
environment:
ADMIN_PASSWORD: "${ADMIN_PASSWORD:-admin}"
ADMIN_USERNAME: "${ADMIN_USERNAME:-admin}"
......@@ -211,7 +211,7 @@ services:
restart: "no"
container_name: dbrepo-search-db
hostname: search-db
image: docker.io/dbrepo/search-db:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/search-db:1.4.4
healthcheck:
test: curl -sSL localhost:9200/_plugins/_security/health | jq .status | grep UP
interval: 10s
......@@ -235,7 +235,7 @@ services:
restart: "no"
container_name: dbrepo-search-service
hostname: search-service
image: docker.io/dbrepo/search-service:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/search-service:1.4.4
environment:
ADMIN_PASSWORD: "${ADMIN_PASSWORD:-admin}"
ADMIN_USERNAME: "${ADMIN_USERNAME:-admin}"
......@@ -243,6 +243,7 @@ services:
AUTH_SERVICE_CLIENT_SECRET: ${AUTH_SERVICE_CLIENT:-MUwRc7yfXSJwX8AdRMWaQC3Nep1VjwgG}
AUTH_SERVICE_ENDPOINT: ${AUTH_SERVICE_ENDPOINT:-http://auth-service:8080}
COLLECTION: ${COLLECTION:-['database','table','column','identifier','unit','concept','user','view']}
GATEWAY_SERVICE_ENDPOINT: ${GATEWAY_SERVICE_ENDPOINT:-http://gateway-service}
OPENSEARCH_HOST: ${OPENSEARCH_HOST:-search-db}
OPENSEARCH_PORT: ${OPENSEARCH_PORT:-9200}
OPENSEARCH_USERNAME: ${OPENSEARCH_USERNAME:-admin}
......@@ -253,7 +254,7 @@ services:
restart: "no"
container_name: dbrepo-data-db-sidecar
hostname: data-db-sidecar
image: docker.io/dbrepo/data-db-sidecar:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/data-db-sidecar:1.4.4
environment:
S3_ACCESS_KEY_ID: "${S3_ACCESS_KEY_ID:-seaweedfsadmin}"
S3_ENDPOINT: "${S3_ENDPOINT:-http://storage-service:9000}"
......@@ -275,7 +276,7 @@ services:
restart: "no"
container_name: dbrepo-ui
hostname: ui
image: docker.io/dbrepo/ui:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/ui:1.4.4
depends_on:
dbrepo-search-service:
condition: service_started
......@@ -293,7 +294,7 @@ services:
restart: "no"
container_name: dbrepo-gateway-service
hostname: gateway-service
image: docker.io/nginx:1.25-alpine-slim
image: docker.io/nginx:1.27.0-alpine3.19-slim
ports:
- "80:80"
- "443:443"
......@@ -319,7 +320,7 @@ services:
restart: "no"
container_name: dbrepo-search-service-init
hostname: search-service-init
image: docker.io/dbrepo/search-service-init:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/search-service-init:1.4.4
environment:
GATEWAY_SERVICE_ENDPOINT: ${GATEWAY_SERVICE_ENDPOINT:-http://gateway-service}
OPENSEARCH_HOST: ${OPENSEARCH_HOST:-search-db}
......@@ -354,7 +355,7 @@ services:
restart: "no"
container_name: dbrepo-storage-service-init
hostname: storage-service-init
image: docker.io/dbrepo/storage-service-init:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/storage-service-init:1.4.4
environment:
SEAWEEDFS_ENDPOINT: "${STORAGE_SEAWEEDFS_ENDPOINT:-storage-service:9333}"
depends_on:
......@@ -391,7 +392,7 @@ services:
restart: "no"
container_name: dbrepo-data-service
hostname: data-service
image: docker.io/dbrepo/data-service:1.4.4
image: registry.datalab.tuwien.ac.at/dbrepo/data-service:1.4.4
volumes:
- "${SHARED_VOLUME:-/tmp}:/tmp"
environment:
......@@ -429,7 +430,7 @@ services:
S3_IMPORT_BUCKET: "${S3_IMPORT_BUCKET:-dbrepo-upload}"
S3_SECRET_ACCESS_KEY: "${S3_SECRET_ACCESS_KEY:-seaweedfsadmin}"
healthcheck:
test: wget -qO- localhost:8080/actuator/health/readiness | grep -q "UP" || exit 1
test: curl -sSL localhost:8080/actuator/health/liveness | grep 'UP' || exit 1
interval: 10s
timeout: 5s
retries: 12
......
......@@ -6,7 +6,7 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`dbrepo/analyse-service:__APPVERSION__`](https://hub.docker.com/r/dbrepo/analyse-service)
Image: [`registry.datalab.tuwien.ac.at/dbrepo/analyse-service:1.4.4`](https://hub.docker.com/r/dbrepo/analyse-service)
* Ports: 5000/tcp
* Prometheus: `http://<hostname>:5000/metrics`
......@@ -15,37 +15,37 @@ author: Martin Weise
## Overview
It suggests data types for the [User Interface](./system-other-ui) when creating a table from a
It suggests data types for the [User Interface](../ui) when creating a table from a
*comma separated values* (CSV) -file. It recommends enumerations for columns and returns e.g. a list of potential
primary key candidates. The researcher is able to confirm these suggestions manually. Moreover, the Analyse Service
determines basic statistical properties of numerical columns.
### Analysis
After [uploading](./system-services-storage/#buckets) the CSV-file into the `dbrepo-upload` bucket of
the [Storage Service](./system-services-storage), analysis for data types and primary keys follows the flow:
After [uploading](../storage-service/#buckets) the CSV-file into the `dbrepo-upload` bucket of
the [Storage Service](../storage-service), analysis for data types and primary keys follows the flow:
1. Retrieve the CSV-file from the `dbrepo-upload` bucket of the Storage Service as data stream (=nothing is stored in
the service) with the [`boto3`](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client.
2. When no separator is known, the Analyse Service tries to guess the separator from the first line
with [`csv.Sniffer().sniff(...)`](https://docs.python.org/3/library/csv.html#csv.Sniffer). This step is optional when
the separator was provided via HTTP-payload: `{"separator": ";", ...}`
3. With the separator known (either from step 2 or via HTTP-payload),
the [`messytables.CSVTableSet(...)`](https://messytables.readthedocs.io/en/latest/#csv-support) guesses the headers
and column types and enums, if the HTTP-payload contains `{"enum": true, ...}`.
3. With the separator known (either from step 2 or via HTTP-payload), the [`Pandas`](https://pypi.org/project/pandas/)
guesses the headers and column types and enums, if the HTTP-payload contains `{"enum": true, ...}`. The data type
is guessed by a combination of Pandas and heuristics.
### Examples
See the [usage page](./usage-analyse/) for examples.
See the [usage page](..) for examples.
## Limitations
!!! question "Do you miss functionality? Do these limitations affect you?"
We strongly encourage you to help us implement it as we are welcoming contributors to open-source software and get
in [contact](./contact) with us, we happily answer requests for collaboration with attached CV and your programming
in [contact](../../contact) with us, we happily answer requests for collaboration with attached CV and your programming
experience!
## Security
1. Credentials for the [Storage Service](./system-services-storage) are stored in plaintext environment variables.
1. Credentials for the [Storage Service](../storage-service) are stored in plaintext environment variables.
......@@ -6,17 +6,23 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`dbrepo/authentication-service:__APPVERSION__`](https://hub.docker.com/r/dbrepo/authentication-service)
Image: [`registry.datalab.tuwien.ac.at/dbrepo/authentication-service:1.4.4`](https://hub.docker.com/r/dbrepo/authentication-service)
* Ports: 8080/tcp
* UI: `http://<hostname>/api/auth/admin/`
* UI: `http://<hostname>/api/auth/`
## Overview
By default, users are created using the [User Interface](../system-other-ui) and the sign-up page in the User Interface.
This creates a new user in the [Authentication Database](../system-databases-authentication), the user identity is then
managed by the
Authentication Service.
By default, users are created using the [User Interface](../ui) and the sign-up page in the User Interface.
This creates a new user in Keycloak. The user identity is then managed by the Auth Service. Only a very small subset
of immutable properties (id, username) is mirrored in the [Metadata Database](../metadata-db) for faster access.
## Identities
:octicons-tag-16:{ title="Minimum version" } 1.4.4
Identities can also be added in Keycloak directly. When requesting a JWT token from the `/api/user` endpoint, the
immutable properties mentioned in the [Overview](#overview) are copied, transparently to the user, on first login.
## Groups
......@@ -41,163 +47,16 @@ Each of the composite role has a set of other associated composite roles.
</figure>
There is one role for one specific action in the services. For example: the `create-database` role authorizes a user to
create a database in a Docker container. Therefore,
the [`DatabaseEndpoint.java`](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/blob/a5bdd1e2169bae6497e2f7eee82dad8b9b059850/fda-database-service/rest-service/src/main/java/at/tuwien/endpoints/DatabaseEndpoint.java#L78)
endpoint requires a JWT access token with this authority.
```java
@PostMapping
@PreAuthorize("hasAuthority('create-database')")
public ResponseEntity<DatabaseBriefDto> create(@NotNull Long containerId,
@Valid @RequestBody DatabaseCreateDto createDto,
@NotNull Principal principal) {
...
}
```
### Default Container Handling
| Name | Description |
|-------------------|-------------------------------|
| `find-container` | Can find a specific container |
| `list-containers` | Can list all containers |
### Default Database Handling
| Name | Description |
|------------------------------|------------------------------------------------------|
| `check-database-access` | Can check the access to a database of a user |
| `create-database` | Can create a database |
| `create-database-access` | Can give a new access to a database of a user |
| `delete-database-access` | Can delete the access to a database of a user |
| `find-database` | Can find a specific database in a container |
| `list-databases` | Can list all databases in a container |
| `modify-database-image` | Can update the database image |
| `modify-database-owner` | Can modify the database owner |
| `modify-database-visibility` | Can modify the database visibility (public, private) |
| `update-database-access` | Can update the access to a database of a user |
### Default Table Handling
| Name | Description |
|---------------------------------|------------------------------------------------------|
| `create-table` | Can create a table |
| `find-tables` | Can list a specific table in a database |
| `list-tables` | Can list all tables |
| `modify-table-column-semantics` | Can modify the column semantics of a specific column |
| `delete-table` | Can delete tables owned by the user in a database |
### Default Query Handling
| Name | Description |
|---------------------------|-----------------------------------------------|
| `create-database-view` | Can create a view in a database |
| `delete-database-view` | Can delete a view in a database |
| `delete-table-data` | Can delete data in a table |
| `execute-query` | Can execute a query statement |
| `export-query-data` | Can export the data that a query has produced |
| `export-table-data` | Can export the data stored in a table |
| `find-database-view` | Can find a specific database view |
| `find-query` | Can find a specific query in the query store |
| `insert-table-data` | Can insert data into a table |
| `list-database-views` | Can list all database views |
| `list-queries` | Can list all queries in the query store |
| `persist-query` | Can persist a query in the query store |
| `re-execute-query` | Can re-execute a query to reproduce a result |
| `view-database-view-data` | Can view the data produced by a database view |
| `view-table-data` | Can view the data in a table |
| `view-table-history` | Can view the data history of a table |
### Default Identifier Handling
| Name | Description |
|---------------------|---------------------------------------------|
| `create-identifier` | Can create an identifier (subset, database) |
| `find-identifier` | Can find a specific identifier |
| `list-identifier` | Can list all identifiers |
### Default User Handling
| Name | Description |
|---------------------------|-----------------------------------------|
| `modify-user-theme` | Can modify the user theme (light, dark) |
| `modify-user-information` | Can modify the user information |
### Default Maintenance Handling
| Name | Description |
|------------------------------|------------------------------------------|
| `create-maintenance-message` | Can create a maintenance message banner |
| `delete-maintenance-message` | Can delete a maintenance message banner |
| `find-maintenance-message` | Can find a maintenance message banner |
| `list-maintenance-messages` | Can list all maintenance message banners |
| `update-maintenance-message` | Can update a maintenance message banner |
### Default Semantics Handling
| Name | Description |
|---------------------------|-----------------------------------------------------------------|
| `create-semantic-unit` | Can save a previously unknown unit for a table column |
| `create-semantic-concept` | Can save a previously unknown concept for a table column |
| `execute-semantic-query` | Can query remote SPARQL endpoints to get labels and description |
| `table-semantic-analyse` | Can automatically suggest units and concepts for a table |
### Escalated User Handling
| Name | Description |
|-------------|-----------------------------------------------|
| `find-user` | Can list user information for a specific user |
### Escalated Container Handling
| Name | Description |
|--------------------|--------------------------|
| `create-container` | Can create a container |
| `delete-container` | Can delete any container |
### Escalated Database Handling
| Name | Description |
|-------------------|------------------------------------------|
| `delete-database` | Can delete any database in any container |
### Escalated Table Handling
| Name | Description |
|------------------------|--------------------------------------|
| `delete-foreign-table` | Can delete any table in any database |
### Escalated Query Handling
| Name | Description |
|------|-------------|
| / | |
### Escalated Identifier Handling
| Name | Description |
|------------------------------|---------------------------------------------------|
| `create-foreign-identifier` | Can create an identifier to any database or query |
| `delete-identifier` | Can delete any identifier |
| `modify-identifier-metadata` | Can modify any identifier metadata |
### Escalated Semantics Handling
| Name | Description |
|-----------------------------------------|----------------------------------------------|
| `create-ontology` | Can register a new ontology |
| `delete-ontology` | Can unregister an ontology |
| `list-ontologies` | Can list all ontologies |
| `modify-foreign-table-column-semantics` | Can modify any table column concept and unit |
| `update-ontology` | Can update ontology metadata |
| `update-semantic-concept` | Can update own table column concept |
| `update-semantic-unit` | Can update own table column unit |
create a database.
A full list of available roles can be obtained
from [`dbrepo-realm.json`](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/blob/fb8d14ba02ee32b9a69a30905437b5c9e28adc21/dbrepo-auth-service/dbrepo-realm.json#L46)
which is imported into Keycloak on startup.
## Limitations
* No support for sending e-mails through Keycloak by default.
* No support for temporary passwords.
* No support for adding identities in Keycloak directly.
* No support for multi-factor authentication.
!!! question "Do you miss functionality? Do these limitations affect you?"
......@@ -208,5 +67,5 @@ public ResponseEntity<DatabaseBriefDto> create(@NotNull Long containerId,
## Security
1. Mount your TLS certificate / private key pair into `/app/tls.crt` and `/app/tls.key` and
set `KC_HTTPS_CERTIFICATE_FILE=/app/tls.crt` and set `KC_HTTPS_CERTIFICATE_KEY_FILE=/app/tls.key`.
1. Keycloak should be configured to use TLS certificates, follow
the [official documentation](https://www.keycloak.org/server/enabletls).
......@@ -4,7 +4,7 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`bitnami/mariadb-galera:11.2.2-debian-11-r0`](https://hub.docker.com/r/bitnami/mariadb-galera)
Image: [`docker.io/bitnami/mariadb:11.1.3-debian-11-r6`](https://hub.docker.com/r/bitnami/mariadb)
* Ports: 3306/tcp
* JDBC: `jdbc:mariadb://<hostname>:3306`
......@@ -17,20 +17,26 @@ author: Martin Weise
## Overview
By default, only one Data Database is deployed. You can deploy multiple (different) Data Database instances and make
them available in the repository as follows:
The Data Database contains the research data. In the default configuration, only one database of this type is deployed.
Any number of MariaDB data databases can be integrated into DBRepo, even non-empty databases. The database needs to be
registered in the Metadata Database to be visible in the [User Interface](../ui) and usable from e.g. the Python
Library.
=== "Terminal"
## Architecture
```shell
curl \
-sSL \
http://<hostname>/api/container \
-X POST \
-d '{"name": "Data Database 2", "imageId": 1, "host": "example.com", "port": 3306, "privilegedUsername": "root", "privilegedPassword": "s3cr3t" }'
```
### Sidecar
We deploy a sidecar that handles the CSV-file upload/download operations between
the [Storage Service](../system-services-storage) and the Data Database using a Python Flask application and
the [`boto3`](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client until MariaDB supports S3
natively.
<figure markdown>
![Sidecar architecture detailed](../images/architecture-data-db.svg)
<figcaption>Sidecar that handles the CSV-file upload/download.</figcaption>
</figure>
### Settings
## Data
The procedures require the user-generated databases to have the same collation (because of comparison operations).
Ensure that the Data Database has the character set `utf8mb4` and collation `utf8mb4_general_ci` in your `my.cfg`:
......@@ -51,18 +57,6 @@ mariadb-galera:
extraFlags: "--character-set-server=utf8mb4 --collation-server=utf8mb4_general_ci"
```
### Sidecar
We deploy a sidecar that handles the CSV-file upload/download operations between
the [Storage Service](../system-services-storage) and the Data Database using a Python Flask application and
the [`boto3`](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) client until MariaDB supports S3
natively.
<figure markdown>
![Sidecar architecture detailed](../images/architecture-data-db.svg)
<figcaption>Sidecar that handles the CSV-file upload/download.</figcaption>
</figure>
### Backup
Export all databases with `--skip-lock-tables` option for MariaDB Galera clusters as it is not supported currently by
......
......@@ -6,7 +6,7 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`dbrepo/data-service:__APPVERSION__`](https://hub.docker.com/r/dbrepo/data-service)
Image: [`registry.datalab.tuwien.ac.at/dbrepo/data-service:1.4.4`](https://hub.docker.com/r/dbrepo/data-service)
* Ports: 9093/tcp
* Info: `http://<hostname>:9093/actuator/info`
......@@ -27,7 +27,7 @@ Data Service up.
!!! question "Do you miss functionality? Do these limitations affect you?"
We strongly encourage you to help us implement it as we are welcoming contributors to open-source software and get
in [contact](./contact) with us, we happily answer requests for collaboration with attached CV and your programming
in [contact](../../contact) with us, we happily answer requests for collaboration with attached CV and your programming
experience!
## Security
......
......@@ -6,15 +6,15 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`nginx:1.25-alpine-slim`](https://hub.docker.com/r/nginx)
Image: [`docker.io/nginx:1.27.0-alpine3.19-slim`](https://hub.docker.com/r/nginx)
* Ports: 80/tcp
## Overview
Provides a single point of access to the *application programming interface* (API) and configures a
standard [NGINX](https://www.nginx.com/) reverse proxy for load balancing. This component is optional if you already have a load balancer
or reverse proxy running.
standard [NGINX](https://www.nginx.com/) reverse proxy for load balancing. This component is optional if you already
have a load balancer or reverse proxy running.
## Settings
......@@ -41,14 +41,14 @@ If your TLS private key as a password, you need to specify it in the `dbrepo.con
### User Interface
To serve the [User Interface](./system-other-ui/) under different port than `80`, change the port mapping in
To serve the [User Interface](../ui/) under a different port than `80`, change the port mapping in
the `docker-compose.yml` to e.g. port `8000`:
```yaml title="docker-compose.yml"
services:
...
dbrepo-gateway-service:
image: docker.io/nginx:1.25-alpine-slim
image: docker.io/nginx:1.27.0-alpine3.19-slim
ports:
- "8000:80"
...
......@@ -61,10 +61,9 @@ services:
!!! question "Do you miss functionality? Do these limitations affect you?"
We strongly encourage you to help us implement it as we are welcoming contributors to open-source software and get
in [contact](./contact) with us, we happily answer requests for collaboration with attached CV and your programming
in [contact](../../contact) with us, we happily answer requests for collaboration with attached CV and your programming
experience!
## Security
1. Enable TLS encryption by downloading
......
......@@ -4,7 +4,7 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`bitnami/mariadb-galera:11.2.2-debian-11-r0`](https://hub.docker.com/r/bitnami/mariadb-galera)
Image: [`docker.io/bitnami/mariadb:11.1.3-debian-11-r6`](https://hub.docker.com/r/bitnami/mariadb)
* Ports: 3306/tcp
* JDBC: `jdbc:mariadb://<hostname>:3306`
......@@ -23,11 +23,12 @@ services:
dbrepo-metadata-db:
...
volumes:
- /path/to/setup-some-data.sql:/docker-entrypoint-initdb.d/setup-some-data.sql
- /path/to/setup-schema.sql:/docker-entrypoint-initdb.d/1_setup-schema.sql
- /path/to/setup-data.sql:/docker-entrypoint-initdb.d/2_setup-data.sql
...
```
!!! warning "Alphabetic Filename Sorting"
Beware that the init script provided by Bitnami executes files in alphabetic order! For example: the file
`setup-schema.sql` is executed **after** the file `setup-data.sql`!
\ No newline at end of file
`setup-schema.sql` is executed **after** the file `setup-data.sql`! Therefore, a sorting prefix 1-9 is recommended!
\ No newline at end of file
......@@ -6,7 +6,7 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`dbrepo/metadata-service:__APPVERSION__`](https://hub.docker.com/r/dbrepo/metadata-service)
Image: [`registry.datalab.tuwien.ac.at/dbrepo/metadata-service:1.4.4`](https://hub.docker.com/r/dbrepo/metadata-service)
* Ports: 9099/tcp
* Info: `http://<hostname>:9099/actuator/info`
......@@ -14,26 +14,20 @@ author: Martin Weise
- Readiness: `http://<hostname>:9099/actuator/health/readiness`
- Liveness: `http://<hostname>:9099/actuator/health/liveness`
* Prometheus: `http://<hostname>:9099/actuator/prometheus`
* Swagger UI: `http://<hostname>:9099/swagger-ui/index.html` <a href="./swagger/metadata" target="_blank">:fontawesome-solid-square-up-right: view online</a>
* Swagger UI: `http://<hostname>:9099/swagger-ui/index.html`
## Overview
This service manages the following topics:
The metadata service manages metadata of identities, the [Broker Service](../broker-service) (i.e. obtaining queue
types), semantic concepts (i.e. ontologies) and relational metadata (databases, tables, queries, views) and identifiers.
* Databases
* Identifiers (DataCite, OAI-PMH)
* Queries
* Semantics (Ontologies)
* Tables
* Users
* Views
## Generation
### Databases
Most of the metadata available in DBRepo is generated automatically, leveraging the available information and taking
the burden away from researchers, data stewards, etc. For example, the schema (names, constraints, data length) of
generated tables and views is obtained from the `information_schema` database maintained by MariaDB internally.
The service handles table operations inside a database. We use [Hibernate](https://hibernate.org/orm/) for schema and
data ingest operations.
### Identifiers
## Identifiers
The service is responsible for creating and resolving a *persistent identifier* (PID) attached to a database, subset,
table or view to obtain the metadata attached to it and allow reproduction of the exact same result.
......@@ -52,7 +46,7 @@ To activate DOI minting, pass your DataCite Fabrica credentials in the environme
```yaml title="docker-compose.yml"
services:
dbrepo-metadata-service:
image: docker.io/dbrepo/metadata-service:1.4.0
image: registry.datalab.tuwien.ac.at/dbrepo/metadata-service:1.4.4
environment:
spring_profiles_active: doi
DATACITE_URL: https://api.datacite.org
......@@ -62,72 +56,13 @@ services:
...
```
### Queries
It provides an interface to insert data into the tables. It also allows for view-only, paginated and versioned query
execution to the raw data. Any stale queries (query that have been executed by users in DBRepo but were not saved) are
periodically being deleted from the query store based on the `DELETE_STALE_QUERIES_RATE` environment variable (defaults
to 60 seconds).
Executing SQL queries through the Query Endpoint must fulfill some restrictions:
## Semantics
* The SQL query does not contain a semicolon `;`
### Semantics
The service provides metadata to the table columns in the [Metadata Database](./system-databases-metadata) from
registered ontologies like Wikidata [`wd:`](https://wikidata.org), Ontology of Units of
The service provides metadata to the table columns in the [Metadata Database](../metadata-db) from registered ontologies
like Wikidata [`wd:`](https://wikidata.org), Ontology of Units of
Measurement [`om2:`](https://www.ontology-of-units-of-measure.org/resource/om-2), Friend of a
Friend [`foaf:`](http://xmlns.com/foaf/0.1/), the [`prov:`](http://www.w3.org/ns/prov#) namespace, etc.
### Tables
The service manages tables in the [Data Database](./system-databases-data) and manages the metadata of these tables
in the [Metadata Database](./system-databases-metadata). Any tables that are created outside of DBRepo (e.g. directly via the JDBC API) are
periodically fetched by this service (based on the `OBTAIN_METADATA_RATE` environment variable, default interval is 60
seconds).
### Users
The service manages users in the [Data Database](./system-databases-data)
and [Metadata Database](./system-databases-metadata), as well as in the [Broker Service](./system-services-broker)
and the [Authentication Service](./system-services-authentication).
The default configuration grants the users only very basic permissions on the databases:
* `SELECT`
* `CREATE`
* `CREATE VIEW`
* `CREATE ROUTINE`
* `CREATE TEMPORARY TABLES`
* `LOCK TABLES`
* `INDEX`
* `TRIGGER`
* `INSERT`
* `UPDATE`
* `DELETE`
This configuration is passed as environment variable `GRANT_PRIVILEGES` to the service as comma-separated string. You
can add/remove grants by setting this environment variable, e.g. allow the users to only select data and create
temporary tables:
```yaml title="docker-compose.yml"
services:
dbrepo-metadata-service:
environment:
GRANT_PRIVILEGES=SELECT,CREATE TEMPORARY TABLES
...
```
A list of all grants is available in the MariaDB documentation for [`GRANT`](https://mariadb.com/kb/en/grant/)
### Views
The service manages views in the [Data Database](./system-databases-data)
and [Metadata Database](./system-databases-metadata). Any views that are created outside of DBRepo (e.g. directly via
the JDBC API) are periodically fetched by this service (based on the `OBTAIN_METADATA_RATE` environment variable,
default interval is 60 seconds).
## Limitations
* No support for other databases than [MariaDB](https://mariadb.org/) because of system-versioning capabilities missing
......@@ -136,7 +71,7 @@ default interval is 60 seconds).
!!! question "Do you miss functionality? Do these limitations affect you?"
We strongly encourage you to help us implement it as we are welcoming contributors to open-source software and get
in [contact](./contact) with us, we happily answer requests for collaboration with attached CV and your programming
in [contact](../../contact) with us, we happily answer requests for collaboration with attached CV and your programming
experience!
## Security
......
......@@ -8,6 +8,13 @@ author: Martin Weise
[:fontawesome-solid-cube: &nbsp;View Docs](../../python){ .md-button .md-button--primary }
## Overview
The DBRepo Python library is using some of the most popular and maintained Python packages for Data Scientists under the
hood. For example: [`requests`](https://requests.readthedocs.io/) to interact with the HTTP API
endpoints, [`pandas`](https://pandas.pydata.org/) for data operations and [`pydantic`](https://docs.pydantic.dev/) for
information representation from/to the HTTP API.
## Installing
:octicons-tag-16:{ title="Minimum version" } 1.4.2
......
......@@ -6,18 +6,17 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`dbrepo/search-service:__APPVERSION__`](https://hub.docker.com/r/dbrepo/search-service)
Image: [`registry.datalab.tuwien.ac.at/dbrepo/search-service:1.4.4`](https://hub.docker.com/r/dbrepo/search-service)
* Ports: 4000/tcp
* Health: `http://<hostname>:4000/api/search/health`
* Prometheus: `http://<hostname>:4000/metrics`
* Swagger UI: `http://<hostname>:4000/swagger-ui/` <a href="../swagger/search" target="_blank">:fontawesome-solid-square-up-right: view online</a>
* Swagger UI: `http://<hostname>:4000/swagger-ui/`
## Overview
This service communicates between the [Search Database](../system-databases-search) and
the [User Interface](../system-other-ui) to allow structured search of databases, tables, columns, users, identifiers,
views, semantic concepts &amp; units of measurements used in databases.
This service communicates between the Search Database and the [User Interface](../ui) to allow structured search of
databases, tables, columns, users, identifiers, views, semantic concepts &amp; units of measurements used in databases.
<figure markdown>
![Built-in search](../images/screenshots/feature-search.png){ .img-border }
......@@ -28,7 +27,7 @@ views, semantic concepts &amp; units of measurements used in databases.
There is only one
index [`database`](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/raw/dev/dbrepo-search-db/init/indices/database.json)
that holds all the metadata information which is mirrored from the [Metadata Database](../system-databases-metadata).
that holds all the metadata information which is mirrored from the [Metadata Database](../metadata-db).
<figure markdown>
![Mirroring statistical properties in Metadata Database and Search Database](../images/statistics-mirror.png)
......@@ -40,41 +39,13 @@ that holds all the metadata information which is mirrored from the [Metadata Dat
This service enables the frontend to search the `database` index with eight different *types* of desired results
(database, table, column, view, identifier, user, concept, unit) and their *facets*.
For example, the [User Interface](../system-other-ui) allows for the search of databases that contain a certain
For example, the [User Interface](../ui) allows for the search of databases that contain a certain
semantic concept (provided as URI, e.g.
temperature [http://www.wikidata.org/entity/Q11466](http://www.wikidata.org/entity/Q11466)) and unit of measurement
(provided as URI, e.g. degree
Celsius [http://www.ontology-of-units-of-measure.org/resource/om-2/degreeCelsius](http://www.ontology-of-units-of-measure.org/resource/om-2/degreeCelsius)).
An example on faceted browsing is found in the [usage examples](../usage-search).
## Unit Independent Search
Since the repository automatically collects statistical properties (min, max, mean, median, std.dev) in both the
[Metadata Database](../system-databases-metadata) and the [Search Database](../system-databases-search), a special
search can be performed when at least two columns have the same semantic concept (e.g. temperature) annotated and
the units of measurements can be transformed.
<figure markdown>
![Two tables with compatible semantic concepts (Temperature) and units of measurement (left is in degree Celsius, right is in degree Fahrenheit)](../images/statistics-example.png)
<figcaption>Figure 3: Two tables with compatible semantic concepts and units of measurement</figcaption>
</figure>
In short, the search service transforms the statistical properties that are not in the target unit of measurement
by using the [`omlib`](https://github.com/dieudonneWillems/OMLib) package.
For example: a user wants to find datasets that contain *"temperature measurements between 0 - 10 &deg;C"*. Then the
search service transforms the query to the dataset on the right from &deg;F to contain *"temperature measurements
between 32 - 50 &deg;F"* instead.
<figure markdown>
![Unit independent search query transformation](../images/statistics-example-unit-independent-search.png)
<figcaption>Figure 4: Unit independent search query transformation</figcaption>
</figure>
## Examples
View [usage examples](../usage-search/).
An example on faceted browsing is found in the [usage examples](..).
## Limitations
......@@ -86,4 +57,4 @@ View [usage examples](../usage-search/).
## Security
(nothing)
(none)
......@@ -6,7 +6,7 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`chrislusf/seaweedfs:3.59`](https://hub.docker.com/r/chrislusf/seaweedfs)
Image: [`docker.io/chrislusf/seaweedfs:3.59`](https://hub.docker.com/r/chrislusf/seaweedfs)
* Ports: 9000/tcp
* Prometheus: `http://<hostname>:9091/metrics`
......@@ -36,7 +36,7 @@ The default configuration creates two buckets `dbrepo-upload`, `dbrepo-download`
!!! question "Do you miss functionality? Do these limitations affect you?"
We strongly encourage you to help us implement it as we are welcoming contributors to open-source software and get
in [contact](./contact) with us, we happily answer requests for collaboration with attached CV and your programming
in [contact](../../contact) with us, we happily answer requests for collaboration with attached CV and your programming
experience!
## Security
......
......@@ -2,6 +2,14 @@
author: Martin Weise
---
## tl;dr
!!! debug "Debug Information"
Image: [`registry.datalab.tuwien.ac.at/dbrepo/ui:1.4.4`](https://hub.docker.com/r/dbrepo/ui)
* Ports: 3000/tcp
The User Interface is configured in the `runtimeConfig` section of the `nuxt.config.ts` file during build time. For the
runtime, you need to override those values through environment variables or by mounting a `.env` file. As a small
example, you can configure the logo :material-numeric-1-circle-outline: in Figure 2. Make sure you mount the logo as
......@@ -27,7 +35,7 @@ if you use a Kubernetes deployment via ConfigMap and Volumes).
```yaml title="docker-compose.yml"
services:
dbrepo-ui:
image: docker.io/dbrepo/ui:__APPVERSION__
image: registry.datalab.tuwien.ac.at/dbrepo/ui:1.4.4
volumes:
- ./my_logo.png:/app/.output/public/my_logo.png
...
......
......@@ -6,46 +6,31 @@ author: Martin Weise
!!! debug "Debug Information"
Image: [`tusproject/tusd:v1.12`](https://hub.docker.com/r/tusproject/tusd)
Image: [`docker.io/tusproject/tusd:v1.12`](https://hub.docker.com/r/tusproject/tusd)
* Ports: 1080/tcp
* Prometheus: `http://<hostname>:1080/api/upload/metrics`
* API: `http://<hostname>:1080/api/upload`
* Swagger UI: <a href="../swagger/upload" target="_blank">:fontawesome-solid-square-up-right: view online</a>
## Overview
We use the [TUS](https://tus.io/) open protocol for resumable file uploads which is based entirely on HTTP. Even though
We use the [TUS](https://tus.io/) open protocol for resume-able file uploads which is based entirely on HTTP. Even though
the Upload Service is part of the standard installation, it is an entirely optional component and can be replaced with
any S3-compatible Blob Storage.
### Settings
The Upload Service is responsible for uploading files (mainly CSV-files) into a Blob Storage that can be accessed through
the S3 protocol (e.g. our [Storage Service](../system-services-storage)). Make sure that the Upload Service can be
accessed from the Gateway Service and set the url in the User Interface configuration file.
```json title="dbrepo.config.json"
{
"upload": {
"url": "example.com",
"useSsl": true
},
...
}
```
If your deployment is secured with SSL/TLS (recommended) set the `useSsl` variable to `true`.
### Architecture
The Upload Service communicates internally with the [Storage Service](../system-services-storage) (c.f. [Figure 1](#fig1)).
The Upload Service communicates internally with the [Storage Service](../storage-service) (c.f. [Figure 1](#fig1)).
<figure id="fig1" markdown>
![Architecture of the Upload Service](../images/architecture-upload-service.svg)
<figcaption>Figure 1: Architecture of the Upload Service</figcaption>
</figure>
The Upload Service is responsible for uploading files (mainly CSV-files) into a Blob Storage that can be accessed through
the S3 protocol (e.g. our [Storage Service](../storage-service)). Make sure that the Upload Service can be
accessed from the Gateway Service.
## Limitations
* No support for authentication.
......
......@@ -4,10 +4,34 @@ author: Martin Weise
## Index
TBD
tbd
## Document
TBD
## Query
## Unit Independent Search
Since the repository automatically collects statistical properties (min, max, mean, median, std.dev) in both the
[Metadata Database](../system-databases-metadata) and the [Search Database](../system-databases-search), a special
search can be performed when at least two columns have the same semantic concept (e.g. temperature) annotated and
the units of measurements can be transformed.
<figure markdown>
![Two tables with compatible semantic concepts (Temperature) and units of measurement (left is in degree Celsius, right is in degree Fahrenheit)](../images/statistics-example.png)
<figcaption>Figure 3: Two tables with compatible semantic concepts and units of measurement</figcaption>
</figure>
In short, the search service transforms the statistical properties that are not in the target unit of measurement
by using the [`omlib`](https://github.com/dieudonneWillems/OMLib) package.
For example: a user wants to find datasets that contain *"temperature measurements between 0 - 10 &deg;C"*. Then the
search service transforms the query to the dataset on the right from &deg;F to contain *"temperature measurements
between 32 - 50 &deg;F"* instead.
<figure markdown>
![Unit independent search query transformation](../images/statistics-example-unit-independent-search.png)
<figcaption>Figure 4: Unit independent search query transformation</figcaption>
</figure>
\ No newline at end of file
......@@ -5,6 +5,7 @@ author: Martin Weise
[![CI/CD Pipeline](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/badges/master/pipeline.svg)](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services){ tabindex=-1 }
[![Code Coverage](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/badges/master/coverage.svg)](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services){ tabindex=-1 }
[![GitLab Release](https://img.shields.io/gitlab/v/release/fair-data-austria-db-repository%2Ffda-services?gitlab_url=https%3A%2F%2Fgitlab.phaidra.org&display_name=release&style=flat&cacheSeconds=3600)](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services){ tabindex=-1 }
[![Image Pulls](https://img.shields.io/docker/pulls/dbrepo/data-service?style=flat&cacheSeconds=3600)](https://hub.docker.com/u/dbrepo){ tabindex=-1 }
[![GitLab License](https://img.shields.io/gitlab/license/fair-data-austria-db-repository%2Ffda-services?gitlab_url=https%3A%2F%2Fgitlab.phaidra.org%2F&style=flat&cacheSeconds=3600)](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services){ tabindex=-1 }
Documentation for version: [v1.4.4](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/releases).
......
......@@ -2,16 +2,20 @@
author: Martin Weise
---
# Docker Compose
# Installation
[![Image Pulls](https://img.shields.io/docker/pulls/dbrepo/data-service?style=flat&cacheSeconds=3600)](https://hub.docker.com/u/dbrepo){ tabindex=-1 }
## TL;DR
If you have [Docker](https://docs.docker.com/engine/install/) already installed on your system, you can install DBRepo with:
```shell
curl -sSL https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/raw/release-__APPVERSION__/install.sh | bash
curl -sSL https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/raw/release-1.4.4/install.sh | bash
```
Or perform a [custom install](#custom-install).
## Requirements
### Hardware
......@@ -26,7 +30,7 @@ the following settings.
*Optional*: public IP-address if you want to secure the deployment with a (free) TLS-certificate from Let's Encrypt.
!!! tip "Resource Consumption"
!!! info "Resource Consumption"
Note that most of the vCPU and RAM resources will be needed for starting the infrastructure, this is because of
Docker. During operation and especially idle times, the deployment will use significantly less resources.
......@@ -34,13 +38,15 @@ the following settings.
### Software
We only test the Docker Compose deployment with the
official [Docker engine](https://docs.docker.com/engine/install/debian/) installed on
official [Docker Engine](https://docs.docker.com/engine/install/debian/) installed on
a [Debian](https://www.debian.org/)-based operating system. Other software deployments (e.g. Docker Desktop on Windows)
are *not* recommended and not tested.
## Architecture
## Custom Install
### Overview
TBD
## Architecture
The repository is designed as a service-based architecture to ensure scalability and the utilization of various
technologies. The conceptualized microservices operate the basic database operations, data versioning as well as
......@@ -51,9 +57,7 @@ technologies. The conceptualized microservices operate the basic database operat
<figcaption>Architecture of the services deployed via Docker Compose</figcaption>
</figure>
### Notes
Please note that we only save the state of the databases as well as the [Broker Service](./system-services-broker)
Please note that we only save the state of the databases as well as the [Broker Service](../broker-service)
since RabbitMQ maintains state inside the container.
## Deployment
......@@ -61,43 +65,6 @@ since RabbitMQ maintains state inside the container.
We maintain a rapid prototype deployment option through Docker Compose (v2.17.0 and newer). This deployment creates the
core infrastructure and a single Docker container for all user-generated databases.
=== "Linux"
Download and install [Docker Engine](https://docs.docker.com/desktop/install/linux-install/) for your Linux
distribution. Although the installation might work, we *do not* recommend Docker Desktop.
Ensure the Docker daemon is running at all times:
systemctl enable docker --now
Install DBRepo with the default configuration:
curl -sSL https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/raw/dev/install.sh | bash
=== "Windows"
Open `cmd.exe` as administrator and install WSL2 and the Debian subsystem:
wsl --install Debian
Open `optionalfeatures` by typing into the open terminal window or searching for it and enable "Windows Subsystem
for Linux":
<figure markdown>
![Data ingest](images/optionalfeatures.png){ .img-border }
<figcaption>Enable Subsystem for Linux in Windows Features</figcaption>
</figure>
Install [Docker Desktop](https://docs.docker.com/desktop/install/windows-install/) on the Windows host machine.
Open Docker Desktop and go to settings (:fontawesome-solid-gear:) > General > Tick "Use WSL2 based engine" if not
already ticked.
Open the Debian container by typing "Debian" into the search, you should see a terminal window.
Install DBRepo with the default configuration from the Debian container:
curl -sSL https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/raw/master/install.sh | bash
View the logs:
docker compose logs -f
......@@ -147,50 +114,11 @@ Please be warned that the default configuration is not intended for public deplo
running system within minutes to play around within the system and explore features. It is strongly advised to change
the default `.env` environment variables.
Next, create a [user account](./usage-overview/#create-user-account) and
then [create a database](./usage-overview/#create-database) to [import a dataset](./usage-overview/#import-dataset).
## Security
!!! warning "Known security issues with the default configuration"
The system is auto-configured for a small, local, test deployment and is *not* secure! You need to make modifications
in various places to make it secure:
* **Authentication Service**:
a. You need to use your own instance or configure a secure instance using a (self-signed) certificate.
Additionally, when serving from a non-default Authentication Service, you need to put it into the
`JWT_ISSUER` environment variable (`.env`).
b. You need to change the default admin user `fda` password in Realm
master > Users > fda > Credentials > Reset password.
c. You need to change the client secrets for the clients `dbrepo-client` and `broker-client`. Do this in Realm
dbrepo > Clients > dbrepo-client > Credentials > Client secret > Regenerate. Do the same for the
broker-client.
d. You need to regenerate the public key of the `RS256` algorithm which is shared with all services to verify
the signature of JWT tokens. Add your securely generated private key in Realm
dbrepo > Realm settings > Keys > Providers > Add provider > rsa.
* **Broker Service**: by default, this service is configured with an administrative user that has major privileges.
You need to change the password of the user *fda* in Admin > Update this user > Password. We found this
[simple guide](https://onlinehelp.coveo.com/en/ces/7.0/administrator/changing_the_rabbitmq_administrator_password.htm)
to be very useful.
* **Search Database**: by default, this service is configured to require authentication with an administrative user
that is allowed to write into the indices. Following
this [simple guide](https://www.elastic.co/guide/en/elasticsearch/reference/8.7/reset-password.html), this can be
achieved using the command line.
* **Gateway Service**: by default, no HTTPS is used that protects the services behind. You need to provide a trusted
SSL/TLS certificate in the configuration file or use your own proxy in front of the Gateway Service. See this
[simple guide](http://nginx.org/en/docs/http/configuring_https_servers.html) on how to install a SSL/TLS
certificate on NGINX.
Next, create a [user account](../api/#create-user-account) and
then [create a database](../api/#create-database) to [import a dataset](../api/#import-dataset).
## Limitations
!!! info "Alternative Deployments"
Alternatively, you can also deploy DBRepo with [Helm](./deployment-helm/) in your virtual machine instead.
Alternatively, you can also deploy DBRepo with [Kubernetes](../deployment-helm) in your virtual machine instead.
......@@ -7,21 +7,48 @@ author: Martin Weise
## TL;DR
To install DBRepo in your existing cluster, download the
sample [`values.yaml`](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-deployment/-/raw/master/charts/dbrepo-core/values.yaml?inline=false)
sample [`values.yaml`](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/blob/release-1.4.4/helm/dbrepo/values.yaml)
for your deployment and update the variables, especially `hostname`.
```shell
helm upgrade --install dbrepo \
-n dbrepo \
"oci://s210.dl.hpc.tuwien.ac.at/dbrepo/helm/dbrepo" \
"oci://registry.datalab.tuwien.ac.at/dbrepo/helm/dbrepo" \
--values ./values.yaml \
--version "__CHARTVERSION__" \
--version "1.4.4" \
--create-namespace \
--cleanup-on-fail
```
This chart is also on [Artifact Hub](https://artifacthub.io/packages/helm/dbrepo/dbrepo) with a full documentation
about values, etc.
about values, etc. Before installing, you need to change credentials, e.g. the Broker Service administrator user
password:
```yaml title="values.yaml"
brokerservice:
...
auth:
...
username: broker
password: broker
passwordHash: 1gwjNNTBPKLgyzbsUykfR0JIFC6nNqbNJaxzZ14uPT8JGcTZ
```
The `brokerservice.auth.passwordHash` field is the RabbitMQ SHA512-hash of the `brokerservice.auth.password` field and
can be obtained with
the [`generate-rabbitmq-pw.sh`](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/blob/release-1.4.4/helm/dbrepo/hack/generate-rabbitmq-pw.sh)
script:
```console
$ ./generate-rabbitmq-pw.sh my_password
klPdmv4dgnRH64czHolIHAfXvc0G9hc24FQmPlI6eeI1NOf9
```
The script needs the package `xxd` for generation of the random salt. If you don't have `xxd` installed, install it:
* Debian/Ubuntu: `apt install xxd`
* Windows: `choco install xxd`
* MacOS: `brew install coreutils`
## Prerequisites
......@@ -32,12 +59,12 @@ about values, etc.
## Limitations
1. MariaDB Galera does not (yet) support XA-transactions required by the authentication service (=Keycloak). Therefore
only a single MariaDB pod can be deployed at once for the [auth database](./system-databases-authentication).
only a single MariaDB pod can be deployed at once for the Auth database.
2. The entire Helm deployment is rootless (=`runAsNonRoot=true`) except for
the [Storage Service](./system-services-storage/) which still requires a root user.
the [Storage Service](../api/storage-service) which still requires a root user.
!!! question "Do you miss functionality? Do these limitations affect you?"
We strongly encourage you to help us implement it as we are welcoming contributors to open-source software and get
in [contact](./contact) with us, we happily answer requests for collaboration with attached CV and your programming
in [contact](../../contact) with us, we happily answer requests for collaboration with attached CV and your programming
experience!
......@@ -121,10 +121,11 @@ build-helm:
refs:
- /^release-.*/
before_script:
- echo "$CI_GPG_KEYRING" | base64 -d > ./secring.gpg
- echo "$CI_REGISTRY_PASSWORD" | docker login --username "$CI_REGISTRY_USER" --password-stdin $CI_REGISTRY_URL
script:
- apk add sed helm curl
- helm package ./helm/dbrepo --destination ./build
- helm package ./helm/dbrepo --sign --key 'Martin Weise' --keyring ./secring.gpg --destination ./build
verify-install-script:
image: docker.io/docker:24-dind
......@@ -590,7 +591,7 @@ release-images:
script:
- "make release-images"
release-chart:
release-helm:
stage: release
image: docker:24-dind
only:
......@@ -605,10 +606,11 @@ release-chart:
- "docker logout ${CI_REGISTRY2_URL}"
- "echo ${CI_REGISTRY2_PASSWORD} | docker login --username ${CI_REGISTRY2_USER} --password-stdin ${CI_REGISTRY2_URL}"
- "apk add sed helm curl"
- "helm package ./helm/dbrepo --destination ./build"
- echo "$CI_GPG_KEYRING" | base64 -d > ./secring.gpg
- helm package ./helm/dbrepo --sign --key 'Martin Weise' --keyring ./secring.gpg --destination ./build
- "helm plugin install https://github.com/sigstore/helm-sigstore"
script:
- "helm push ./build/dbrepo-${CHART_VERSION}.tgz oci://${CI_REGISTRY2_URL}/helm"
- "helm sigstore upload ./build/dbrepo-${CHART_VERSION}.tgz oci://${CI_REGISTRY2_URL}/helm"
release-docs:
stage: release
......
......@@ -2,7 +2,7 @@
APP_VERSION ?= 1.4.4
CHART_VERSION ?= 1.4.4
REPOSITORY_URL ?= docker.io/dbrepo
REPOSITORY_URL ?= registry.datalab.tuwien.ac.at/dbrepo
.PHONY: all
all: help
......
......@@ -167,19 +167,19 @@
},
"boto3": {
"hashes": [
"sha256:42b140fc850cf261ee4b1e8ef527fa071b1f1592a6d6a68d34b29f37cc46b4dd",
"sha256:56bec52d485d5670ce96d53ae7b2cd4ae4e8a705fb2298a21093cdd77d642331"
"sha256:7e8418b47dd43954a9088d504541bed8a42b6d06e712d02befba134c1c4d7c6d",
"sha256:7f676daef674fe74f34ce4063228eccc6e60c811f574720e31f230296c4bf29a"
],
"index": "pypi",
"version": "==1.34.123"
"version": "==1.34.126"
},
"botocore": {
"hashes": [
"sha256:8c34ada2a708c82e7174bff700611643db7ce2cb18f1130c35045c24310d299d",
"sha256:a8577f6574600c4d159b5cd103ee05744a443d77f7778304e17307940b369c4f"
"sha256:7a8ccb6a7c02456757a984a3a44331b6f51c94cb8b9b287cd045122fd177a4b0",
"sha256:7eff883c638fe30e0b036789df32d851e093d12544615a3b90062b42ac85bdbc"
],
"markers": "python_version >= '3.8'",
"version": "==1.34.123"
"version": "==1.34.126"
},
"certifi": {
"hashes": [
......@@ -1050,11 +1050,11 @@
},
"pydantic": {
"hashes": [
"sha256:c46c76a40bb1296728d7a8b99aa73dd70a48c3510111ff290034f860c99c419e",
"sha256:ea91b002777bf643bb20dd717c028ec43216b24a6001a280f83877fd2655d0b4"
"sha256:0c84efd9548d545f63ac0060c1e4d39bb9b14db8b3c0652338aecc07b5adec52",
"sha256:ee8538d41ccb9c0a9ad3e0e5f07bf15ed8015b481ced539a1759d8cc89ae90d0"
],
"index": "pypi",
"version": "==2.7.3"
"version": "==2.7.4"
},
"pydantic-core": {
"hashes": [
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment