diff --git a/.docker/dist.tar.gz b/.docker/dist.tar.gz index 13e91ed189c2425dbb2d2493cc6fd4fa9d949b3e..41421af414e1f200cced2348ab00977bde12c8b1 100644 Binary files a/.docker/dist.tar.gz and b/.docker/dist.tar.gz differ diff --git a/.docker/docker-compose.yml b/.docker/docker-compose.yml index d13e63c803d544ef129a6f03dbf6267ab6c2f0ec..ce5680133fcfe2690220850359b9f0dc5c28f3c8 100644 --- a/.docker/docker-compose.yml +++ b/.docker/docker-compose.yml @@ -419,22 +419,22 @@ services: volumes: - "${SHARED_VOLUME:-/tmp}:/tmp" environment: - AUTH_SERVICE_ADMIN: ${AUTH_SERVICE_ADMIN:-admin} - AUTH_SERVICE_ADMIN_PASSWORD: ${AUTH_SERVICE_ADMIN_PASSWORD:-admin} - AUTH_SERVICE_CLIENT: ${AUTH_SERVICE_CLIENT:-dbrepo-client} - AUTH_SERVICE_CLIENT_SECRET: ${AUTH_SERVICE_CLIENT:-MUwRc7yfXSJwX8AdRMWaQC3Nep1VjwgG} - AUTH_SERVICE_ENDPOINT: ${AUTH_SERVICE_ENDPOINT:-http://auth-service:8080} - BROKER_EXCHANGE_NAME: ${BROKER_EXCHANGE_NAME:-dbrepo} - BROKER_QUEUE_NAME: ${BROKER_QUEUE_NAME:-dbrepo} + AUTH_SERVICE_ADMIN: "${AUTH_SERVICE_ADMIN:-admin}" + AUTH_SERVICE_ADMIN_PASSWORD: "${AUTH_SERVICE_ADMIN_PASSWORD:-admin}" + AUTH_SERVICE_CLIENT: "${AUTH_SERVICE_CLIENT:-dbrepo-client}" + AUTH_SERVICE_CLIENT_SECRET: "${AUTH_SERVICE_CLIENT:-MUwRc7yfXSJwX8AdRMWaQC3Nep1VjwgG}" + AUTH_SERVICE_ENDPOINT: "${AUTH_SERVICE_ENDPOINT:-http://auth-service:8080}" + BROKER_EXCHANGE_NAME: "${BROKER_EXCHANGE_NAME:-dbrepo}" + BROKER_QUEUE_NAME: "${BROKER_QUEUE_NAME:-dbrepo}" BROKER_HOST: "${BROKER_ENDPOINT:-broker-service}" - BROKER_PASSWORD: ${SYSTEM_PASSWORD:-admin} + BROKER_PASSWORD: "${SYSTEM_PASSWORD:-admin}" BROKER_PORT: ${BROKER_PORT:-5672} - BROKER_SERVICE_ENDPOINT: ${BROKER_SERVICE_ENDPOINT:-http://gateway-service/admin/broker} - BROKER_USERNAME: ${SYSTEM_USERNAME:-admin} + BROKER_SERVICE_ENDPOINT: "${BROKER_SERVICE_ENDPOINT:-http://gateway-service/admin/broker}" + BROKER_USERNAME: "${SYSTEM_USERNAME:-admin}" BROKER_VIRTUALHOST: "${BROKER_VIRTUALHOST:-dbrepo}" CONNECTION_TIMEOUT: ${CONNECTION_TIMEOUT:-60000} - EXCHANGE_NAME: ${EXCHANGE_NAME:-dbrepo} - METADATA_SERVICE_ENDPOINT: ${METADATA_SERVICE_ENDPOINT:-http://metadata-service:8080} + EXCHANGE_NAME: "${EXCHANGE_NAME:-dbrepo}" + METADATA_SERVICE_ENDPOINT: "${METADATA_SERVICE_ENDPOINT:-http://metadata-service:8080}" GRANT_DEFAULT_READ: "${GRANT_DEFAULT_READ:-SELECT}" GRANT_DEFAULT_WRITE: "${GRANT_DEFAULT_WRITE:-SELECT, CREATE, CREATE VIEW, CREATE ROUTINE, CREATE TEMPORARY TABLES, LOCK TABLES, INDEX, TRIGGER, INSERT, UPDATE, DELETE}" JWT_PUBKEY: "${JWT_PUBKEY:-MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqqnHQ2BWWW9vDNLRCcxD++xZg/16oqMo/c1l+lcFEjjAIJjJp/HqrPYU/U9GvquGE6PbVFtTzW1KcKawOW+FJNOA3CGo8Q1TFEfz43B8rZpKsFbJKvQGVv1Z4HaKPvLUm7iMm8Hv91cLduuoWx6Q3DPe2vg13GKKEZe7UFghF+0T9u8EKzA/XqQ0OiICmsmYPbwvf9N3bCKsB/Y10EYmZRb8IhCoV9mmO5TxgWgiuNeCTtNCv2ePYqL/U0WvyGFW0reasIK8eg3KrAUj8DpyOgPOVBn3lBGf+3KFSYi+0bwZbJZWqbC/Xlk20Go1YfeJPRIt7ImxD27R/lNjgDO/MwIDAQAB}" diff --git a/.docs/.swagger/api.yaml b/.docs/.swagger/api.yaml index c2b5b17fd4586065a803ed4cfe2a83cce2250abf..b24f1e6f93ec27934d2861971306c1126e839686 100644 --- a/.docs/.swagger/api.yaml +++ b/.docs/.swagger/api.yaml @@ -66,7 +66,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/DataTypesDto' + $ref: '#/components/schemas/AnalysisDto' description: Determined data types successfully '400': content: @@ -335,7 +335,6 @@ paths: user needs to have at least *READ* access to the associated database. 
Requests with HTTP method **GET** return the full dataset, requests with HTTP method **HEAD** only the number of tuples in the `X-Count` header. - Requires role `view-table-data`. operationId: getData_2 parameters: - name: databaseId @@ -596,7 +595,6 @@ paths: user needs to have at least *READ* access to the associated database. Requests with HTTP method **GET** return the full dataset, requests with HTTP method **HEAD** only the number of tuples in the `X-Count` header. - Requires role `view-table-data`. operationId: getData_3 parameters: - name: databaseId @@ -1140,7 +1138,7 @@ paths: schema: $ref: '#/components/schemas/ApiErrorDto' '404': - description: Failed to find table in metadata database + description: Failed to find table or database in metadata database content: application/json: schema: @@ -1491,7 +1489,7 @@ paths: id. Requests with HTTP method **GET** return the access object, requests with HTTP method **HEAD** only the status. When the user has at least *READ* access, the status 200 is returned, 403 otherwise. Requires role - `check-database-access` or `admin`. + `check-database-access` or `check-foreign-database-access`. operationId: find parameters: - name: databaseId @@ -1728,7 +1726,7 @@ paths: id. Requests with HTTP method **GET** return the access object, requests with HTTP method **HEAD** only the status. When the user has at least *READ* access, the status 200 is returned, 403 otherwise. Requires role - `check-database-access` or `admin`. + `check-database-access` or `check-foreign-database-access`. operationId: find_1 parameters: - name: databaseId @@ -3066,8 +3064,14 @@ paths: description: Parameters are not well-formed (likely email) content: application/json: {} + '403': + description: Internal authentication to the auth service is invalid + content: + application/json: + schema: + $ref: '#/components/schemas/ApiErrorDto' '404': - description: default role not found + description: Default role not found content: application/json: schema: @@ -4364,10 +4368,14 @@ components: scheme: bearer type: http schemas: - DataTypesDto: + AnalysisDto: properties: columns: - $ref: '#/components/schemas/SuggestedColumnDto' + items: + properties: + column_name: + $ref: '#/components/schemas/ColumnAnalysisDto' + type: array line_termination: example: "\r\n" type: string @@ -4375,6 +4383,33 @@ components: example: ',' type: string type: object + ColumnAnalysisDto: + properties: + d: + example: 4 + type: integer + dfid: + example: null + type: integer + enums: + example: null + properties: + type: string + type: array + null_allowed: + type: boolean + sets: + example: null + properties: + type: string + type: array + size: + example: 10 + type: integer + type: + example: decimal + type: string + type: object ErrorDto: properties: message: @@ -4396,11 +4431,6 @@ components: required: - keys type: object - SuggestedColumnDto: - properties: - column_name: - type: string - type: object ApiErrorDto: required: - code @@ -5622,7 +5652,6 @@ components: TableStatisticDto: required: - columns - - rows type: object properties: columns: @@ -5632,6 +5661,22 @@ components: rows: type: integer format: int64 + example: 5 + data_length: + type: integer + description: in bytes + format: int64 + example: 16384 + max_data_length: + type: integer + description: in bytes + format: int64 + example: 0 + avg_row_length: + type: integer + description: in bytes + format: int64 + example: 3276 TableHistoryDto: required: - event @@ -8232,8 +8277,6 @@ components: $ref: '#/components/schemas/ColumnCreateDto' 
constraints: $ref: '#/components/schemas/ConstraintsCreateDto' - need_sequence: - type: boolean ContainerCreateDto: required: - host diff --git a/dbrepo-analyse-service/api/dto.py b/dbrepo-analyse-service/api/dto.py index 66eed5ee5bd2bf511bc536fae9cacd0fd62ab422..c3c6a22c04f19540755ff027abf899d520c2b2c4 100644 --- a/dbrepo-analyse-service/api/dto.py +++ b/dbrepo-analyse-service/api/dto.py @@ -1,15 +1,66 @@ -from typing import Optional +from enum import Enum +from typing import Optional, List from pydantic import BaseModel -class ColumnStat(BaseModel): - val_min: Optional[float] - val_max: Optional[float] - mean: Optional[float] - median: Optional[float] - std_dev: Optional[float] +class DataTypeDto(str, Enum): + """ + Enumeration of data types. + """ + BIGINT = "bigint" + BINARY = "binary" + BIT = "bit" + BLOB = "blob" + BOOL = "bool" + CHAR = "char" + DATE = "date" + DATETIME = "datetime" + DECIMAL = "decimal" + DOUBLE = "double" + ENUM = "enum" + FLOAT = "float" + INT = "int" + LONGBLOB = "longblob" + LONGTEXT = "longtext" + MEDIUMBLOB = "mediumblob" + MEDIUMINT = "mediumint" + MEDIUMTEXT = "mediumtext" + SET = "set" + SMALLINT = "smallint" + TEXT = "text" + TIMESTAMP = "timestamp" + TINYBLOB = "tinyblob" + TINYINT = "tinyint" + TINYTEXT = "tinytext" + YEAR = "year" + VARBINARY = "varbinary" + VARCHAR = "varchar" -class TableStat(BaseModel): - columns: dict[str, ColumnStat] +class ColumnAnalysisDto(BaseModel): + type: DataTypeDto + null_allowed: bool + size: Optional[int] = None + d: Optional[int] = None + dfid: Optional[int] = None + enums: Optional[list] = None + sets: Optional[list] = None + + +class AnalysisDto(BaseModel): + columns: dict[str, ColumnAnalysisDto] + separator: str + line_termination: str + + +class ColumnStatDto(BaseModel): + val_min: Optional[float] = None + val_max: Optional[float] = None + mean: Optional[float] = None + median: Optional[float] = None + std_dev: Optional[float] = None + + +class TableStatDto(BaseModel): + columns: dict[str, ColumnStatDto] diff --git a/dbrepo-analyse-service/app.py b/dbrepo-analyse-service/app.py index bbce751508c62e8a5f375447b97a7486668b9673..2dc9161746fbc52fb523b2bcfe934a2dac8ffbb4 100644 --- a/dbrepo-analyse-service/app.py +++ b/dbrepo-analyse-service/app.py @@ -77,10 +77,17 @@ template = { "openapi": "3.0.0", "components": { "schemas": { - "DataTypesDto": { + "AnalysisDto": { "properties": { "columns": { - "$ref": "#/components/schemas/SuggestedColumnDto" + "type": "array", + "items": { + "properties": { + "column_name": { + "$ref": "#/components/schemas/ColumnAnalysisDto" + } + } + } }, "line_termination": { "example": "\r\n", @@ -125,10 +132,40 @@ template = { ], "type": "object" }, - "SuggestedColumnDto": { + "ColumnAnalysisDto": { "properties": { - "column_name": { - "type": "string" + "type": { + "type": "string", + "example": "decimal" + }, + "null_allowed": { + "type": "boolean" + }, + "size": { + "type": "integer", + "example": 10 + }, + "d": { + "type": "integer", + "example": 4 + }, + "dfid": { + "type": "integer", + "example": None + }, + "enums": { + "type": "array", + "example": None, + "properties": { + "type": "string" + } + }, + "sets": { + "type": "array", + "example": None, + "properties": { + "type": "string" + } } }, "type": "object" }, @@ -251,7 +288,7 @@ def analyse_datatypes(): try: res = determine_datatypes(filename, enum, enum_tol, separator) logging.debug("determine datatype resulted in datatypes %s", res) - return Response(res, mimetype="application/json"), 202 + return Response(res.model_dump_json(),
mimetype="application/json"), 202 except OSError as e: logging.error(f"Failed to determine data types: {e}") return ApiError(status='BAD_REQUEST', message=str(e), code='error.analyse.invalid').model_dump_json(), 400 diff --git a/dbrepo-analyse-service/as-yml/analyse_datatypes.yml b/dbrepo-analyse-service/as-yml/analyse_datatypes.yml index 14529bb34bee7d9ef28df2ccc8ad4be6dd208929..78f84f9e275a2ac436e00bb688cef9e925b9ecf6 100644 --- a/dbrepo-analyse-service/as-yml/analyse_datatypes.yml +++ b/dbrepo-analyse-service/as-yml/analyse_datatypes.yml @@ -38,7 +38,7 @@ responses: content: application/json: schema: - $ref: '#/components/schemas/DataTypesDto' + $ref: '#/components/schemas/AnalysisDto' 400: description: "Failed to determine data types" content: diff --git a/dbrepo-analyse-service/determine_dt.py b/dbrepo-analyse-service/determine_dt.py index 6a224018665e9d2583da023f558837a9f24ab4e2..a0890c2b7a9cd5a9e53649464cfa19ec47f0e45d 100644 --- a/dbrepo-analyse-service/determine_dt.py +++ b/dbrepo-analyse-service/determine_dt.py @@ -9,12 +9,14 @@ import pandas from numpy import dtype, max, min from flask import current_app +from pandas import DataFrame from pandas.errors import EmptyDataError +from api.dto import ColumnAnalysisDto, DataTypeDto, AnalysisDto from clients.s3_client import S3Client -def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=',') -> {}: +def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=',') -> AnalysisDto: # Use option enum=True for searching Postgres ENUM Types in CSV file. Remark # Enum is not SQL standard, hence, it might not be supported by all db-engines. # However, it can be used in Postgres and MySQL. @@ -24,7 +26,7 @@ def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=',') -> stream = response['Body'] if response['ContentLength'] == 0: logging.warning(f'Failed to determine data types: file {filename} has empty body') - return json.dumps({'columns': [], 'separator': ','}) + return AnalysisDto(columns=dict(), separator=",", line_termination="\n") with io.BytesIO(stream.read()) as fh: line_terminator = None @@ -63,52 +65,58 @@ def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=',') -> r = {} for name, dataType in df.dtypes.items(): + col = ColumnAnalysisDto(type=DataTypeDto.TEXT, null_allowed=contains_null(df[name])) + r[name] = col if dataType == dtype('float64'): if pandas.to_numeric(df[name], errors='coerce').notnull().all(): logging.debug(f"mapped column {name} from float64 to decimal") - r[name] = 'decimal' + col.type = DataTypeDto.DECIMAL + col.size = 10 + col.d = 4 else: logging.debug(f"mapped column {name} from float64 to text") - r[name] = 'text' + col.type = DataTypeDto.TEXT elif dataType == dtype('int64'): min_val = min(df[name]) max_val = max(df[name]) if 0 <= min_val <= 1 and 0 <= max_val <= 1: logging.debug(f"mapped column {name} from int64 to bool") - r[name] = 'bool' + col.type = DataTypeDto.BOOL continue logging.debug(f"mapped column {name} from int64 to bigint") - r[name] = 'bigint' + col.type = DataTypeDto.BIGINT + col.size = 255 elif dataType == dtype('O'): try: pandas.to_datetime(df[name], format='mixed') logging.debug(f"mapped column {name} from O to timestamp") - r[name] = 'timestamp' + col.type = DataTypeDto.TIMESTAMP continue except ValueError: pass max_size = max(df[name].astype(str).map(len)) if max_size <= 1: logging.debug(f"mapped column {name} from O to char") - r[name] = 'char' + col.type = DataTypeDto.CHAR + col.size = 1 if 0 <= max_size <= 255: 
logging.debug(f"mapped column {name} from O to varchar") - r[name] = 'varchar' + col.type = DataTypeDto.VARCHAR + col.size = 255 else: logging.debug(f"mapped column {name} from O to text") - r[name] = 'text' + col.type = DataTypeDto.TEXT elif dataType == dtype('bool'): logging.debug(f"mapped column {name} from bool to bool") - r[name] = 'bool' + col.type = DataTypeDto.BOOL elif dataType == dtype('datetime64'): logging.debug(f"mapped column {name} from datetime64 to datetime") - r[name] = 'datetime' + col.type = DataTypeDto.DATETIME else: logging.warning(f'default to \'text\' for column {name} and type {dtype}') - r[name] = 'text' - s = {"columns": r, "separator": separator, "line_termination": line_terminator} + s = AnalysisDto(columns=r, separator=separator, line_termination=line_terminator) logging.info("Determined data types %s", s) - return json.dumps(s) + return s def peek_line(f) -> bytes: @@ -116,3 +124,9 @@ def peek_line(f) -> bytes: line: bytes = f.readline() f.seek(pos) return line + + +def contains_null(df: DataFrame) -> bool: + if '\\N' in df.values: + return True + return df.isnull().values.any() diff --git a/dbrepo-analyse-service/determine_pk.py b/dbrepo-analyse-service/determine_pk.py index 141d90b78e43b05cce8b2f6c10700a18d14c6073..b0ad8cbf769bb87b814bd2d22261b349ce9bd303 100644 --- a/dbrepo-analyse-service/determine_pk.py +++ b/dbrepo-analyse-service/determine_pk.py @@ -9,8 +9,8 @@ from clients.s3_client import S3Client def determine_pk(filename: str, separator: str = ','): - dt = json.loads(determine_datatypes(filename=filename, separator=separator)) - dt = {k.lower(): v for k, v in dt["columns"].items()} + dt = determine_datatypes(filename=filename, separator=separator) + dt = {k.lower(): v for k, v in dt.columns.items()} # {k.lower(): v for k, v in dt['columns'].items() if v != 'Numeric'} colnames = dt.keys() colindex = list(range(0, len(colnames))) diff --git a/dbrepo-analyse-service/test/test_determine_dt.py b/dbrepo-analyse-service/test/test_determine_dt.py index 3d7e4f8d3bee3f60d593572d420b3243fea179a2..73c443b7280e45295bb66a5ee4b4519daf50627c 100644 --- a/dbrepo-analyse-service/test/test_determine_dt.py +++ b/dbrepo-analyse-service/test/test_determine_dt.py @@ -1,6 +1,6 @@ -import json import unittest +from api.dto import AnalysisDto from clients.s3_client import S3Client from botocore.exceptions import ClientError from determine_dt import determine_datatypes @@ -9,96 +9,196 @@ from determine_dt import determine_datatypes class DetermineDatatypesTest(unittest.TestCase): # @Test def test_determine_datatypesDateTime_succeeds(self): - exp = { - "columns": { - "Datum": "timestamp", - "Standort": "varchar", - "Parameter": "varchar", - "Intervall": "varchar", - "Einheit": "varchar", - "Wert": "decimal", - "Status": "varchar", - }, - "separator": ",", - "line_termination": "\n" - } + exp = AnalysisDto(separator=",", line_termination="\n", columns={ + "Datum": { + "type": "timestamp", + "null_allowed": False, + }, + "Standort": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Parameter": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Intervall": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Einheit": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Wert": { + "type": "decimal", + "size": 10, + "d": 4, + "null_allowed": False, + }, + "Status": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + }) # mock S3Client().upload_file("datetime.csv", './data/test_dt/', 
'dbrepo') # test response = determine_datatypes(filename="datetime.csv", separator=",") - self.assertEqual(response, json.dumps(exp)) + self.assertEqual(exp, response) + + # @Test - # @Test def test_determine_datatypesDateTimeWithTimezone_succeeds(self): - exp = { - "columns": { - "Datum": "timestamp", - "Standort": "varchar", - "Parameter": "varchar", - "Intervall": "varchar", - "Einheit": "varchar", - "Wert": "decimal", - "Status": "varchar", - }, - "separator": ",", - "line_termination": "\n" - } + exp = AnalysisDto(separator=",", line_termination="\n", columns={ + "Datum": { + "type": "timestamp", + "null_allowed": False, + }, + "Standort": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Parameter": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Intervall": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Einheit": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Wert": { + "type": "decimal", + "size": 10, + "d": 4, + "null_allowed": False, + }, + "Status": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + }) # mock S3Client().upload_file("datetime_tz.csv", './data/test_dt/', 'dbrepo') # test response = determine_datatypes(filename="datetime_tz.csv", separator=",") - self.assertEqual(response, json.dumps(exp)) + self.assertEqual(exp, response) + + # @Test - # @Test def test_determine_datatypesDateTimeWithT_succeeds(self): - exp = { - "columns": { - "Datum": "timestamp", - "Standort": "varchar", - "Parameter": "varchar", - "Intervall": "varchar", - "Einheit": "varchar", - "Wert": "decimal", - "Status": "varchar", - }, - "separator": ",", - "line_termination": "\n" - } + exp = AnalysisDto(separator=",", line_termination="\n", columns={ + "Datum": { + "type": "timestamp", + "null_allowed": False, + }, + "Standort": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Parameter": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Intervall": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Einheit": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "Wert": { + "type": "decimal", + "size": 10, + "d": 4, + "null_allowed": False, + }, + "Status": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + }) # mock S3Client().upload_file("datetime_t.csv", './data/test_dt/', 'dbrepo') # test response = determine_datatypes(filename="datetime_t.csv", separator=",") - self.assertEqual(response, json.dumps(exp)) + self.assertEqual(exp, response) # @Test def test_determine_datatypes_succeeds(self): - exp = { - "columns": { - "int": "bigint", - "float": "decimal", - "string": "varchar", - "boolean": "bool", - "bool": "bool", - "date": "timestamp", - "time": "timestamp", - "enum": "varchar", # currently not used - }, - "separator": ",", - "line_termination": "\n" - } + exp = AnalysisDto(separator=",", line_termination="\n", columns={ + "int": { + "type": "bigint", + "size": 255, + "null_allowed": False, + }, + "float": { + "type": "decimal", + "size": 10, + "d": 4, + "null_allowed": False, + }, + "string": { + "type": "varchar", + "size": 255, + "null_allowed": False, + }, + "boolean": { + "type": "bool", + "size": None, + "null_allowed": False, + }, + "bool": { + "type": "bool", + "null_allowed": False, + }, + "date": { + "type": "timestamp", + "null_allowed": False, + }, + "time": { + "type": "timestamp", + "null_allowed": False, + }, + "enum": { # currently not used + "type": "varchar", + "size": 
255, + "null_allowed": False, + }, + }) # mock S3Client().upload_file("datatypes.csv", './data/test_dt/', 'dbrepo') # test response = determine_datatypes(filename="datatypes.csv", separator=",") - self.assertEqual(response, json.dumps(exp)) + self.assertEqual(exp, response) # @Test def test_determine_datatypes_fileDoesNotExist_fails(self): @@ -121,9 +221,8 @@ class DetermineDatatypesTest(unittest.TestCase): # test response = determine_datatypes("empty.csv") - data = json.loads(response) - self.assertEqual([], data["columns"]) - self.assertEqual(",", data["separator"]) + self.assertEqual({}, response.columns) + self.assertEqual(",", response.separator) # @Test def test_determine_datatypes_separatorSemicolon_succeeds(self): @@ -133,8 +232,7 @@ class DetermineDatatypesTest(unittest.TestCase): # test response = determine_datatypes(filename="separator.csv", separator=";") - data = json.loads(response) - self.assertEqual(";", data["separator"]) + self.assertEqual(";", response.separator) # @Test def test_determine_datatypes_separatorGuess_succeeds(self): @@ -144,8 +242,7 @@ class DetermineDatatypesTest(unittest.TestCase): # test response = determine_datatypes(filename="separator.csv") - data = json.loads(response) - self.assertEqual(";", data["separator"]) + self.assertEqual(";", response.separator) # @Test def test_determine_datatypes_separatorGuessLargeDataset_succeeds(self): @@ -155,27 +252,33 @@ class DetermineDatatypesTest(unittest.TestCase): # test response = determine_datatypes(filename="large.csv") - data = json.loads(response) - self.assertEqual(",", data["separator"]) + self.assertEqual(",", response.separator) # @Test def test_determine_datatypes_separatorGuessText_succeeds(self): - exp = { - "columns": { - "id": "bigint", - "author": "varchar", - "abstract": "text" + exp = AnalysisDto(separator=";", line_termination="\n", columns={ + "id": { + "type": "bigint", + "size": 255, + "null_allowed": False + }, + "author": { + "type": "varchar", + "size": 255, + "null_allowed": False + }, + "abstract": { + "type": "text", + "null_allowed": False }, - "separator": ";", - "line_termination": "\n" - } + }) # mock S3Client().upload_file("novel.csv", './data/test_dt/', 'dbrepo') # test response = determine_datatypes(filename="novel.csv", separator=";") - self.assertEqual(response, json.dumps(exp)) + self.assertEqual(exp, response) if __name__ == "__main__": diff --git a/dbrepo-data-service/rest-service/src/test/java/at/tuwien/mapper/MariaDbMapperUnitTest.java b/dbrepo-data-service/rest-service/src/test/java/at/tuwien/mapper/MariaDbMapperUnitTest.java new file mode 100644 index 0000000000000000000000000000000000000000..a1a3ef4dad6060f40b60fb956e8b8f9d165228d9 --- /dev/null +++ b/dbrepo-data-service/rest-service/src/test/java/at/tuwien/mapper/MariaDbMapperUnitTest.java @@ -0,0 +1,43 @@ +package at.tuwien.mapper; + +import at.tuwien.test.AbstractUnitTest; +import lombok.extern.log4j.Log4j2; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.test.context.junit.jupiter.SpringExtension; + +import java.util.stream.Stream; + +import static org.junit.Assert.assertEquals; + +@Log4j2 +@SpringBootTest +@ExtendWith(SpringExtension.class) +public class MariaDbMapperUnitTest extends AbstractUnitTest 
{ + + @Autowired + private MariaDbMapper mariaDbMapper; + + public static Stream<Arguments> nameToInternalName_parameters() { + return Stream.of( + Arguments.arguments("dash_minus", "OE/NO-027", "oe_no_027"), + Arguments.arguments("percent", "OE%NO-027", "oe_no_027"), + Arguments.arguments("umlaut", "OE/NÖ-027", "oe_no__027"), + Arguments.arguments("dot", "OE.NO-027", "oe_no_027"), + Arguments.arguments("double_dot", "OE:NO-027", "oe_no_027") + ); + } + + @ParameterizedTest + @MethodSource("nameToInternalName_parameters") + public void nameToInternalName_succeeds(String name, String input, String expected) { + + /* test */ + assertEquals(expected, mariaDbMapper.nameToInternalName(input)); + } + +} diff --git a/dbrepo-data-service/rest-service/src/test/java/at/tuwien/service/SchemaServiceIntegrationTest.java b/dbrepo-data-service/rest-service/src/test/java/at/tuwien/service/SchemaServiceIntegrationTest.java index cc644769272b4697cbe6091284287690b10c1944..be1f6b5dae37db17ec5e90ad001a54ff77869886 100644 --- a/dbrepo-data-service/rest-service/src/test/java/at/tuwien/service/SchemaServiceIntegrationTest.java +++ b/dbrepo-data-service/rest-service/src/test/java/at/tuwien/service/SchemaServiceIntegrationTest.java @@ -59,7 +59,10 @@ public class SchemaServiceIntegrationTest extends AbstractUnitTest { } @Test - public void inspectTable_succeeds() throws TableNotFoundException, SQLException { + public void inspectTable_sameNameDifferentDb_succeeds() throws TableNotFoundException, SQLException { + + /* mock */ + MariaDbConfig.execute(DATABASE_2_PRIVILEGED_DTO, "CREATE TABLE not_in_metadata_db (wrong_id BIGINT NOT NULL PRIMARY KEY, given_name VARCHAR(255) NOT NULL, middle_name VARCHAR(255), family_name VARCHAR(255) NOT NULL, age INT NOT NULL) WITH SYSTEM VERSIONING;"); /* test */ final TableDto response = schemaService.inspectTable(DATABASE_1_PRIVILEGED_DTO, "not_in_metadata_db"); diff --git a/dbrepo-data-service/services/src/main/java/at/tuwien/mapper/DataMapper.java b/dbrepo-data-service/services/src/main/java/at/tuwien/mapper/DataMapper.java index 2e95fc34d28fdcea292f6336717ff4be2c5c03f6..796ef3edebc452471569dd6f04c81bb4731891cd 100644 --- a/dbrepo-data-service/services/src/main/java/at/tuwien/mapper/DataMapper.java +++ b/dbrepo-data-service/services/src/main/java/at/tuwien/mapper/DataMapper.java @@ -553,7 +553,6 @@ public interface DataMapper { } /* boolean encoding fix */ if (column.getColumnType().equals(ColumnTypeDto.TINYINT) && column.getSize() == 1) { - log.trace("column {} is of type tinyint with size {}: map to boolean", column.getInternalName(), column.getSize()); column.setColumnType(ColumnTypeDto.BOOL); } switch (column.getColumnType()) { @@ -562,10 +561,9 @@ public interface DataMapper { log.error("Missing date format for column {}", column.getId()); throw new IllegalArgumentException("Missing date format"); } - log.trace("mapping {} to date with format '{}'", data, column.getDateFormat()); final DateTimeFormatter formatter = new DateTimeFormatterBuilder() .parseCaseInsensitive() /* case insensitive to parse JAN and FEB */ - .appendPattern(column.getDateFormat().getUnixFormat()) + .appendPattern("yyyy-MM-dd") .toFormatter(Locale.ENGLISH); final LocalDate date = LocalDate.parse(String.valueOf(data), formatter); return date.atStartOfDay(ZoneId.of("UTC")) @@ -576,41 +574,32 @@ log.error("Missing date format for column {}", column.getId()); throw new IllegalArgumentException("Missing date format"); } - log.trace("mapping {} to timestamp with format '{}'",
data, column.getDateFormat()); return Timestamp.valueOf(data.toString()) .toInstant(); } case BINARY, VARBINARY, BIT -> { - log.trace("mapping {} -> binary", data); return Long.parseLong(String.valueOf(data), 2); } case TEXT, CHAR, VARCHAR, TINYTEXT, MEDIUMTEXT, LONGTEXT, ENUM, SET -> { - log.trace("mapping {} -> string", data); return String.valueOf(data); } case BIGINT -> { - log.trace("mapping {} -> biginteger", data); return new BigInteger(String.valueOf(data)); } case INT, SMALLINT, MEDIUMINT, TINYINT -> { - log.trace("mapping {} -> integer", data); return Integer.parseInt(String.valueOf(data)); } case DECIMAL, FLOAT, DOUBLE -> { - log.trace("mapping {} -> double", data); return Double.valueOf(String.valueOf(data)); } case BOOL -> { - log.trace("mapping {} -> boolean", data); return Boolean.valueOf(String.valueOf(data)); } case TIME -> { - log.trace("mapping {} -> time", data); return String.valueOf(data); } case YEAR -> { final String date = String.valueOf(data); - log.trace("mapping {} -> year", date); return Short.valueOf(date.substring(0, date.indexOf('-'))); } } @@ -641,9 +630,6 @@ public interface DataMapper { continue; } final Object object = dataColumnToObject(result.getObject(idx[0]++), column); - if (object == null) { - log.warn("result set for column {} is empty (=null)", column.getInternalName()); - } map.put(columnOrAlias, object); } resultList.add(map); @@ -664,7 +650,6 @@ public interface DataMapper { default void prepareStatementWithColumnTypeObject(PreparedStatement ps, ColumnTypeDto columnType, int idx, Object value) throws SQLException { switch (columnType) { case BLOB, TINYBLOB, MEDIUMBLOB, LONGBLOB: - log.trace("prepare statement idx {} blob", idx); if (value == null) { ps.setNull(idx, Types.BLOB); break; @@ -677,135 +662,105 @@ public interface DataMapper { } break; case TEXT, CHAR, VARCHAR, TINYTEXT, MEDIUMTEXT, LONGTEXT, ENUM, SET: - log.trace("prepare statement idx {} {} {}", idx, columnType, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.VARCHAR); break; } ps.setString(idx, String.valueOf(value)); break; case DATE: - log.trace("prepare statement idx {} date {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.DATE); break; } ps.setDate(idx, Date.valueOf(String.valueOf(value))); break; case BIGINT: - log.trace("prepare statement idx {} bigint {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.BIGINT); break; } ps.setLong(idx, Long.parseLong(String.valueOf(value))); break; case INT, MEDIUMINT: - log.trace("prepare statement idx {} {} {}", idx, columnType, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.INTEGER); break; } ps.setLong(idx, Long.parseLong(String.valueOf(value))); break; case TINYINT: - log.trace("prepare statement idx {} tinyint {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.TINYINT); break; } ps.setLong(idx, Long.parseLong(String.valueOf(value))); break; case SMALLINT: - log.trace("prepare statement idx {} smallint {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.SMALLINT); break; } ps.setLong(idx, Long.parseLong(String.valueOf(value))); break; case DECIMAL: - log.trace("prepare statement idx {} decimal {}", idx, value); if (value == 
null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.DECIMAL); break; } ps.setDouble(idx, Double.parseDouble(String.valueOf(value))); break; case FLOAT: - log.trace("prepare statement idx {} float {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.FLOAT); break; } ps.setDouble(idx, Double.parseDouble(String.valueOf(value))); break; case DOUBLE: - log.trace("prepare statement idx {} double {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.DOUBLE); break; } ps.setDouble(idx, Double.parseDouble(String.valueOf(value))); break; case BINARY, VARBINARY, BIT: - log.trace("prepare statement idx {} {} {}", idx, columnType, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.DECIMAL); break; } ps.setBinaryStream(idx, (InputStream) value); break; case BOOL: - log.trace("prepare statement idx {} boolean {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.BOOLEAN); break; } ps.setBoolean(idx, Boolean.parseBoolean(String.valueOf(value))); break; case TIMESTAMP: - log.trace("prepare statement idx {} timestamp {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.TIMESTAMP); break; } ps.setTimestamp(idx, Timestamp.valueOf(String.valueOf(value))); break; case DATETIME: - log.trace("prepare statement idx {} datetime {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.TIMESTAMP); break; } ps.setTimestamp(idx, Timestamp.valueOf(String.valueOf(value))); break; case TIME: - log.trace("prepare statement idx {} time {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.TIME); break; } ps.setTime(idx, Time.valueOf(String.valueOf(value))); break; case YEAR: - log.trace("prepare statement idx {} year {}", idx, value); if (value == null) { - log.trace("idx {} is null, prepare with null value", idx); ps.setNull(idx, Types.TIME); break; } diff --git a/dbrepo-data-service/services/src/main/java/at/tuwien/mapper/MariaDbMapper.java b/dbrepo-data-service/services/src/main/java/at/tuwien/mapper/MariaDbMapper.java index b2ed933049687a9ea35aa4cfa673999b81db31af..74e9ffe66d7428d5fdfb7d94909794f034cfd045 100644 --- a/dbrepo-data-service/services/src/main/java/at/tuwien/mapper/MariaDbMapper.java +++ b/dbrepo-data-service/services/src/main/java/at/tuwien/mapper/MariaDbMapper.java @@ -120,7 +120,7 @@ public interface MariaDbMapper { } default String databaseTableConstraintsSelectRawQuery() { - final String statement = "SELECT k.`ORDINAL_POSITION`, c.`CONSTRAINT_TYPE`, k.`CONSTRAINT_NAME`, k.`COLUMN_NAME`, k.`REFERENCED_TABLE_NAME`, k.`REFERENCED_COLUMN_NAME`, r.`DELETE_RULE`, r.`UPDATE_RULE` FROM information_schema.TABLE_CONSTRAINTS c JOIN information_schema.KEY_COLUMN_USAGE k ON c.`TABLE_NAME` = k.`TABLE_NAME` AND c.`CONSTRAINT_NAME` = k.`CONSTRAINT_NAME` LEFT JOIN information_schema.REFERENTIAL_CONSTRAINTS r ON r.`CONSTRAINT_NAME` = k.`CONSTRAINT_NAME` AND r.`CONSTRAINT_SCHEMA` = c.`TABLE_SCHEMA` AND r.`TABLE_NAME` = c.`TABLE_NAME` WHERE LOWER(k.`COLUMN_NAME`) != 'row_end' AND c.`TABLE_SCHEMA` = ? AND c.`TABLE_NAME` = ? 
ORDER BY k.`ORDINAL_POSITION` ASC;"; + final String statement = "SELECT k.`ORDINAL_POSITION`, c.`CONSTRAINT_TYPE`, k.`CONSTRAINT_NAME`, k.`COLUMN_NAME`, k.`REFERENCED_TABLE_NAME`, k.`REFERENCED_COLUMN_NAME`, r.`DELETE_RULE`, r.`UPDATE_RULE` FROM information_schema.TABLE_CONSTRAINTS c JOIN information_schema.KEY_COLUMN_USAGE k ON c.`TABLE_NAME` = k.`TABLE_NAME` AND c.`CONSTRAINT_NAME` = k.`CONSTRAINT_NAME` AND c.`CONSTRAINT_SCHEMA` = k.`CONSTRAINT_SCHEMA` LEFT JOIN information_schema.REFERENTIAL_CONSTRAINTS r ON r.`CONSTRAINT_NAME` = k.`CONSTRAINT_NAME` AND r.`CONSTRAINT_SCHEMA` = c.`TABLE_SCHEMA` AND r.`TABLE_NAME` = c.`TABLE_NAME` WHERE LOWER(k.`COLUMN_NAME`) != 'row_end' AND c.`TABLE_SCHEMA` = ? AND c.`TABLE_NAME` = ? ORDER BY k.`ORDINAL_POSITION` ASC;"; log.trace("mapped select table constraints statement: {}", statement); return statement; } @@ -753,7 +753,6 @@ public interface MariaDbMapper { switch (columnType) { case BLOB, TINYBLOB, MEDIUMBLOB, LONGBLOB: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.BLOB); break; } @@ -762,7 +761,6 @@ public interface MariaDbMapper { try (ObjectOutputStream ois = new ObjectOutputStream(boas)) { ois.writeObject(value); statement.setBlob(idx, new ByteArrayInputStream(boas.toByteArray())); - log.trace("prepare statement idx {} = {} blob", idx, columnName); } } catch (IOException e) { @@ -772,128 +770,100 @@ public interface MariaDbMapper { break; case TEXT, CHAR, VARCHAR, TINYTEXT, MEDIUMTEXT, LONGTEXT, ENUM, SET: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.VARCHAR); break; } - log.trace("prepare statement idx {} = {} text/char/varchar/tinytext/mediumtext/longtext/enum/set: {}", idx, columnName, value); statement.setString(idx, String.valueOf(value)); break; case DATE: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.DATE); break; } - log.trace("prepare statement idx {} date: {}", idx, value); statement.setDate(idx, Date.valueOf(String.valueOf(value))); break; case BIGINT: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.BIGINT); break; } - log.trace("prepare statement idx {} bigint: {}", idx, value); statement.setLong(idx, Long.parseLong(String.valueOf(value))); break; case INT, MEDIUMINT: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.INTEGER); break; } - log.trace("prepare statement idx {} = {} int/mediumint: {}", idx, columnName, value); statement.setLong(idx, Long.parseLong(String.valueOf(value))); break; case TINYINT: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.TINYINT); break; } - log.trace("prepare statement idx {} = {} tinyint: {}", idx, columnName, value); statement.setLong(idx, Long.parseLong(String.valueOf(value))); break; case SMALLINT: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.SMALLINT); break; } - log.trace("prepare statement idx {} = {} smallint: {}", idx, columnName, value); statement.setLong(idx, Long.parseLong(String.valueOf(value))); break; case DECIMAL: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); 
statement.setNull(idx, Types.DECIMAL); break; } - log.trace("prepare statement idx {} = {} decimal: {}", idx, columnName, value); statement.setDouble(idx, Double.parseDouble(String.valueOf(value))); break; case FLOAT: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.FLOAT); break; } - log.trace("prepare statement idx {} = {} float: {}", idx, columnName, value); statement.setDouble(idx, Double.parseDouble(String.valueOf(value))); break; case DOUBLE: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.DOUBLE); break; } - log.trace("prepare statement idx {} = {} double: {}", idx, columnName, value); statement.setDouble(idx, Double.parseDouble(String.valueOf(value))); break; case BINARY, VARBINARY, BIT: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.DECIMAL); break; } - log.trace("prepare statement idx {} = {} binary/varbinary/bit", idx, columnName); statement.setBinaryStream(idx, (InputStream) value); break; case BOOL: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.BOOLEAN); break; } - log.trace("prepare statement idx {} = {} bool: {}", idx, columnName, value); statement.setBoolean(idx, Boolean.parseBoolean(String.valueOf(value))); break; case TIMESTAMP, DATETIME: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.TIMESTAMP); break; } - log.trace("prepare statement idx {} timestamp/datetime: {}", idx, value); statement.setTimestamp(idx, Timestamp.valueOf(String.valueOf(value))); break; case TIME: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.TIME); break; } - log.trace("prepare statement idx {} = {} time: {}", idx, columnName, value); statement.setTime(idx, Time.valueOf(String.valueOf(value))); break; case YEAR: if (value == null) { - log.trace("idx {} = {} is null, prepare with null value", idx, columnName); statement.setNull(idx, Types.TIME); break; } - log.trace("prepare statement idx {} = {} year: {}", idx, columnName, value); statement.setString(idx, String.valueOf(value)); break; default: diff --git a/dbrepo-metadata-db/1_setup-schema.sql b/dbrepo-metadata-db/1_setup-schema.sql index e385ea4d9b2bc624dbc62ef6db6aaa53d44e2d07..62dc5c3095529d74349a88725724c02c95c15710 100644 --- a/dbrepo-metadata-db/1_setup-schema.sql +++ b/dbrepo-metadata-db/1_setup-schema.sql @@ -539,7 +539,8 @@ INSERT INTO `mdb_images_date` (iid, database_format, unix_format, example, has_t VALUES (1, '%Y-%c-%d %H:%i:%S.%f', 'yyyy-MM-dd HH:mm:ss.SSSSSS', '2022-01-30 13:44:25.499', true), (1, '%Y-%c-%d %H:%i:%S', 'yyyy-MM-dd HH:mm:ss', '2022-01-30 13:44:25', true), (1, '%Y-%c-%d', 'yyyy-MM-dd', '2022-01-30', false), - (1, '%H:%i:%S', 'HH:mm:ss', '13:44:25', true); + (1, '%H:%i:%S', 'HH:mm:ss', '13:44:25', true), + (1, '%d.%c.%Y', 'dd.MM.yyyy', '30.01.2022', false); INSERT INTO `mdb_ontologies` (prefix, uri, uri_pattern, sparql_endpoint, rdf_path) VALUES ('om', 'http://www.ontology-of-units-of-measure.org/resource/om-2/', diff --git a/dbrepo-ui/components/table/TableImport.vue b/dbrepo-ui/components/table/TableImport.vue index 22f708175134d40bc50b3c630a3431181c40018a..e89c920d32e83eae82ccc346593bbd6650456aea 100644 --- 
a/dbrepo-ui/components/table/TableImport.vue +++ b/dbrepo-ui/components/table/TableImport.vue @@ -142,7 +142,6 @@ </v-col> </v-row> <v-form - v-if="!$route.query.location" ref="form" v-model="validStep2" :disabled="disabled" @@ -213,6 +212,7 @@ <v-col cols="8"> <v-btn + v-if="create && !$route.query.location" :disabled="!isAnalyseAllowed || !validStep1 || !validStep2 || disabled" :loading="loading" :variant="buttonVariant" @@ -220,45 +220,38 @@ size="small" :text="$t('pages.table.subpages.import.analyse.text')" @click="uploadAndAnalyse"/> + <v-btn + v-if="!create && !$route.query.location" + :disabled="!isAnalyseAllowed || !validStep1 || !validStep2 || disabled" + :loading="loading || loadingImport" + :variant="buttonVariant" + color="secondary" + size="small" + :text="$t('pages.table.subpages.import.upload.text')" + @click="uploadAndImport"/> + <v-btn + v-if="!create && $route.query.location" + :disabled="step > 2 || disabled" + :loading="loading || loadingImport" + :variant="buttonVariant" + color="secondary" + size="small" + class="mt-2" + :text="$t('pages.table.subpages.import.text')" + @click="importCsv"/> </v-col> </v-row> </v-form> </v-container> </v-stepper-window> - <v-stepper-header - v-if="!create"> - <v-stepper-item - :title="$t('pages.table.subpages.import.dataset.title')" - :complete="validStep3" - :value="3" /> - </v-stepper-header> - <v-stepper-window - v-if="!create" - direction="vertical"> - <v-container> - <v-row - dense> - <v-col> - <v-btn - color="secondary" - :disabled="step !== 3 || disabled" - size="small" - variant="flat" - :loading="loadingImport" - :text="$t('navigation.import')" - @click="importCsv"/> - </v-col> - </v-row> - </v-container> - </v-stepper-window> <v-stepper-header v-if="!create"> <v-stepper-item :title="$t('pages.table.subpages.import.summary.title')" - :value="4"/> + :value="3"/> </v-stepper-header> <v-stepper-window - v-if="!create && step === 4" + v-if="!create && step === 3" direction="vertical"> <v-container> <v-row @@ -280,7 +273,7 @@ <v-btn v-if="rowCount !== null" color="secondary" - :disabled="step !== 4 || disabled" + :disabled="step !== 3 || disabled" size="small" variant="flat" :text="$t('navigation.data')" @@ -364,7 +357,7 @@ export default { this.setQueryParamSafely('line_termination') this.setQueryParamSafely('skip_lines') if (this.$route.query.location) { - this.step = 3 + this.step = 2 this.validStep2 = true } }, @@ -458,7 +451,7 @@ export default { .then((rowCount) => { this.rowCount = rowCount }) - this.step = 4 + this.step = 3 this.validStep3 = true this.loadingImport = false }) @@ -473,22 +466,39 @@ export default { }) }, uploadAndAnalyse() { - this.loading = true - console.debug('upload file', this.file) - const uploadService = useUploadService() - return uploadService.create(this.file) + this.upload() .then((s3key) => { - const toast = useToastInstance() - toast.success(this.$t('success.upload.dataset')) this.analyse(s3key) }) - .catch((error) => { - console.error('Failed to upload dataset', error) - const toast = useToastInstance() - toast.error(this.$t('error.upload.dataset')) - this.loading = false + }, + uploadAndImport() { + this.upload() + .then((s3key) => { + this.tableImport.location = s3key + this.importCsv() }) }, + upload() { + this.loading = true + console.debug('upload file', this.file) + const uploadService = useUploadService() + return new Promise((resolve, reject) => { + return uploadService.create(this.file) + .then((s3key) => { + const toast = useToastInstance() + 
toast.success(this.$t('success.upload.dataset')) + this.loading = false + resolve(s3key) + }) + .catch((error) => { + console.error('Failed to upload dataset', error) + const toast = useToastInstance() + toast.error(this.$t('error.upload.dataset')) + this.loading = false + reject(error) + }) + }) + }, analyse(filename) { const analyseService = useAnalyseService() const payload = { filename } @@ -499,19 +509,17 @@ export default { analyseService.suggest(payload) .then((analysis) => { const {columns, separator, line_termination} = analysis - const queryService = useQueryService() - const dataTypes = queryService.mySql8DataTypes() this.columns = Object.entries(columns) - .map(([key, val]) => { + .map(([name, analyse]) => { return { - name: key, - type: val, - null_allowed: true, + name: name, + type: analyse.type, + null_allowed: analyse.null_allowed, primary_key: false, - size: dataTypes.filter(d => d.value === val).length > 0 ? dataTypes.filter(d => d.value === val)[0].defaultSize : null, - d: dataTypes.filter(d => d.value === val).length > 0 ? dataTypes.filter(d => d.value === val)[0].defaultD : null, - enums: [], - sets: [] + size: analyse.size, + d: analyse.d, + enums: analyse.enums, + sets: analyse.sets } }) this.suggestedAnalyseSeparator = separator @@ -520,7 +528,17 @@ export default { this.step = 3 const toast = useToastInstance() toast.success(this.$t('success.analyse.dataset')) - this.$emit('analyse', {columns: this.columns, filename, line_termination}) + this.$emit('analyse', { + columns: this.columns, + filename, + line_termination, + separator: this.tableImport.separator, + skip_lines: this.tableImport.skip_lines, + quote: this.tableImport.quote, + null_element: this.tableImport.null_element, + true_element: this.tableImport.true_element, + false_element: this.tableImport.false_element + }) this.loading = false }) .catch(({code, message}) => { diff --git a/dbrepo-ui/components/table/TableSchema.vue b/dbrepo-ui/components/table/TableSchema.vue index 51f186dd4a861850c36c4c9443ae73193c172b9d..25c4f66cb55993ff6e5f482b4bf63749021ec155 100644 --- a/dbrepo-ui/components/table/TableSchema.vue +++ b/dbrepo-ui/components/table/TableSchema.vue @@ -147,7 +147,8 @@ @click="removeColumn(idx)" /> </v-col> </v-row> - <v-row dense> + <v-row + dense> <v-col> <v-btn size="small" @@ -158,6 +159,16 @@ @click="addColumn()" /> </v-col> </v-row> + <v-row + v-if="showPrimaryKeyWarning"> + <v-col md="8"> + <v-alert + border="start" + color="warning"> + {{ $t('pages.table.subpages.import.schema.primary.warn') }} + </v-alert> + </v-col> + </v-row> <v-row> <v-col> <v-btn @@ -216,9 +227,6 @@ export default { database () { return this.cacheStore.getDatabase }, - needsSequence () { - return this.columns.filter(c => c.primary_key).length === 0 - }, dateFormats () { if (!this.database || !('container' in this.database) || !('image' in this.database.container) || !('date_formats' in this.database.container.image)) { return [] @@ -232,6 +240,9 @@ export default { buttonVariant () { const runtimeConfig = useRuntimeConfig() return this.$vuetify.theme.global.name.toLowerCase().endsWith('contrast') ? 
runtimeConfig.public.variant.button.contrast : runtimeConfig.public.variant.button.normal + }, + showPrimaryKeyWarning () { + return this.columns.filter(c => c.primary_key).length === 0 } }, watch: { @@ -273,9 +284,6 @@ export default { if (idx > 0) { return true } - if (this.needsSequence) { - return true - } if (this.columns[0].primary_key) { return false } diff --git a/dbrepo-ui/locales/de-AT.json b/dbrepo-ui/locales/de-AT.json index 4d9e25b36e9a38a3e87c8176ec04751811c63528..3cf6c0dfc11e86a397c605c76c7a316fa30d5e8f 100644 --- a/dbrepo-ui/locales/de-AT.json +++ b/dbrepo-ui/locales/de-AT.json @@ -324,6 +324,7 @@ "subpages": { "import": { "title": "Erstellen Sie eine Tabelle aus einem CSV-/TSV-Datensatz", + "text": "Importieren", "metadata": { "title": "Tabellenmetadaten" }, @@ -404,6 +405,9 @@ }, "analyse": { "text": "Hochladen und analysieren" + }, + "upload": { + "text": "Hochladen und importieren" } }, "create": { @@ -571,7 +575,7 @@ "database": { "title": "Datenbank", "image": { - "title": "Teaser-Bild", + "title": "Datenbankbild", "alt": "Datenbanklogo/Standardbild" }, "name": { diff --git a/dbrepo-ui/locales/en-US.json b/dbrepo-ui/locales/en-US.json index 935023300e5c1fd86934d6187605ea1c1b3b2078..cf8dcfd03e8df91fd4ba737c4c3662dec41402f0 100644 --- a/dbrepo-ui/locales/en-US.json +++ b/dbrepo-ui/locales/en-US.json @@ -324,12 +324,16 @@ "subpages": { "import": { "title": "Create table from .csv/.tsv dataset", + "text": "Import", "metadata": { "title": "Table Metadata" }, "schema": { "title": "Dataset Structure", - "text": "the table schema manually." + "text": "the table schema manually.", + "primary": { + "warn": "No primary key column(s) selected. Please select a column that uniquely identifies data entries." + } }, "dataset": { "title": "Import Dataset", @@ -404,6 +408,9 @@ }, "analyse": { "text": "Upload & Analyse" + }, + "upload": { + "text": "Upload & Import" } }, "create": { @@ -571,7 +578,7 @@ "database": { "title": "Database", "image": { - "title": "Teaser Image", + "title": "Database Image", "alt": "Database logo/default image" }, "name": { diff --git a/dbrepo-ui/nuxt.config.ts b/dbrepo-ui/nuxt.config.ts index 14a6c3034dbb36e756d0fe58c58a88e6d7edd8dc..454cadf9090d23f92198c63e033ba7061221a29c 100644 --- a/dbrepo-ui/nuxt.config.ts +++ b/dbrepo-ui/nuxt.config.ts @@ -85,8 +85,8 @@ export default defineNuxtConfig({ database: { unsupported: '*,AVG,BIT_AND,BIT_OR,BIT_XOR,COUNT,COUNTDISTINCT,GROUP_CONCAT,JSON_ARRAYAGG,JSON_OBJECTAGG,MAX,MIN,STD,STDDEV,STDDEV_POP,STDDEV_SAMP,SUM,VARIANCE,VAR_POP,VAR_SAMP,--', image: { - width: 400, - height: 400 + width: 200, + height: 200 }, extra: '' }, @@ -167,4 +167,4 @@ export default defineNuxtConfig({ devtools: { enabled: true }, compatibilityDate: '2024-07-24' -}) \ No newline at end of file +}) diff --git a/dbrepo-ui/pages/database/[database_id]/settings.vue b/dbrepo-ui/pages/database/[database_id]/settings.vue index b7cca1b7aa02c4367c064a9033fc37b2416a2c08..61e9dc62449b48cba17d77b4bb717e8164c9f257 100644 --- a/dbrepo-ui/pages/database/[database_id]/settings.vue +++ b/dbrepo-ui/pages/database/[database_id]/settings.vue @@ -366,10 +366,13 @@ export default { return this.roles.includes('modify-database-image') }, databaseImage () { - if (!this.file) { + if (this.file) { + return URL.createObjectURL(this.file) + } + if (!this.database || !this.database.image) { return null } - return URL.createObjectURL(this.file[0]) + return `data:image/webp;base64,${this.database.image}` }, maxWidth () { return this.$config.public.database.image.width diff 
--git a/dbrepo-ui/pages/database/[database_id]/table/[table_id]/import.vue b/dbrepo-ui/pages/database/[database_id]/table/[table_id]/import.vue index 38c9be5567046b7b7d97b4941aa4f8bb5170daff..9da3e1c9fd9774ca61be0be90af9a7050d6fba3c 100644 --- a/dbrepo-ui/pages/database/[database_id]/table/[table_id]/import.vue +++ b/dbrepo-ui/pages/database/[database_id]/table/[table_id]/import.vue @@ -19,6 +19,7 @@ vertical variant="flat"> <TableImport + :create="false" :table-id="$route.params.table_id" /> </v-stepper> </v-card-text> diff --git a/dbrepo-ui/pages/database/[database_id]/table/create/dataset.vue b/dbrepo-ui/pages/database/[database_id]/table/create/dataset.vue index 9a68b5786f286b5a8123c71669aac75307c2782f..c3b5a38c7ab8f95892ecf496325d8365dd5f1fcb 100644 --- a/dbrepo-ui/pages/database/[database_id]/table/create/dataset.vue +++ b/dbrepo-ui/pages/database/[database_id]/table/create/dataset.vue @@ -356,21 +356,31 @@ export default { this.loading = false }) }, - onAnalyse({columns, filename, line_termination}) { + onAnalyse({columns, filename, line_termination, separator, skip_lines, quote, null_element, true_element, false_element}) { console.debug('analysed', columns) this.tableCreate.columns = columns this.tableImport.location = filename this.tableImport.line_termination = line_termination + this.tableImport.separator = separator + this.tableImport.skip_lines = skip_lines + this.tableImport.quote = quote + this.tableImport.null_element = null_element + this.tableImport.true_element = true_element + this.tableImport.false_element = false_element if (filename) { this.step = 4 } }, async onImport () { this.loadingImport = true + const cacheStore = useCacheStore() + cacheStore.reloadDatabase() await this.$router.push({ path: `/database/${this.$route.params.database_id}/table/${this.table.id}/import`, query: this.tableImport }) }, async onContinue () { this.loadingContinue = true + const cacheStore = useCacheStore() + cacheStore.reloadDatabase() await this.$router.push(`/database/${this.$route.params.database_id}/table/${this.table.id}/data`) } } diff --git a/docker-compose.yml b/docker-compose.yml index 3c78d4d9e9db11765aa5ea47b7f4c781c9ca1743..3ffd0e04a33d4e09e73b4cb8b8a56dfa487283a3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -492,10 +492,10 @@ services: BROKER_EXCHANGE_NAME: ${BROKER_EXCHANGE_NAME:-dbrepo} BROKER_QUEUE_NAME: ${BROKER_QUEUE_NAME:-dbrepo} BROKER_HOST: "${BROKER_ENDPOINT:-broker-service}" - BROKER_PASSWORD: ${SYSTEM_PASSWORD:-admin} + BROKER_PASSWORD: "${SYSTEM_PASSWORD:-admin}" BROKER_PORT: ${BROKER_PORT:-5672} BROKER_SERVICE_ENDPOINT: ${BROKER_SERVICE_ENDPOINT:-http://gateway-service/admin/broker} - BROKER_USERNAME: ${SYSTEM_USERNAME:-admin} + BROKER_USERNAME: "${SYSTEM_USERNAME:-admin}" BROKER_VIRTUALHOST: "${BROKER_VIRTUALHOST:-dbrepo}" CONNECTION_TIMEOUT: ${CONNECTION_TIMEOUT:-60000} EXCHANGE_NAME: ${EXCHANGE_NAME:-dbrepo} diff --git a/lib/python/dbrepo/RestClient.py b/lib/python/dbrepo/RestClient.py index 85c55613ef0eb5259b37d72393677476c980f94f..6be85146a97763e32b95a6272c9758dff46d4d77 100644 --- a/lib/python/dbrepo/RestClient.py +++ b/lib/python/dbrepo/RestClient.py @@ -1103,7 +1103,7 @@ class RestClient: f'201 (CREATED): {response.text}') def import_table_data(self, database_id: int, table_id: int, file_name_or_data_frame: str | DataFrame, - separator: str = None, quote: str = None, skip_lines: int = 0, + separator: str = ",", quote: str = "\"", skip_lines: int = 0, false_encoding: str = None, true_encoding: str = None, null_encoding: str = None, 
line_encoding: str = "\n") -> None: """ diff --git a/lib/python/pyproject.toml b/lib/python/pyproject.toml index 99d20acc30f636a2089ec0379c7247827dd6a32f..8c89061ce82d02b398c4c45f14a5b43efa64ac15 100644 --- a/lib/python/pyproject.toml +++ b/lib/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dbrepo" -version = "1.4.5" +version = "1.4.6" description = "DBRepo Python Library" keywords = [ "DBRepo", diff --git a/lib/python/setup.py b/lib/python/setup.py index 8785f71036fb3b888b278b48c3be9cff68f85eff..34c44d115b6169e64f356b8d9593a65b59ab2bb1 100644 --- a/lib/python/setup.py +++ b/lib/python/setup.py @@ -2,7 +2,7 @@ from distutils.core import setup setup(name="dbrepo", - version="1.4.5", + version="1.4.6", description="A library for communicating with DBRepo", url="https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.5/", author="Martin Weise", diff --git a/make/dev.mk b/make/dev.mk index fa3c71a537aec28528036d7a891a79336e78e4ca..2ddc6f07d083af9bae197acb2a813b04bc29c3ea 100644 --- a/make/dev.mk +++ b/make/dev.mk @@ -19,4 +19,4 @@ package-config: ## Package the config files cp ./dbrepo-broker-service/advanced.config ./.docker/config cp ./dbrepo-storage-service/s3_config.json ./.docker/config cp ./dbrepo-gateway-service/dbrepo.conf ./.docker/config - cd ./.docker && tar czfv ./dist.tar.gz ./docker-compose.yml ./.env ./config + cd ./.docker && tar czf ./dist.tar.gz ./docker-compose.yml ./.env ./config