Commit 772a1e6f authored by Martin Weise

Merge branch 'master' into 'release-1.4.5'

Master

See merge request !309
parents d9327bf4 8805712e
Showing changed files with 466 additions and 426 deletions
# general
BASE_URL=https://example.com
ADMIN_EMAIL=support@example.com
BASE_URL=http://localhost
ADMIN_EMAIL=support@localhost
# password for the identity service admin user
IDENTITY_SERVICE_ADMIN_PASSWORD=admin
# password for the auth service admin user
......@@ -9,7 +9,7 @@ AUTH_SERVICE_ADMIN_PASSWORD=admin
METADATA_DB_PASSWORD=dbrepo
DATA_DB_PASSWORD=dbrepo
AUTH_DB_PASSWORD=dbrepo
SEARCH_DB_PASSWORD=dbrepo
SEARCH_DB_PASSWORD=admin
# storage service
S3_SECRET_ACCESS_KEY=seaweedfsadmin
# internal admin user, requires a change of the value of auth_ldap.dn_lookup_bind.password in dist/rabbitmq.conf
......
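The values above are development defaults; any real deployment should replace every secret. A minimal sketch for generating replacements with Python's standard library (the key list is copied from the file above; the output name .env.local is a hypothetical choice):

import secrets

# Generate a random replacement for each credential defined in .env
# (key list copied from the configuration above).
KEYS = [
    "IDENTITY_SERVICE_ADMIN_PASSWORD",
    "AUTH_SERVICE_ADMIN_PASSWORD",
    "METADATA_DB_PASSWORD",
    "DATA_DB_PASSWORD",
    "AUTH_DB_PASSWORD",
    "SEARCH_DB_PASSWORD",
    "S3_SECRET_ACCESS_KEY",
]

with open(".env.local", "w") as fh:
    for key in KEYS:
        # token_urlsafe(24) yields a 32-character URL-safe secret
        fh.write(f"{key}={secrets.token_urlsafe(24)}\n")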
File added
version: "3.6"
volumes:
  metadata-db-data:
  data-db-data:
......@@ -18,8 +16,8 @@ services:
    image: docker.io/bitnami/mariadb:11.1.3-debian-11-r6
    volumes:
      - metadata-db-data:/bitnami/mariadb
      - ./dist/setup-schema.sql:/docker-entrypoint-initdb.d/1_setup-schema.sql
      - ./dist/setup-data.sql:/docker-entrypoint-initdb.d/2_setup-data.sql
      - ./config/1_setup-schema.sql:/docker-entrypoint-initdb.d/1_setup-schema.sql
      - ./config/2_setup-data.sql:/docker-entrypoint-initdb.d/2_setup-data.sql
    ports:
      - "3306:3306"
    environment:
......@@ -195,10 +193,10 @@ services:
    ports:
      - 5672:5672
    volumes:
      - ./dist/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf
      - ./dist/advanced.config:/etc/rabbitmq/advanced.config
      - ./dist/enabled_plugins:/etc/rabbitmq/enabled_plugins
      - ./dist/definitions.json:/app/definitions.json
      - ./config/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf
      - ./config/advanced.config:/etc/rabbitmq/advanced.config
      - ./config/enabled_plugins:/etc/rabbitmq/enabled_plugins
      - ./config/definitions.json:/app/definitions.json
      - broker-service-data:/bitnami/rabbitmq/mnesia
    depends_on:
      dbrepo-identity-service:
......@@ -300,7 +298,7 @@ services:
- "80:80"
- "443:443"
volumes:
- ./dist/dbrepo.conf:/etc/nginx/conf.d/default.conf
- ./config/dbrepo.conf:/etc/nginx/conf.d/default.conf
depends_on:
dbrepo-analyse-service:
condition: service_healthy
......@@ -363,7 +361,7 @@ services:
    image: docker.io/chrislusf/seaweedfs:3.59
    command: [ "server", "-dir=/data", "-s3", "-s3.port=9000", "-s3.config=/app/s3_config.json", "-metricsPort=9091" ]
    volumes:
      - ./dist/s3_config.json:/app/s3_config.json
      - ./config/s3_config.json:/app/s3_config.json
      - storage-service-data:/data
    ports:
      - "9000:9000"
......
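The compose file now mounts everything from ./config instead of ./dist, matching the .gitignore entry added below. A small pre-flight check (a hypothetical helper, not part of the repository) that the renamed files exist before running docker compose up:

from pathlib import Path

# Files the compose file above mounts from ./config (list copied from the diff).
REQUIRED = [
    "config/1_setup-schema.sql",
    "config/2_setup-data.sql",
    "config/rabbitmq.conf",
    "config/advanced.config",
    "config/enabled_plugins",
    "config/definitions.json",
    "config/dbrepo.conf",
    "config/s3_config.json",
]

missing = [f for f in REQUIRED if not Path(f).is_file()]
if missing:
    raise SystemExit(f"missing mounted config files: {', '.join(missing)}")
print("all mounted config files present")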
......@@ -13,6 +13,7 @@ build/
tmp.yaml
.docs/.swagger/api-*
.scannerwork/
.docker/config/*
# docs
.docs/.swagger/dist/
......
This diff is collapsed.
......@@ -254,10 +254,10 @@ def analyse_datatypes():
        return Response(res, mimetype="application/json"), 202
    except OSError as e:
        logging.error(f"Failed to determine data types: {e}")
        return ApiError(status='BAD_REQUEST', message=str(e), code='analyse.csv.invalid'), 400
        return ApiError(status='BAD_REQUEST', message=str(e), code='error.analyse.invalid').model_dump_json(), 400
    except ClientError as e:
        logging.error(f"Failed to determine separator: {e}")
        return ApiError(status='NOT_FOUND', message='Failed to find csv', code='analyse.csv.missing'), 404
        return ApiError(status='NOT_FOUND', message='Failed to find csv', code='error.analyse.missing').model_dump_json(), 404


@app.route("/api/analyse/keys", methods=["GET"], endpoint="analyse_analyse_keys")
......@@ -269,7 +269,7 @@ def analyse_keys():
logging.debug(f"Analyse keys from filename '{filename}' with separator {separator}")
if filename is None or separator is None:
return ApiError(status='BAD_REQUEST', message="Missing required query parameters 'filename' and 'separator'",
code='analyse.csv.invalid'), 400
code='analyse.csv.invalid').model_dump_json(), 400
try:
res = {
'keys': determine_pk(filename, separator)
......@@ -278,4 +278,4 @@ def analyse_keys():
        return Response(dumps(res), mimetype="application/json"), 202
    except OSError as e:
        logging.error(f"Failed to determine primary key: {e}")
        return ApiError(status='BAD_REQUEST', message=str(e), code='analyse.database.invalid'), 400
        return ApiError(status='BAD_REQUEST', message=str(e), code='analyse.database.invalid').model_dump_json(), 400
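These fixes serialize the pydantic ApiError before returning it, because Flask cannot render a pydantic model as a response body by itself. A minimal sketch of the pattern, assuming pydantic v2 (the ApiError fields are inferred from the calls above; the demo route is hypothetical):

from flask import Flask, Response
from pydantic import BaseModel

app = Flask(__name__)


class ApiError(BaseModel):
    status: str
    message: str
    code: str


@app.route("/api/analyse/demo", methods=["GET"])
def demo():
    # model_dump_json() renders the pydantic model as a JSON string,
    # which Flask accepts as a response body; a bare model would fail.
    error = ApiError(status='BAD_REQUEST', message='example', code='error.analyse.invalid')
    return Response(error.model_dump_json(), mimetype="application/json"), 400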
......@@ -9,11 +9,12 @@ import pandas
from numpy import dtype, max, min
from flask import current_app
from pandas.errors import EmptyDataError
from clients.s3_client import S3Client
def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=None) -> {}:
def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=',') -> {}:
    # Use option enum=True for searching Postgres ENUM Types in CSV file. Remark
    # Enum is not SQL standard, hence, it might not be supported by all db-engines.
    # However, it can be used in Postgres and MySQL.
......@@ -35,10 +36,22 @@ def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=None) -
line_terminator = "\r"
elif b"\r\n" in line:
line_terminator = "\r\n"
logging.info("Analysing corpus with separator: %s", separator)
logging.info(f"Analysing corpus with separator: {separator}")
# index_col=False -> prevent shared index & count length correct
df = pandas.read_csv(fh, delimiter=separator, nrows=100, lineterminator=line_terminator, index_col=False)
df = None
for encoding in ['utf-8', 'cp1252', 'latin1', 'iso-8859-1']:
try:
logging.debug(f"attempt parsing .csv using encoding {encoding}")
df = pandas.read_csv(fh, delimiter=separator, nrows=100, lineterminator=line_terminator,
index_col=False, encoding=encoding)
logging.debug(f"parsing .csv using encoding {encoding} was successful")
break
except (UnicodeDecodeError, EmptyDataError) as error:
logging.warning(f"Failed to parse .csv using encoding {encoding}: {error}")
if df is None:
raise IOError(
f"Failed to parse .csv: no supported encoding found (one of: utf-8, cp1252, latin1, iso-8859-1)")
if b"," in line:
separator = ","
......@@ -51,31 +64,44 @@ def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=None) -
    for name, dataType in df.dtypes.items():
        if dataType == dtype('float64'):
            if pandas.to_numeric(df[name], errors='coerce').notnull().all():
                logging.debug(f"mapped column {name} from float64 to decimal")
                r[name] = 'decimal'
            else:
                logging.debug(f"mapped column {name} from float64 to text")
                r[name] = 'text'
        elif dataType == dtype('int64'):
            min_val = min(df[name])
            max_val = max(df[name])
            if 0 <= min_val <= 1 and 0 <= max_val <= 1:
                logging.debug(f"mapped column {name} from int64 to bool")
                r[name] = 'bool'
                continue
            logging.debug(f"mapped column {name} from int64 to bigint")
            r[name] = 'bigint'
        elif dataType == dtype('O'):
            try:
                pandas.to_datetime(df[name], format='mixed')
                logging.debug(f"mapped column {name} from O to timestamp")
                r[name] = 'timestamp'
                continue
            except ValueError:
                pass
            max_size = max(df[name].astype(str).map(len))
            if max_size <= 1:
                logging.debug(f"mapped column {name} from O to char")
                r[name] = 'char'
            elif 0 <= max_size <= 255:
                logging.debug(f"mapped column {name} from O to varchar")
                r[name] = 'varchar'
            else:
                logging.debug(f"mapped column {name} from O to text")
                r[name] = 'text'
        elif dataType == dtype('bool'):
            logging.debug(f"mapped column {name} from bool to bool")
            r[name] = 'bool'
        elif dataType == dtype('datetime64'):
            logging.debug(f"mapped column {name} from datetime64 to datetime")
            r[name] = 'datetime'
        else:
            logging.warning(f'default to \'text\' for column {name} and type {dataType}')
......
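As a quick illustration of the mapping above, a toy frame and the pandas dtypes it reports (the comments show the SQL type each column would map to; this is an illustration, not the service code):

import pandas

df = pandas.DataFrame({
    "price": [1.5, 2.25, 3.0],      # float64 -> decimal
    "flag": [0, 1, 0],              # int64 in {0, 1} -> bool
    "count": [10, 250, 42],         # int64 -> bigint
    "label": ["a", "bb", "ccc"],    # object, max length 3 -> varchar
})
print(df.dtypes)
# price    float64
# flag       int64
# count      int64
# label     object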
......@@ -8,7 +8,7 @@ from determine_dt import determine_datatypes
from clients.s3_client import S3Client
def determine_pk(filename, separator=","):
def determine_pk(filename: str, separator: str = ','):
    dt = json.loads(determine_datatypes(filename=filename, separator=separator))
    dt = {k.lower(): v for k, v in dt["columns"].items()}
    # {k.lower(): v for k, v in dt['columns'].items() if v != 'Numeric'}
......
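determine_pk lower-cases the column names from determine_datatypes before ranking key candidates. The normalization step in isolation (the sample document is made up, but its shape mirrors the "columns" mapping that determine_datatypes returns):

import json

# Shape mirrors what determine_datatypes returns: a JSON document
# with a "columns" mapping (sample values are made up).
raw = json.dumps({"columns": {"ID": "bigint", "Name": "varchar"}})

dt = json.loads(raw)
dt = {k.lower(): v for k, v in dt["columns"].items()}
print(dt)  # {'id': 'bigint', 'name': 'varchar'}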
File deleted
File deleted
......@@ -261,12 +261,12 @@ public class SubsetEndpoint {
            QueryNotFoundException, StorageUnavailableException, QueryMalformedException, SidecarExportException,
            StorageNotFoundException, QueryStoreInsertException, TableMalformedException, PaginationException,
            QueryNotSupportedException, NotAllowedException, UserNotFoundException, MetadataServiceException {
        log.debug("endpoint create subset in database, databaseId={}, data.statement={}, principal.name={}, page={}, " +
                "size={}, timestamp={}", databaseId, data.getStatement(), principal.getName(), page, size, timestamp);
        log.debug("endpoint create subset in database, databaseId={}, data.statement={}, principal.name={}, " +
                "page={}, size={}, timestamp={}", databaseId, data.getStatement(), principal.getName(), page, size,
                timestamp);
        /* check */
        endpointValidator.validateDataParams(page, size);
        endpointValidator.validateForbiddenStatements(data.getStatement());
        metadataServiceGateway.getAccess(databaseId, UserUtil.getId(principal));
        /* parameters */
        if (page == null) {
            page = 0L;
......
......@@ -646,7 +646,7 @@ public class TableEndpoint {
mediaType = "application/json",
schema = @Schema(implementation = ApiErrorDto.class))}),
@ApiResponse(responseCode = "404",
description = "Failed to find table in metadata database",
description = "Failed to find table or database in metadata database",
content = {@Content(
mediaType = "application/json",
schema = @Schema(implementation = ApiErrorDto.class))}),
......@@ -659,9 +659,10 @@ public class TableEndpoint {
    public ResponseEntity<TableStatisticDto> statistic(@NotBlank @PathVariable("databaseId") Long databaseId,
                                                       @NotBlank @PathVariable("tableId") Long tableId)
            throws DatabaseUnavailableException, RemoteUnavailableException, TableNotFoundException,
            MetadataServiceException, TableMalformedException, QueryMalformedException {
            MetadataServiceException, TableMalformedException, DatabaseNotFoundException {
        log.debug("endpoint generate table statistic, databaseId={}, tableId={}", databaseId, tableId);
        final PrivilegedTableDto table = metadataServiceGateway.getTableById(databaseId, tableId);
        table.setDatabase(metadataServiceGateway.getDatabaseById(databaseId));
        try {
            final TableStatisticDto dto = tableService.getStatistics(table);
            return ResponseEntity.ok(dto);
......
......@@ -167,7 +167,7 @@ public class SubsetEndpointUnitTest extends AbstractUnitTest {
    @Test
    @WithMockUser(username = USER_1_USERNAME, authorities = {"execute-query"})
    public void create_succeeds() throws UserNotFoundException, QueryStoreInsertException, TableMalformedException,
    public void create_noAccess_succeeds() throws UserNotFoundException, QueryStoreInsertException, TableMalformedException,
            NotAllowedException, SidecarExportException, QueryNotSupportedException, PaginationException,
            StorageNotFoundException, DatabaseUnavailableException, StorageUnavailableException,
            QueryMalformedException, QueryNotFoundException, DatabaseNotFoundException, RemoteUnavailableException,
......@@ -177,8 +177,6 @@ public class SubsetEndpointUnitTest extends AbstractUnitTest {
                .build();

        /* mock */
        when(metadataServiceGateway.getAccess(DATABASE_3_ID, USER_1_ID))
                .thenReturn(DATABASE_3_USER_1_READ_ACCESS_DTO);
        when(metadataServiceGateway.getDatabaseById(DATABASE_3_ID))
                .thenReturn(DATABASE_3_PRIVILEGED_DTO);
        when(queryService.execute(eq(DATABASE_3_PRIVILEGED_DTO), anyString(), any(Instant.class), eq(USER_1_ID), eq(0L), eq(10L), eq(null), eq(null)))
......@@ -213,8 +211,6 @@ public class SubsetEndpointUnitTest extends AbstractUnitTest {
                .build();

        /* mock */
        when(metadataServiceGateway.getAccess(DATABASE_3_ID, USER_1_ID))
                .thenReturn(DATABASE_3_USER_1_READ_ACCESS_DTO);
        when(metadataServiceGateway.getDatabaseById(DATABASE_3_ID))
                .thenReturn(DATABASE_3_PRIVILEGED_DTO);
        when(queryService.execute(eq(DATABASE_3_PRIVILEGED_DTO), anyString(), any(Instant.class), eq(USER_1_ID), eq(0L), eq(10L), eq(null), eq(null)))
......@@ -226,15 +222,13 @@ public class SubsetEndpointUnitTest extends AbstractUnitTest {
    @Test
    @WithMockUser(username = USER_1_USERNAME, authorities = {"execute-query"})
    public void create_databaseNotFound_fails() throws NotAllowedException, RemoteUnavailableException,
    public void create_databaseNotFound_fails() throws RemoteUnavailableException,
            DatabaseNotFoundException, MetadataServiceException {
        final ExecuteStatementDto request = ExecuteStatementDto.builder()
                .statement(QUERY_5_STATEMENT)
                .build();

        /* mock */
        when(metadataServiceGateway.getAccess(DATABASE_3_ID, USER_1_ID))
                .thenReturn(DATABASE_3_USER_1_READ_ACCESS_DTO);
        doThrow(DatabaseNotFoundException.class)
                .when(metadataServiceGateway)
                .getDatabaseById(DATABASE_3_ID);
......@@ -258,24 +252,6 @@ public class SubsetEndpointUnitTest extends AbstractUnitTest {
        });
    }

    @Test
    @WithMockUser(username = USER_4_USERNAME, authorities = {"execute-query"})
    public void create_noAccess_fails() throws NotAllowedException, RemoteUnavailableException, MetadataServiceException {
        final ExecuteStatementDto request = ExecuteStatementDto.builder()
                .statement(QUERY_5_STATEMENT)
                .build();

        /* mock */
        doThrow(NotAllowedException.class)
                .when(metadataServiceGateway)
                .getAccess(DATABASE_3_ID, USER_4_ID);

        /* test */
        assertThrows(NotAllowedException.class, () -> {
            subsetEndpoint.create(DATABASE_3_ID, request, USER_4_PRINCIPAL, null, null, null);
        });
    }

    @Test
    public void getData_succeeds() throws DatabaseNotFoundException, RemoteUnavailableException, UserNotFoundException,
            NotAllowedException, SQLException, QueryNotFoundException, TableMalformedException, QueryMalformedException,
......
......@@ -451,7 +451,7 @@ public class TableServiceIntegrationTest extends AbstractUnitTest {
    }

    @Test
    public void getStatistics_succeeds() throws TableMalformedException, SQLException, QueryMalformedException {
    public void getStatistics_succeeds() throws TableMalformedException, SQLException, TableNotFoundException {
        /* test */
        final TableStatisticDto response = tableService.getStatistics(TABLE_1_PRIVILEGED_DTO);
......@@ -493,13 +493,8 @@ public class TableServiceIntegrationTest extends AbstractUnitTest {
    @Test
    public void create_malformed_fails() {
        final at.tuwien.api.database.table.internal.TableCreateDto request = TableCreateDto.builder()
                .needSequence(false)
                .name("missing_foreign_key")
                .columns(List.of(ColumnCreateDto.builder()
                        .name("id")
                        .type(ColumnTypeDto.BIGINT)
                        .nullAllowed(false)
                        .build()))
                .columns(List.of())
                .constraints(ConstraintsCreateDto.builder()
                        .foreignKeys(List.of(ForeignKeyCreateDto.builder()
                                .columns(List.of("i_do_not_exist"))
......
......@@ -41,9 +41,17 @@ public class KeycloakGatewayImpl implements KeycloakGateway {
        final String url = keycloakConfig.getKeycloakEndpoint() + "/realms/dbrepo/protocol/openid-connect/token";
        log.trace("request user token from url: {}", url);
        log.trace("request username: {}", username);
        log.trace("request password: {}", password != null ? "(set)" : "(not set)");
        if (password.isEmpty() || password.isBlank()) {
            log.warn("request password: (empty)");
        } else {
            log.trace("request password: (set)");
        }
        log.trace("request client_id: {}", keycloakConfig.getKeycloakClient());
        log.trace("request client_secret: {}", keycloakConfig.getKeycloakClientSecret());
        if (keycloakConfig.getKeycloakClientSecret().isEmpty() || keycloakConfig.getKeycloakClientSecret().isBlank()) {
            log.warn("request client_secret: (empty)");
        } else {
            log.trace("request client_secret: (set)");
        }
        final ResponseEntity<TokenDto> response;
        try {
            response = new RestTemplate()
......
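The gateway now logs whether a credential is present instead of its value. The same hygiene rule, sketched here in Python (function and logger names are made up):

import logging

logger = logging.getLogger("gateway")


def log_credential(name: str, value: str | None) -> None:
    # Log presence, not content: secrets must never reach the log stream.
    if not value or value.isspace():
        logger.warning("%s: (empty)", name)
    else:
        logger.debug("%s: (set)", name)


log_credential("client_secret", "s3cr3t")  # -> client_secret: (set)
log_credential("password", "")             # -> password: (empty)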
......@@ -313,6 +313,7 @@ public class MetadataServiceGatewayImpl implements MetadataServiceGateway {
            RemoteUnavailableException {
        final ResponseEntity<Void> response;
        final String url = "/api/database/" + databaseId + "/table/" + tableId;
        log.trace("mapped url: {}", url);
        try {
            response = restTemplate.exchange(url, HttpMethod.PUT, HttpEntity.EMPTY, Void.class);
        } catch (ResourceAccessException | HttpServerErrorException e) {
......
......@@ -234,9 +234,7 @@ public interface MariaDbMapper {
                /* data type */
                .append(columnTypeDtoToDataType(column))
                /* null expressions */
                .append(column.getNullAllowed() != null && column.getNullAllowed() ? " NULL" : " NOT NULL")
                /* default expressions */
                .append(data.getNeedSequence() && column.getName().equals("id") ? " DEFAULT NEXTVAL(`" + tableCreateDtoToSequenceName(data) + "`)" : "");
                .append(column.getNullAllowed() != null && column.getNullAllowed() ? " NULL" : " NOT NULL");
        if (column.getDescription() != null && !column.getDescription().isEmpty()) {
            /* comments */
            stringBuilder.append(" COMMENT \"")
......@@ -565,7 +563,7 @@ public interface MariaDbMapper {
        final int[] jdx = new int[]{0};
        data.getKeys()
                .forEach((key, value) -> {
                    statement.append(jdx[0] == 0 ? "" : ", ")
                    statement.append(jdx[0] == 0 ? "" : " AND ")
                            .append("`")
                            .append(key)
                            .append("`");
......
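The second hunk fixes the join between per-key conditions: commas are invalid between WHERE predicates, so the mapper now joins them with AND. The effect of the corrected join, sketched here in Python (table and key names are made up):

keys = {"id": 42, "version": 7}

# Join one condition per key with " AND ", as the fixed mapper does;
# the old ", " join would have produced invalid SQL.
where = " AND ".join(f"`{column}` = ?" for column in keys)
statement = f"SELECT * FROM `some_table` WHERE {where}"
print(statement)
# SELECT * FROM `some_table` WHERE `id` = ? AND `version` = ?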
......@@ -32,10 +32,10 @@ public interface TableService {
     * @return The table statistic, if successful.
     * @throws SQLException Failed to parse SQL query, contains invalid syntax.
     * @throws TableMalformedException The table statistic generation was unsuccessful, likely due to a bug in the mapping.
     * @throws QueryMalformedException The inspection query is malformed.
     * @throws TableNotFoundException The table could not be inspected in the data database.
     */
    TableStatisticDto getStatistics(PrivilegedTableDto table) throws SQLException, TableMalformedException,
            QueryMalformedException;
            TableNotFoundException;

    /**
     * Finds a table with given data database and table name.
......
......@@ -86,7 +86,7 @@ public class TableServiceMariaDbImpl extends HibernateConnector implements Table
    @Override
    public TableStatisticDto getStatistics(PrivilegedTableDto table) throws SQLException, TableMalformedException,
            QueryMalformedException {
            TableNotFoundException {
        final ComboPooledDataSource dataSource = getPrivilegedDataSource(table.getDatabase());
        final Connection connection = dataSource.getConnection();
        final TableStatisticDto statistic;
......@@ -95,7 +95,11 @@ public class TableServiceMariaDbImpl extends HibernateConnector implements Table
            final ResultSet resultSet = connection.prepareStatement(mariaDbMapper.tableColumnStatisticsSelectRawQuery(table.getColumns(), table.getInternalName()))
                    .executeQuery();
            statistic = dataMapper.resultSetToTableStatistic(resultSet);
            statistic.setRows(getCount(table, null));
            final TableDto tmpTable = schemaService.inspectTable(table.getDatabase(), table.getInternalName());
            statistic.setAvgRowLength(tmpTable.getAvgRowLength());
            statistic.setDataLength(tmpTable.getDataLength());
            statistic.setMaxDataLength(tmpTable.getMaxDataLength());
            statistic.setRows(tmpTable.getNumRows());
        } catch (SQLException e) {
            connection.rollback();
            log.error("Failed to obtain column statistics: {}", e.getMessage());
......@@ -107,7 +111,8 @@ public class TableServiceMariaDbImpl extends HibernateConnector implements Table
                .stream()
                .filter(column -> !MariaDbUtil.numericDataTypes.contains(column.getColumnType()))
                .forEach(column -> statistic.getColumns().put(column.getInternalName(), new ColumnStatisticDto()));
        log.info("Obtained column statistics for table: {}", table.getInternalName());
        log.info("Obtained statistics for the table and {} column(s)", statistic.getColumns().size());
        log.trace("obtained statistics: {}", statistic);
        return statistic;
    }
......@@ -123,12 +128,6 @@ public class TableServiceMariaDbImpl extends HibernateConnector implements Table
        final ComboPooledDataSource dataSource = getPrivilegedDataSource(database);
        final Connection connection = dataSource.getConnection();
        try {
            if (data.getNeedSequence()) {
                /* create table sequence if not exists */
                connection.prepareStatement(mariaDbMapper.tableCreateDtoToCreateSequenceRawQuery(data))
                        .execute();
                log.info("Created sequence as primary key");
            }
            /* create table if not exists */
            connection.prepareStatement(mariaDbMapper.tableCreateDtoToCreateTableRawQuery(data))
                    .execute();
......
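getStatistics now takes the row count and length figures from schema inspection instead of a separate COUNT query. A plausible reading (not confirmed by this diff) is that inspectTable reads MariaDB's information_schema; the equivalent lookup as a standalone Python sketch, with made-up connection parameters:

import mariadb  # pip install mariadb

conn = mariadb.connect(host="localhost", port=3306, user="root",
                       password="dbrepo", database="test")  # made-up credentials
cur = conn.cursor()
# information_schema.TABLES carries the figures the statistic DTO needs.
cur.execute("""
    SELECT TABLE_ROWS, AVG_ROW_LENGTH, DATA_LENGTH, MAX_DATA_LENGTH
    FROM information_schema.TABLES
    WHERE TABLE_SCHEMA = ? AND TABLE_NAME = ?
    """, ("test", "some_table"))
rows, avg_row_length, data_length, max_data_length = cur.fetchone()
print(rows, avg_row_length, data_length, max_data_length)
conn.close()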