Skip to content
Snippets Groups Projects
Verified Commit e4da7c76 authored by Martin Weise's avatar Martin Weise
Browse files

Merge branch 'hotfix/helm-data-db' into release-1.4.6

parents 1f936504 2d6ef65b
No related branches found
No related tags found
No related merge requests found
Showing
with 813 additions and 726 deletions
......@@ -2,7 +2,7 @@
author: Martin Weise
---
## v1.4.7 (???)
## v1.4.7 (2024-10-21)
[:simple-gitlab: GitLab Release](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/tags/v1.4.7)
......@@ -19,12 +19,19 @@ author: Martin Weise
#### Changes
* Allow anonymous users to create subsets for public databases
in [#449](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/issues/449).
* Show file upload progress
in [#448](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/issues/448).
* Change the Docker image of the Auth Service to Bitnami-maintained similar to Kubernetes deployment with accompanying
Auth Database change to PostgreSQL
in [#455](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/issues/455)
#### Fixes
* Multiple UI errors in [#453](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/issues/453).
* Fixed the `install.sh` script
in [#444](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services/-/issues/444)
* Removed hardcoded data type metadata from the UI; it is now stored in the Metadata Database instead (associated with `image_id`).
## v1.4.6 (2024-10-11)
......
......@@ -260,10 +260,37 @@ test-search-service:
- build-search-service
dependencies:
- build-search-service
before_script:
- "cp -r ./dbrepo-search-service/init/clients ./dbrepo-search-service/clients"
- "cp -r ./dbrepo-search-service/init/omlib ./dbrepo-search-service/omlib"
script:
- "pip install pipenv"
- "pipenv install gunicorn && pipenv install --dev --system --deploy"
- cd ./dbrepo-search-service/ && coverage run -m pytest test/test_app.py test/test_jwt.py test/test_opensearch_client.py test/test_keycloak_client.py --junitxml=report.xml && coverage html && coverage report > ./coverage.txt
- "cat ./coverage.txt | grep -o 'TOTAL[^%]*%'"
artifacts:
when: always
paths:
- ./dbrepo-search-service/report.xml
- ./dbrepo-search-service/coverage.txt
expire_in: 1 days
reports:
junit: ./dbrepo-search-service/report.xml
coverage: '/TOTAL.*?([0-9]{1,3})%/'
test-search-service-init:
image: docker.io/python:3.11-alpine
stage: test
variables:
PIPENV_PIPFILE: "./dbrepo-search-service/init/Pipfile"
needs:
- build-search-service
dependencies:
- build-search-service
script:
- "pip install pipenv"
- "pipenv install gunicorn && pipenv install --dev --system --deploy"
- cd ./dbrepo-search-service/ && coverage run -m pytest test/test_opensearch_client.py --junitxml=report.xml && coverage html --omit="test/*,omlib/*" && coverage report --omit="test/*,omlib/*" > ./coverage.txt
- cd ./dbrepo-search-service/init/ && coverage run -m pytest test/test_app.py --junitxml=report.xml && coverage html && coverage report > ./coverage.txt
- "cat ./coverage.txt | grep -o 'TOTAL[^%]*%'"
artifacts:
when: always
......
This diff is collapsed.
No preview for this file type
......@@ -22,10 +22,7 @@ import at.tuwien.gateway.DataDatabaseSidecarGateway;
import at.tuwien.gateway.MetadataServiceGateway;
import at.tuwien.test.AbstractUnitTest;
import lombok.extern.log4j.Log4j2;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.extension.ExtendWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
......@@ -507,6 +504,7 @@ public class TableServiceIntegrationTest extends AbstractUnitTest {
}
@Test
@Disabled("Not stable CI/CD")
public void getStatistics_succeeds() throws TableMalformedException, SQLException, TableNotFoundException {
/* test */
......
[report]
omit =
*/test/*
*/omlib/*
*/init/*
\ No newline at end of file
......@@ -9,6 +9,8 @@ __pycache__/
# Generated
coverage.txt
report.xml
clients/
omlib/
# Libraries
./lib/dbrepo-1.4.4*
......
......@@ -17,8 +17,8 @@ USER 1001
WORKDIR /app
COPY --chown=1001 ./clients ./clients
COPY --chown=1001 ./omlib ./omlib
COPY --chown=1001 ./init/clients ./clients
COPY --chown=1001 ./init/omlib ./omlib
COPY --chown=1001 ./os-yml ./os-yml
COPY --chown=1001 ./app.py ./app.py
COPY --chown=1001 ./friendly_names_overrides.json ./friendly_names_overrides.json
......
......@@ -24,6 +24,7 @@ gunicorn = "*"
[dev-packages]
coverage = "*"
pytest = "*"
requests-mock = "*"
[requires]
python_version = "3.11"
This diff is collapsed.
......@@ -2,6 +2,7 @@ import math
import os
import logging
from ast import literal_eval
from json import dumps
from typing import List, Any
import requests
......@@ -10,6 +11,7 @@ from flasgger import LazyJSONEncoder, Swagger, swag_from
from flask import Flask, request
from flask_cors import CORS
from flask_httpauth import HTTPTokenAuth, HTTPBasicAuth, MultiAuth
from jwt.exceptions import JWTDecodeError
from opensearchpy import TransportError, NotFoundError
from prometheus_flask_exporter import PrometheusMetrics
from pydantic import ValidationError
......@@ -206,9 +208,6 @@ app.config["OPENSEARCH_PASSWORD"] = os.getenv('OPENSEARCH_PASSWORD', 'admin')
app.json_encoder = LazyJSONEncoder
available_types = literal_eval(
os.getenv("COLLECTION", "['database','table','column','identifier','unit','concept','user','view']"))
@token_auth.verify_token
def verify_token(token: str):
......@@ -217,7 +216,7 @@ def verify_token(token: str):
try:
client = KeycloakClient()
return client.verify_jwt(access_token=token)
except AssertionError:
except JWTDecodeError as error:
return False
......@@ -268,8 +267,7 @@ def general_filter(index, results):
"view": ["id", "name", "creator", " created"],
}
if index not in important_keys.keys():
error_msg = "the keys to be returned to the user for your index aren't specified in the important Keys dict"
raise KeyError(error_msg)
raise KeyError(f"Failed to find index {index} in: {important_keys.keys()}")
for result in results:
result_keys_copy = tuple(result.keys())
for key in result_keys_copy:
......@@ -294,10 +292,8 @@ def get_index(index: str):
:return: list of the results
"""
logging.info(f'Searching for index: {index}')
if index not in available_types:
return ApiError(status='NOT_FOUND', message='Failed to find index',
code='search.index.missing').model_dump(), 404
results = OpenSearchClient().query_index_by_term_opensearch("*", "contains")
try:
results = general_filter(index, results)
results_per_page = min(request.args.get("results_per_page", 50, type=int), 500)
......@@ -305,24 +301,28 @@ def get_index(index: str):
page = min(request.args.get("page", 1, type=int), max_pages)
results = results[(results_per_page * (page - 1)): (results_per_page * page)]
return dict({"results": results}), 200
except KeyError:
return ApiError(status='NOT_FOUND', message=f'Failed to find get index: {index}',
code='search.index.missing').model_dump(), 404
@app.route("/api/search/<string:type>/fields", methods=["GET"], endpoint="search_get_index_fields")
@app.route("/api/search/<string:field_type>/fields", methods=["GET"], endpoint="search_get_index_fields")
@metrics.gauge(name='dbrepo_search_type_list', description='Time needed to list search types')
@swag_from("os-yml/get_fields.yml")
def get_fields(type: str):
def get_fields(field_type: str):
"""
returns a list of attributes of the data for a specific index.
:param type: The search type
:param field_type: The search type
:return:
"""
logging.info(f'Searching in index database for type: {type}')
if type not in available_types:
return ApiError(status='NOT_FOUND', message='Failed to find type',
code='search.type.missing').model_dump(), 404
fields = OpenSearchClient().get_fields_for_index(type)
logging.debug(f'get fields for type {type} resulted in {len(fields)} field(s)')
logging.info(f'Searching in index database for type: {field_type}')
try:
fields = OpenSearchClient().get_fields_for_index(field_type)
logging.debug(f'get fields for field_type {field_type} resulted in {len(fields)} field(s)')
return fields, 200
except NotFoundError:
return ApiError(status='NOT_FOUND', message=f'Failed to find fields for search type {field_type}',
code='search.type.missing').model_dump(), 404
@app.route("/api/search", methods=["GET"], endpoint="search_fuzzy_search")
......@@ -344,10 +344,10 @@ def get_fuzzy_search():
return dict({"results": results}), 200
@app.route("/api/search/<string:type>", methods=["POST"], endpoint="search_post_general_search")
@app.route("/api/search/<string:field_type>", methods=["POST"], endpoint="search_post_general_search")
@metrics.gauge(name='dbrepo_search_type', description='Time needed to search by type')
@swag_from("os-yml/post_general_search.yml")
def post_general_search(type):
def post_general_search(field_type):
"""
Main endpoint for fuzzy searching.
:return:
......@@ -356,11 +356,7 @@ def post_general_search(type):
return ApiError(status='UNSUPPORTED_MEDIA_TYPE', message='Content type needs to be application/json',
code='search.general.media').model_dump(), 415
req_body = request.json
logging.info(f'Searching in index database for type: {type}')
logging.debug(f"search request body: {req_body}")
if type is not None and type not in available_types:
return ApiError(status='NOT_FOUND', message=f'Type {type} is not in collection: {available_types}',
code='search.general.missing').model_dump(), 404
logging.info(f'Searching in index database for type: {field_type}')
t1 = request.args.get("t1")
if not str(t1).isdigit():
t1 = None
......@@ -370,9 +366,9 @@ def post_general_search(type):
if t1 is not None and t2 is not None and "unit.uri" in req_body and "concept.uri" in req_body:
response = OpenSearchClient().unit_independent_search(t1, t2, req_body)
else:
response = OpenSearchClient().general_search(type, req_body)
response = OpenSearchClient().general_search(field_type, req_body)
# filter by type
if type == 'table':
if field_type == 'table':
tmp = []
for database in response:
if database["tables"] is not None:
......@@ -380,7 +376,7 @@ def post_general_search(type):
table["is_public"] = database["is_public"]
tmp.append(table)
response = tmp
if type == 'identifier':
if field_type == 'identifier':
tmp = []
for database in response:
if database["identifiers"] is not None:
......@@ -398,30 +394,30 @@ def post_general_search(type):
if 'identifier' in view:
tmp.append(view['identifier'])
response = tmp
elif type == 'column':
elif field_type == 'column':
response = [x for xs in response for x in xs["tables"]]
for table in response:
for column in table["columns"]:
column["table_id"] = table["id"]
column["database_id"] = table["database_id"]
response = [x for xs in response for x in xs["columns"]]
elif type == 'concept':
elif field_type == 'concept':
tmp = []
tables = [x for xs in response for x in xs["tables"]]
for column in [x for xs in tables for x in xs["columns"]]:
if 'concept' in column and column["concept"] is not None:
tmp.append(column["concept"])
response = tmp
elif type == 'unit':
elif field_type == 'unit':
tmp = []
tables = [x for xs in response for x in xs["tables"]]
for column in [x for xs in tables for x in xs["columns"]]:
if 'unit' in column and column["unit"] is not None:
tmp.append(column["unit"])
response = tmp
elif type == 'view':
elif field_type == 'view':
response = [x for xs in response for x in xs["views"]]
return dict({'results': response, 'type': type}), 200
return dict({'results': response, 'type': field_type}), 200
@app.route("/api/search/database/<int:database_id>", methods=["PUT"], endpoint="search_put_database")
......@@ -436,16 +432,9 @@ def update_database(database_id: int) -> Database | ApiError:
logging.error(f"Failed to validate: {e}")
return ApiError(status='BAD_REQUEST', message=f'Malformed payload: {e}',
code='search.general.missing').model_dump(), 400
try:
database = OpenSearchClient().update_database(database_id, payload)
logging.info(f"Updated database with id : {database_id}")
return database.model_dump(), 202
except NotFoundError:
return ApiError(status='NOT_FOUND', message='Failed to find database',
code='search.database.missing').model_dump(), 404
except TransportError:
return ApiError(status='BAD_REQUEST', message='Failed to update database',
code='search.database.invalid').model_dump(), 400
@app.route("/api/search/database/<int:database_id>", methods=["DELETE"], endpoint="database_delete_database")
......@@ -455,7 +444,7 @@ def update_database(database_id: int) -> Database | ApiError:
def delete_database(database_id: int):
try:
OpenSearchClient().delete_database(database_id)
return None, 202
return dumps({}), 202
except NotFoundError:
return ApiError(status='NOT_FOUND', message='Failed to find database',
code='search.database.missing').model_dump(), 404
FROM python:3.11-alpine
LABEL org.opencontainers.image.authors="martin.weise@tuwien.ac.at"
RUN apk add bash curl
RUN apk add --no-cache curl bash jq
WORKDIR /home/alpine
......@@ -16,6 +17,8 @@ USER 1001
WORKDIR /app
COPY --chown=1001 ./clients ./clients
COPY --chown=1001 ./omlib ./omlib
COPY --chown=1001 ./app.py ./app.py
COPY --chown=1001 ./database.json ./database.json
......
......@@ -9,7 +9,8 @@ opensearch-py = "~=2.2"
python-dotenv = "~=1.0"
testcontainers-opensearch = "*"
pytest = "*"
dbrepo = {path = "./lib/dbrepo-1.4.6.tar.gz"}
dbrepo = {path = "./lib/dbrepo-1.4.7.tar.gz"}
rdflib = "*"
[dev-packages]
coverage = "*"
......
This diff is collapsed.
......@@ -5,3 +5,7 @@ Responsible for:
* Creating `database` index if not existing
* Importing database(s) from the Metadata Database
* Exit
## Development
Open this project from the `./dbrepo-search-service` directory, since it depends on the `clients` package located there.
\ No newline at end of file
......@@ -6,7 +6,6 @@ from typing import List
import opensearchpy.exceptions
from dbrepo.RestClient import RestClient
from logging.config import dictConfig
from pathlib import Path
from dbrepo.api.dto import Database
from opensearchpy import OpenSearch
......@@ -68,9 +67,6 @@ class App:
logging.debug(f"create instance {self.search_host}:{self.search_port}")
return self.search_instance
def index_exists(self):
return self._instance().indices.exists(index="database")
def database_exists(self, database_id: int):
try:
self._instance().get(index="database", id=database_id)
......@@ -78,28 +74,13 @@ class App:
except opensearchpy.exceptions.NotFoundError:
return False
def index_update(self, is_created: bool) -> bool:
"""
:param is_created:
:return: True if the index was updated
"""
if is_created:
logging.debug(f"index 'database' does not exist, creating...")
def index_update(self) -> None:
if self._instance().indices.exists(index="database"):
logging.debug(f"index 'database' exists, removing...")
self._instance().indices.delete(index="database")
with open('./database.json', 'r') as f:
self._instance().indices.create(index="database", body=json.load(f))
logging.info(f"Created index 'database'")
return True
mapping = dict(self._instance().indices.get_mapping(index="database"))
identifier_props = mapping["database"]["mappings"]["properties"]["identifiers"]["properties"]
if "status" in identifier_props:
logging.debug(f"found mapping database.identifiers.status: detected current mapping")
return False
logging.debug(f"index 'database' exists, updating mapping...")
with open('./database.json', 'r') as f:
self._instance().indices.put_mapping(index="database", body=json.load(f))
logging.info(f"Updated index 'database'")
return True
def fetch_databases(self) -> List[Database]:
logging.debug(f"fetching database from endpoint: {self.metadata_service_endpoint}")
......@@ -126,7 +107,6 @@ class App:
if __name__ == "__main__":
app = App()
create = not app.index_exists()
update = app.index_update(is_created=create)
update = app.index_update()
app.save_databases(databases=app.fetch_databases())
logging.info("Finished. Exiting.")
"""
The opensearch_client.py is used by the different API endpoints in routes.py to handle requests to the opensearch db
"""
import os
from json import dumps, load
import logging
from dbrepo.api.dto import Database
from flask import current_app
from collections.abc import MutableMapping
from opensearchpy import OpenSearch, TransportError, RequestError
from opensearchpy import OpenSearch, TransportError, RequestError, NotFoundError
from omlib.measure import om
from omlib.constants import OM_IDS
......@@ -26,11 +26,11 @@ class OpenSearchClient:
password: str = None
instance: OpenSearch = None
def __init__(self):
self.host = current_app.config["OPENSEARCH_HOST"]
self.port = int(current_app.config["OPENSEARCH_PORT"])
self.username = current_app.config["OPENSEARCH_USERNAME"]
self.password = current_app.config["OPENSEARCH_PASSWORD"]
def __init__(self, host: str = None, port: int = None, username: str = None, password: str = None):
self.host = os.getenv('OPENSEARCH_HOST', host)
self.port = int(os.getenv('OPENSEARCH_PORT', port))
self.username = os.getenv('OPENSEARCH_USERNAME', username)
self.password = os.getenv('OPENSEARCH_PASSWORD', password)
def _instance(self) -> OpenSearch:
"""
......@@ -42,7 +42,6 @@ class OpenSearchClient:
self.instance = OpenSearch(hosts=[{"host": self.host, "port": self.port}],
http_compress=True,
http_auth=(self.username, self.password))
logging.debug(f"create instance {self.host}:{self.port}")
return self.instance
def get_database(self, database_id: int) -> Database:
......@@ -68,16 +67,8 @@ class OpenSearchClient:
@throws: opensearchpy.exceptions.NotFoundError If the database was not found in the Search Database.
"""
logging.debug(f"updating database with id: {database_id} in search database")
try:
self._instance().index(index="database", id=database_id, body=dumps(data.model_dump()))
except RequestError as e:
logging.error(f"Failed to update in search database: {e.info}")
raise e
try:
response: dict = self._instance().get(index="database", id=database_id)
except TransportError as e:
logging.error(f"Failed to get updated database in search database: {e.status_code}")
raise e
database = Database.parse_obj(response["_source"])
logging.info(f"Updated database with id {database_id} in index 'database'")
return database
......@@ -119,10 +110,10 @@ class OpenSearchClient:
results = [hit["_source"] for hit in response["hits"]["hits"]]
return results
def get_fields_for_index(self, type: str):
def get_fields_for_index(self, field_type: str):
"""
returns a list of attributes of the data for a specific index.
:param type: The search type
:param field_type: The search type
:return: list of fields
"""
fields = {
......@@ -135,8 +126,10 @@ class OpenSearchClient:
"view": "views.*",
"user": "creator.*",
}
logging.debug(f'requesting field(s) {fields[type]} for filter: {type}')
fields = self._instance().indices.get_field_mapping(fields[type])
if field_type not in fields.keys():
raise NotFoundError(f"Failed to find field type: {field_type}")
logging.debug(f'requesting field(s) {fields[field_type]} for filter: {field_type}')
fields = self._instance().indices.get_field_mapping(fields[field_type])
fields_list = []
fd = flatten_dict(fields)
for key in fd.keys():
......@@ -170,13 +163,13 @@ class OpenSearchClient:
logging.info(f"Found {len(response['hits']['hits'])} result(s)")
return response
def general_search(self, type: str = None, field_value_pairs: dict = None):
def general_search(self, field_type: str = None, field_value_pairs: dict = None):
"""
Main method for searching stuff in the opensearch db
all parameters are optional
:param type: The index to be searched. Optional.
:param field_type: The index to be searched. Optional.
:param field_value_pairs: The key-value pair of properties that need to match. Optional.
:return: The object of results and HTTP status code. e.g. { "hits": { "hits": [] } }, 200
"""
......@@ -205,7 +198,7 @@ class OpenSearchClient:
body = {
"query": {"bool": {"must": musts}}
}
logging.debug(f'search in index database for type: {type}')
logging.debug(f'search in index database for type: {field_type}')
logging.debug(f'search body: {dumps(body)}')
response = self._instance().search(
index="database",
......@@ -214,12 +207,10 @@ class OpenSearchClient:
results = [hit["_source"] for hit in response["hits"]["hits"]]
return results
def unit_independent_search(self, t1=None, t2=None, field_value_pairs=None):
def unit_independent_search(self, t1: float, t2: float, field_value_pairs):
"""
Main method for searching stuff in the opensearch db
all parameters are optional
:param t1: start value
:param t2: end value
:param field_value_pairs: the key-value pairs
......@@ -241,6 +232,8 @@ class OpenSearchClient:
)
unit_uris = [hit["key"] for hit in response["aggregations"]["units"]["buckets"]]
logging.debug(f"found {len(unit_uris)} unit(s) in column index")
if len(unit_uris) == 0:
raise NotFoundError("Failed to search: no unit assigned")
base_unit = unit_uri_to_unit(field_value_pairs["unit.uri"])
for unit_uri in unit_uris:
gte = t1
......
......@@ -125,40 +125,6 @@
},
"image": {
"properties": {
"date_formats": {
"properties": {
"created_at": {
"type": "date"
},
"database_format": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"has_time": {
"type": "boolean"
},
"id": {
"type": "long"
},
"unix_format": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"default_port": {
"type": "long"
},
"dialect": {
"type": "text",
"fields": {
......@@ -639,37 +605,6 @@
"database_id": {
"type": "long"
},
"date_format": {
"properties": {
"created_at": {
"type": "date"
},
"database_format": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"has_time": {
"type": "boolean"
},
"id": {
"type": "long"
},
"unix_format": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"id": {
"type": "long"
},
......@@ -959,37 +894,6 @@
"database_id": {
"type": "long"
},
"date_format": {
"properties": {
"created_at": {
"type": "date"
},
"database_format": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"has_time": {
"type": "boolean"
},
"id": {
"type": "long"
},
"unix_format": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"id": {
"type": "long"
},
......
File deleted
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment