From 61fd323b1a158a6a01fd44da9074b068e7c59f30 Mon Sep 17 00:00:00 2001
From: Martin Weise <martin.weise@tuwien.ac.at>
Date: Wed, 13 Sep 2023 19:57:32 +0200
Subject: [PATCH] Fixed the index generation on startup

- Add more stability as index is generated on database creation
- Schema is fixed, any schema conflict will be detected
---
 dbrepo-search-db/Dockerfile              |  22 ++
 dbrepo-search-db/create-indices.sh       |  18 ++
 dbrepo-search-db/docker-entrypoint.sh    |   5 +
 dbrepo-search-db/healthcheck.sh          |  10 +
 dbrepo-search-db/indices/column.json     | 112 +++++++++++
 dbrepo-search-db/indices/concept.json    |  34 ++++
 dbrepo-search-db/indices/database.json   |  53 +++++
 dbrepo-search-db/indices/identifier.json | 235 +++++++++++++++++++++++
 dbrepo-search-db/indices/table.json      | 142 ++++++++++++++
 dbrepo-search-db/indices/unit.json       |  29 +++
 dbrepo-search-db/indices/user.json       |  40 ++++
 dbrepo-search-db/indices/view.json       |  74 +++++++
 docker-compose.yml                       |   6 +-
 13 files changed, 777 insertions(+), 3 deletions(-)
 create mode 100644 dbrepo-search-db/Dockerfile
 create mode 100644 dbrepo-search-db/create-indices.sh
 create mode 100644 dbrepo-search-db/docker-entrypoint.sh
 create mode 100644 dbrepo-search-db/healthcheck.sh
 create mode 100644 dbrepo-search-db/indices/column.json
 create mode 100644 dbrepo-search-db/indices/concept.json
 create mode 100644 dbrepo-search-db/indices/database.json
 create mode 100644 dbrepo-search-db/indices/identifier.json
 create mode 100644 dbrepo-search-db/indices/table.json
 create mode 100644 dbrepo-search-db/indices/unit.json
 create mode 100644 dbrepo-search-db/indices/user.json
 create mode 100644 dbrepo-search-db/indices/view.json

diff --git a/dbrepo-search-db/Dockerfile b/dbrepo-search-db/Dockerfile
new file mode 100644
index 0000000000..ca9423191b
--- /dev/null
+++ b/dbrepo-search-db/Dockerfile
@@ -0,0 +1,22 @@
+# Search database image: OpenSearch with the index schemas and helper scripts copied into /app.
+FROM opensearchproject/opensearch:2.8.0 AS runtime
+
+USER root
+
+# jq is used by create-indices.sh; the yum cache is cleaned in the same layer to keep it small.
+RUN yum install -y jq && yum clean all
+
+USER opensearch
+
+WORKDIR /app
+
+COPY ./limits.conf /etc/security/limits.conf
+
+# A wildcard source requires a directory destination that ends with a slash.
+COPY ./indices/*.json ./indices/
+
+COPY ./create-indices.sh ./create-indices.sh
+COPY ./docker-entrypoint.sh ./docker-entrypoint.sh
+COPY ./healthcheck.sh ./healthcheck.sh
+
+ENTRYPOINT [ "bash", "./docker-entrypoint.sh" ]
\ No newline at end of file
diff --git a/dbrepo-search-db/create-indices.sh b/dbrepo-search-db/create-indices.sh
new file mode 100644
index 0000000000..57e4ef7adb
--- /dev/null
+++ b/dbrepo-search-db/create-indices.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Waits until OpenSearch answers on 127.0.0.1:9200, then creates each index from indices/<name>.json.
+until curl -sSL -o /dev/null 127.0.0.1:9200/_cat/indices 2>&1
+do
+  echo "[create-indices.sh] OpenSearch not yet ready"
+  sleep 5
+done
+echo "[create-indices.sh] OpenSearch ready"
+for index in "user" "view" "database" "identifier" "concept" "column" "table" "unit"; do
+  RES=$(curl -sSL -X PUT "127.0.0.1:9200/$index" -H "Content-Type: application/json" --data "@indices/$index.json")
+  ACK=$(echo "$RES" | jq .acknowledged)
+  # jq prints "null" or "false" when the request was not acknowledged, so compare against "true" explicitly.
+  if [ "$ACK" = "true" ]; then
+    echo "[create-indices.sh] Created $index index"
+  else
+    echo "[create-indices.sh] Failed to create $index index: $RES"
+  fi
+done
\ No newline at end of file
diff --git a/dbrepo-search-db/docker-entrypoint.sh b/dbrepo-search-db/docker-entrypoint.sh
new file mode 100644
index 0000000000..174fd3292f
--- /dev/null
+++ b/dbrepo-search-db/docker-entrypoint.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Create the indices in the background; that script polls until OpenSearch is reachable.
+bash /app/create-indices.sh &
+# exec replaces this shell with the OpenSearch entrypoint so it receives container stop signals directly.
+exec bash /usr/share/opensearch/opensearch-docker-entrypoint.sh
\ No newline at end of file
diff --git a/dbrepo-search-db/healthcheck.sh b/dbrepo-search-db/healthcheck.sh
new file mode 100644
index 0000000000..6f35d6cc29
--- /dev/null
+++ b/dbrepo-search-db/healthcheck.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# Exits 1 unless every expected index name appears (as an exact line) in the _cat/indices listing.
+# NOTE(review): awk only prints rows whose 6th column is non-zero/non-empty - confirm this filter is intended.
+INDICES=$(curl -sSL http://localhost:9200/_cat/indices | awk '{ if ($6) { print $3 } }')
+for index in "user" "view" "database" "identifier" "concept" "column" "table" "unit"; do
+  if ! echo "$INDICES" | grep -qx "$index"; then
+    echo "[healthcheck.sh] Index $index does not exist"
+    exit 1
+  fi
+done
\ No newline at end of file
diff --git a/dbrepo-search-db/indices/column.json b/dbrepo-search-db/indices/column.json
new file mode 100644
index 0000000000..f0654c92e7
--- /dev/null
+++ b/dbrepo-search-db/indices/column.json
@@ -0,0 +1,112 @@
+{
+  "aliases": {},
+  "mappings": {
+    "properties": {
+      "auto_generated": {
+        "type": "boolean"
+      },
+      "column_type": {
+        "type": "keyword"
+      },
+      "concept": {
+        "type": "nested",
+        "include_in_parent": true,
+        "properties": {
+          "created": {
+            "type": "date",
+            "format": "date_optional_time||epoch_millis"
+          },
+          "description": {
+            "type": "text"
+          },
+          "id": {
+            "type": "keyword"
+          },
+          "name": {
+            "type": "keyword"
+          },
+          "uri": {
+            "type": "keyword"
+          }
+        }
+      },
+      "d": {
+        "type": "integer"
+      },
+      "database_id": {
+        "type": "keyword"
+      },
+      "date_format": {
+        "type": "nested",
+        "include_in_parent": true,
+        "properties": {
+          "created_at": {
+            "type": "date",
+            "format": "date_optional_time||epoch_millis"
+          }
+        }
+      },
+      "enums": {
+        "type": "nested"
+      },
+      "id": {
+        "type": "keyword"
+      },
+      "index_length": {
+        "type": "integer"
+      },
+      "internal_name": {
+        "type": "keyword"
+      },
+      "is_null_allowed": {
+        "type": "boolean"
+      },
+      "is_primary_key": {
+        "type": "boolean"
+      },
+      "is_public": {
+        "type": "boolean"
+      },
+      "name": {
+        "type": "keyword"
+      },
+      "sets": {
+        "type": "nested"
+      },
+      "size": {
+        "type": "integer"
+      },
+      "table_id": {
+        "type": "keyword"
+      },
+      "unit": {
+        "type": "nested",
+        "include_in_parent": true,
+        "properties": {
+          "created": {
+            "type": "date",
+            "format": "date_optional_time||epoch_millis"
+          },
+          "description": {
+            "type": "text"
+          },
+          "id": {
+            "type": "keyword"
+          },
+          "name": {
+            "type": "keyword"
+          },
+          "uri": {
+            "type": "keyword"
+          }
+        }
+      }
+    }
+  },
+  "settings": {
+    "index": {
+      "number_of_shards": "1",
+      "number_of_replicas": "1"
+    }
+  }
+}
\ No newline at end of file
diff --git 
a/dbrepo-search-db/indices/concept.json b/dbrepo-search-db/indices/concept.json new file mode 100644 index 0000000000..8564e7ba77 --- /dev/null +++ b/dbrepo-search-db/indices/concept.json @@ -0,0 +1,34 @@ +{ + "aliases": {}, + "mappings": { + "properties": { + "_class": { + "type": "keyword", + "index": false, + "doc_values": false + }, + "created": { + "type": "date", + "format": "date_optional_time||epoch_millis" + }, + "description": { + "type": "text" + }, + "id": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "uri": { + "type": "keyword" + } + } + }, + "settings": { + "index": { + "number_of_shards": "1", + "number_of_replicas": "1" + } + } +} \ No newline at end of file diff --git a/dbrepo-search-db/indices/database.json b/dbrepo-search-db/indices/database.json new file mode 100644 index 0000000000..c363ce12fc --- /dev/null +++ b/dbrepo-search-db/indices/database.json @@ -0,0 +1,53 @@ +{ + "aliases": {}, + "mappings": { + "properties": { + "container": { + "type": "nested", + "include_in_parent": true, + "properties": { + "created": { + "type": "date", + "format": "date_optional_time||epoch_millis" + } + } + }, + "created": { + "type": "date", + "format": "date_optional_time||epoch_millis" + }, + "creator": { + "type": "nested", + "include_in_parent": true + }, + "description": { + "type": "text" + }, + "exchange_name": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "internal_name": { + "type": "keyword" + }, + "is_public": { + "type": "boolean" + }, + "name": { + "type": "keyword" + }, + "owner": { + "type": "nested", + "include_in_parent": true + } + } + }, + "settings": { + "index": { + "number_of_shards": "1", + "number_of_replicas": "1" + } + } +} \ No newline at end of file diff --git a/dbrepo-search-db/indices/identifier.json b/dbrepo-search-db/indices/identifier.json new file mode 100644 index 0000000000..2d2390992a --- /dev/null +++ b/dbrepo-search-db/indices/identifier.json @@ -0,0 +1,235 @@ +{ + "aliases": {}, + 
"mappings": { + "properties": { + "created": { + "type": "date", + "format": "date_optional_time||epoch_millis" + }, + "creator": { + "type": "nested", + "properties": { + "attributes": { + "type": "nested", + "include_in_parent": true, + "properties": { + "affiliation": { + "type": "keyword" + }, + "orcid": { + "type": "keyword" + } + } + }, + "firstname": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "lastname": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "username": { + "type": "keyword" + } + } + }, + "creators": { + "type": "nested", + "include_in_parent": true, + "properties": { + "affiliation": { + "type": "keyword" + }, + "affiliation_identifier": { + "type": "keyword" + }, + "affiliation_identifier_scheme": { + "type": "keyword" + }, + "affiliation_identifier_scheme_uri": { + "type": "keyword" + }, + "creator_name": { + "type": "text" + }, + "firstname": { + "type": "text" + }, + "id": { + "type": "keyword" + }, + "lastname": { + "type": "text" + }, + "name_identifier": { + "type": "keyword" + }, + "name_identifier_scheme": { + "type": "keyword" + }, + "name_identifier_scheme_uri": { + "type": "keyword" + }, + "name_type": { + "type": "keyword" + } + } + }, + "database_id": { + "type": "keyword" + }, + "descriptions": { + "type": "nested", + "include_in_parent": true, + "properties": { + "description": { + "type": "text" + }, + "id": { + "type": "keyword" + }, + "language": { + "type": "keyword" + }, + "type": { + "type": "keyword" + } + } + }, + "doi": { + "type": "keyword" + }, + "execution": { + "type": "date", + "format": "date_optional_time||epoch_millis" + }, + "funders": { + "type": "nested", + "include_in_parent": true, + "properties": { + "award_number": { + "type": "keyword" + }, + "award_title": { + "type": "keyword" + }, + "funder_identifier": { + "type": "keyword" + }, + "funder_identifier_type": { + "type": "keyword" + }, + "funder_name": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + 
"scheme_uri": { + "type": "keyword" + } + } + }, + "id": { + "type": "keyword" + }, + "language": { + "type": "keyword" + }, + "licenses": { + "type": "nested", + "include_in_parent": true + }, + "publication_day": { + "type": "integer" + }, + "publication_month": { + "type": "integer" + }, + "publication_year": { + "type": "integer" + }, + "publisher": { + "type": "text" + }, + "query": { + "type": "text" + }, + "query_hash": { + "type": "text" + }, + "query_id": { + "type": "keyword" + }, + "query_normalized": { + "type": "text" + }, + "related_identifiers": { + "type": "nested", + "include_in_parent": true, + "properties": { + "created": { + "type": "date", + "format": "date_optional_time||epoch_millis" + }, + "id": { + "type": "keyword" + }, + "relation": { + "type": "keyword" + }, + "type": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "result_hash": { + "type": "text" + }, + "result_number": { + "type": "long" + }, + "titles": { + "type": "nested", + "include_in_parent": true, + "properties": { + "id": { + "type": "keyword" + }, + "language": { + "type": "keyword" + }, + "title": { + "type": "keyword" + }, + "type": { + "type": "keyword" + } + } + }, + "type": { + "type": "keyword" + }, + "view_id": { + "type": "keyword" + }, + "visibility": { + "type": "keyword" + } + } + }, + "settings": { + "index": { + "number_of_shards": "1", + "number_of_replicas": "1" + } + } +} \ No newline at end of file diff --git a/dbrepo-search-db/indices/table.json b/dbrepo-search-db/indices/table.json new file mode 100644 index 0000000000..fac3d74c76 --- /dev/null +++ b/dbrepo-search-db/indices/table.json @@ -0,0 +1,142 @@ +{ + "aliases": {}, + "mappings": { + "properties": { + "constraints": { + "type": "nested", + "include_in_parent": true, + "properties": { + "checks": { + "type": "nested" + }, + "foreign_keys": { + "type": "nested", + "include_in_parent": true, + "properties": { + "_class": { + "type": "keyword", + "index": false, + "doc_values": 
false + }, + "on_delete": { + "type": "keyword" + }, + "on_update": { + "type": "keyword" + } + } + }, + "uniques": { + "type": "nested", + "include_in_parent": true, + "properties": { + "id": { + "type": "keyword" + } + } + } + } + }, + "created": { + "type": "date", + "format": "date_optional_time||epoch_millis" + }, + "creator": { + "type": "nested", + "include_in_parent": true, + "properties": { + "attributes": { + "type": "nested", + "include_in_parent": true, + "properties": { + "affiliation": { + "type": "keyword" + }, + "orcid": { + "type": "keyword" + } + } + }, + "firstname": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "lastname": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "username": { + "type": "keyword" + } + } + }, + "description": { + "type": "text" + }, + "id": { + "type": "keyword" + }, + "internal_name": { + "type": "keyword" + }, + "is_public": { + "type": "boolean" + }, + "is_versioned": { + "type": "boolean" + }, + "name": { + "type": "keyword" + }, + "owner": { + "type": "nested", + "include_in_parent": true, + "properties": { + "attributes": { + "type": "nested", + "include_in_parent": true, + "properties": { + "affiliation": { + "type": "keyword" + }, + "orcid": { + "type": "keyword" + } + } + }, + "firstname": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "lastname": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "username": { + "type": "keyword" + } + } + }, + "queue_name": { + "type": "keyword" + }, + "routing_key": { + "type": "keyword" + } + } + }, + "settings": { + "index": { + "number_of_shards": "1", + "number_of_replicas": "1" + } + } +} \ No newline at end of file diff --git a/dbrepo-search-db/indices/unit.json b/dbrepo-search-db/indices/unit.json new file mode 100644 index 0000000000..aeb66c7eaa --- /dev/null +++ b/dbrepo-search-db/indices/unit.json @@ -0,0 +1,29 @@ +{ + "aliases": {}, + "mappings": { + "properties": { + "created": { + "type": "date", 
+ "format": "date_optional_time||epoch_millis" + }, + "description": { + "type": "text" + }, + "id": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "uri": { + "type": "keyword" + } + } + }, + "settings": { + "index": { + "number_of_shards": "1", + "number_of_replicas": "1" + } + } +} \ No newline at end of file diff --git a/dbrepo-search-db/indices/user.json b/dbrepo-search-db/indices/user.json new file mode 100644 index 0000000000..687518d52a --- /dev/null +++ b/dbrepo-search-db/indices/user.json @@ -0,0 +1,40 @@ +{ + "aliases": {}, + "mappings": { + "properties": { + "attributes": { + "type": "nested", + "include_in_parent": true, + "properties": { + "affiliation": { + "type": "keyword" + }, + "orcid": { + "type": "keyword" + } + } + }, + "firstname": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "lastname": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "username": { + "type": "keyword" + } + } + }, + "settings": { + "index": { + "number_of_shards": "1", + "number_of_replicas": "1" + } + } +} \ No newline at end of file diff --git a/dbrepo-search-db/indices/view.json b/dbrepo-search-db/indices/view.json new file mode 100644 index 0000000000..9483af5c1e --- /dev/null +++ b/dbrepo-search-db/indices/view.json @@ -0,0 +1,74 @@ +{ + "aliases": {}, + "mappings": { + "properties": { + "created": { + "type": "date", + "format": "date_optional_time||epoch_millis" + }, + "creator": { + "type": "nested", + "include_in_parent": true, + "properties": { + "attributes": { + "type": "nested", + "include_in_parent": true, + "properties": { + "affiliation": { + "type": "keyword" + }, + "orcid": { + "type": "keyword" + } + } + }, + "firstname": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "lastname": { + "type": "keyword" + }, + "name": { + "type": "keyword" + }, + "username": { + "type": "keyword" + } + } + }, + "database_id": { + "type": "keyword" + }, + "id": { + "type": "keyword" + }, + "initial_view": 
{ + "type": "boolean" + }, + "internal_name": { + "type": "keyword" + }, + "is_public": { + "type": "boolean" + }, + "name": { + "type": "keyword" + }, + "query": { + "type": "text" + }, + "query_hash": { + "type": "keyword" + } + } + }, + "settings": { + "index": { + "number_of_shards": "1", + "number_of_replicas": "1" + } + } +} \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 5230afe9e7..44c598ea0e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -212,7 +212,8 @@ services: restart: "no" container_name: dbrepo-search-db hostname: search-db - image: opensearchproject/opensearch:2.8.0 + build: ./dbrepo-search-db + image: dbrepo-search-db networks: core: ports: @@ -221,7 +222,7 @@ services: env_file: - .env healthcheck: - test: curl -s localhost:9200/_cat/indices || exit 1 + test: bash ./healthcheck.sh interval: 10s timeout: 5s retries: 12 @@ -236,7 +237,6 @@ services: memory: 4G volumes: - search-db-data:/usr/share/elasticsearch/data - - ./dbrepo-search-db/limits.conf:/etc/security/limits.conf logging: driver: json-file -- GitLab