diff --git a/dbrepo-search-service/app/api/routes.py b/dbrepo-search-service/app/api/routes.py index 21afad4e76eff81662b282504c11114279712283..f666637037a2cf5289387de1801194d733d7bffd 100644 --- a/dbrepo-search-service/app/api/routes.py +++ b/dbrepo-search-service/app/api/routes.py @@ -158,6 +158,9 @@ def search(): t2 = req_body.get("t2") if not str(t2).isdigit(): t2 = None - fieldValuePairs = req_body.get("field_value_pairs") - response = general_search(search_term, t1, t2, fieldValuePairs) + field_value_pairs = req_body.get("field_value_pairs") + if t1 is not None and t2 is not None and "unit.uri" in field_value_pairs and "concept.uri" in field_value_pairs: + response = unit_independent_search(t1, t2, field_value_pairs) + else: + response = general_search(search_term, t1, t2, field_value_pairs) return response, 200 diff --git a/dbrepo-search-service/app/opensearch_client.py b/dbrepo-search-service/app/opensearch_client.py index 9423e0379f59c45834a9e1950a7d9c4ea869e9b9..68d8e247b1b3ca6e2be90cffa2103222a4557dd4 100644 --- a/dbrepo-search-service/app/opensearch_client.py +++ b/dbrepo-search-service/app/opensearch_client.py @@ -1,6 +1,7 @@ """ The opensearch_client.py is used by the different API endpoints in routes.py to handle requests to the opensearch db """ +import json import logging import re from flask import current_app @@ -123,42 +124,9 @@ def general_search(search_term=None, t1=None, t2=None, fieldValuePairs=None): :param value: the value the specified field should match :return: """ + logging.info(f"Performing general search") searchable_indices = ["database", "user", "table", "column", "identifier", "view", "concept", "unit"] index = searchable_indices - # field_list = [ - # "id", - # "internal_name", - # "table.name", - # "database.is_public", - # "database.container.image.name", - # "database.container.image.version", - # "table.description", - # "identifier.titles.title", - # "identifier.descriptions.description", - # "identifier.publisher", - # 
"identifier.creators.*.firstname", - # "identifier.creators.*.lastname", - # "identifier.creators.*.creator_name", - # "column.column_type", - # "column.is_null_allowed", - # "column.is_primary_key", - # "unit.uri", - # "unit.name", - # "unit.description", - # "concept.uri", - # "concept.name", - # "concept.description", - # "funders", - # "title", - # "description", - # "creator.username", - # "author", - # "name", - # "uri", - # "database.*", - # "internal_name", - # "is_public", - # ] queries = [] if search_term is not None: logging.debug('query has search_term present') @@ -195,6 +163,7 @@ def general_search(search_term=None, t1=None, t2=None, fieldValuePairs=None): is_range_query = True logging.debug(f"query has start value {t1} and end value {t2} present") for key, value in fieldValuePairs.items(): + logging.debug(f"current key={key}, value={value}") if key == "type" and value in searchable_indices: logging.debug("search for specific index: %s", value) index = value @@ -231,7 +200,8 @@ def general_search(search_term=None, t1=None, t2=None, fieldValuePairs=None): } }) elif is_range_query and re.match(f"unit\.", key): - logging.debug(f"omit key={key} because query type=full range and key is somewhat unit") + logging.debug( + f"omit key={key} because query type=full range and key is somewhat unit") logging.info(f"add match-query for range [{t1},{t2}]") musts.append({ "range": { @@ -249,7 +219,7 @@ def general_search(search_term=None, t1=None, t2=None, fieldValuePairs=None): }) else: precision = "90%" - if key in ["attributes.orcid", "creators.name_identifier"]: + if key in ["attributes.orcid", "creators.name_identifier", "concept.uri"]: precision = "100%" logging.debug(f"key {key} needs precision of 100%") musts.append({ @@ -261,36 +231,6 @@ def general_search(search_term=None, t1=None, t2=None, fieldValuePairs=None): queries.append(specific_query) body = { "query": {"bool": {"must": queries}} - # "_source": [ - # "_class", - # "id", - # "table_id", - # 
def flatten(mylist):
    """
    Flatten an iterable of iterables by exactly one level.

    :param mylist: iterable whose elements are themselves iterable
    :return: a new list holding the items of every inner iterable, in order
    """
    return [item for sublist in mylist for item in sublist]


def _unit_search_query(concept_uri, unit_uri, gte, lte):
    """Build one msearch query: columns of a concept/unit whose [val_min, val_max] lies in [gte, lte]."""
    return {
        "query": {
            "bool": {
                "must": [
                    {"match": {"concept.uri": {"query": concept_uri}}},
                    {"range": {"val_min": {"gte": gte}}},
                    {"range": {"val_max": {"lte": lte}}},
                    {"match": {"unit.uri": {"query": unit_uri}}},
                ]
            }
        }
    }


def unit_independent_search(t1=None, t2=None, field_value_pairs=None):
    """
    Search the ``column`` index for columns of a concept across all units.

    The distinct ``unit.uri`` values are collected from the ``column`` index
    with a terms aggregation. One range query per unit is then sent in a
    single multi-search request and the hits of all sub-searches are merged.

    :param t1: start value of the range, applied to ``val_min`` (gte)
    :param t2: end value of the range, applied to ``val_max`` (lte)
    :param field_value_pairs: the key-value pairs; must contain the keys
        ``unit.uri`` and ``concept.uri``
    :return: dict with the merged ``hits.hits``, ``took`` and ``status`` 200
    :raises TypeError: if ``field_value_pairs`` is None
    :raises KeyError: if ``unit.uri`` or ``concept.uri`` is missing
    """
    logging.info("Performing unit-independent search")
    # Read the required keys first so that invalid input fails before any
    # request is sent, independent of how many units the index holds.
    requested_unit_uri = field_value_pairs["unit.uri"]
    concept_uri = field_value_pairs["concept.uri"]
    response = current_app.opensearch_client.search(
        index="column",
        body={
            "size": 0,
            "aggs": {
                "units": {
                    "terms": {"field": "unit.uri", "size": 500}
                }
            }
        }
    )
    unit_uris = [bucket["key"] for bucket in response["aggregations"]["units"]["buckets"]]
    logging.debug("found %d unit(s) in column index", len(unit_uris))
    if not unit_uris:
        # A multi-search request needs at least one sub-search; an empty body
        # is rejected, so answer with an empty result instead.
        return {
            "hits": {"hits": []},
            "took": response.get("took", 0),
            "status": 200
        }
    searches = []
    for unit_uri in unit_uris:
        if unit_uri == requested_unit_uri:
            gte, lte = t1, t2
        else:
            # NOTE(review): fixed range used for every other unit; no value
            # conversion between units is performed here -- confirm intent.
            gte, lte = -100, 100
            logging.debug(
                "converted original range [%s,%s] -> mapped range [%s,%s] for unit_uri=%s",
                t1, t2, gte, lte, unit_uri)
        # Each sub-search is a header line followed by a query line.
        searches.append({"index": "column"})
        searches.append(_unit_search_query(concept_uri, unit_uri, gte, lte))
    logging.debug('searches: %s', searches)
    # Newline-delimited JSON: one object per line, each line newline-terminated.
    body = "".join(f"{json.dumps(search)}\n" for search in searches)
    responses = current_app.opensearch_client.msearch(
        body=body
    )
    successful = []
    for sub_response in responses["responses"]:
        if "hits" in sub_response:
            successful.append(sub_response["hits"]["hits"])
        else:
            # A failed sub-search carries no hits; skip it rather than
            # failing the whole request.
            logging.warning("sub-search failed: %s", sub_response.get("error"))
    return {
        "hits": {
            "hits": flatten(successful)
        },
        "took": responses["took"],
        "status": 200
    }