diff --git a/dbrepo-somapper/app.py b/dbrepo-somapper/app.py index 6f98ff15058aeb8ddd4cd4faf524cdbd7322d985..fa0d4d6dcd07b9b6e2800298c0b1b7d5da6b79ce 100644 --- a/dbrepo-somapper/app.py +++ b/dbrepo-somapper/app.py @@ -1,3 +1,5 @@ +import logging +from logging.config import dictConfig from flask import Flask, request from flask_cors import CORS from flask_caching import Cache @@ -5,6 +7,32 @@ import services.so_matching_service as so_matching_service import services.initialize_service as initialize_service import settings as settings +logging.addLevelName(level=logging.NOTSET, levelName='TRACE') +logging.basicConfig(level=logging.DEBUG) + +# logging configuration +dictConfig({ + 'version': 1, + 'formatters': { + 'default': { + 'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s', + }, + 'simple': { + 'format': '[%(asctime)s] %(levelname)s: %(message)s', + }, + }, + 'handlers': {'wsgi': { + 'class': 'logging.StreamHandler', + 'stream': 'ext://flask.logging.wsgi_errors_stream', + 'formatter': 'simple' # default + }}, + 'root': { + 'level': 'DEBUG', + 'handlers': ['wsgi'] + } +}) + + # API app = Flask(__name__) cors = CORS(app) @@ -13,17 +41,17 @@ cache = Cache(app) initialize_service.initialize_cache(cache, settings.flask_config["CACHE_DIR"]) -@app.route('/match', methods=['POST']) +@app.route('/api/semantic/match', methods=['POST']) def match_object(): data = request.get_json() schema_name = data['schema_name'] schema_columns = data['schema_columns'] schema_column_datatypes = data['schema_column_datatypes'] ignore_columns_for_unit = data['ignore_columns_for_unit'] - + return so_matching_service.object_unit(schema_name, schema_columns, schema_column_datatypes, ignore_columns_for_unit=ignore_columns_for_unit) -@app.route('/match/object/specific', methods=['GET']) +@app.route('/api/semantic/match/object/specific', methods=['GET']) def match_object_specific(): schema_id = request.values.get('schema_id') source_filter = request.values.get('source_filter') @@ -31,22 +59,22 @@ def match_object_specific(): return so_matching_service.specific_object(schema_id, source_filter, target_filter) -@app.route('/match/unit/specific', methods=['GET']) +@app.route('/api/semantic/match/unit/specific', methods=['GET']) def match_unit_specific(): schema_id = request.values.get('schema_id') source_filter = request.values.get('source_filter') target_filter = request.values.get('target_filter') return so_matching_service.specific_unit(schema_id, source_filter, target_filter) -@app.route('/influence/indirect', methods=['POST']) +@app.route('/api/semantic/influence/indirect', methods=['POST']) def indirect_influence(): data = request.get_json() schema_id = data['schema_id'] influencer_ontology = data['influencer_ontology'] exclude_sources = data['exclude_sources'] return so_matching_service.indirect(schema_id, influencer_ontology, exclude_sources=exclude_sources) - -@app.route('/influence/direct', methods=['POST']) + +@app.route('/api/semantic/influence/direct', methods=['POST']) def direct_influence(): data = request.get_json() schema_id = data['schema_id'] @@ -55,7 +83,7 @@ def direct_influence(): influencer_targets = data['influencer_targets'] return so_matching_service.direct(schema_id, sources, influencer_targets, influencer_ontology) -@app.route('/reload', methods=['POST']) +@app.route('/api/semantic/reload', methods=['POST']) def reload(): data = request.get_json() directive = data['directive'] @@ -71,7 +99,7 @@ def reload(): ignore_columns_for_unit = data['ignore_columns_for_unit'] return 
so_matching_service.reload(directive=directive, schema_name=schema_name, schema_columns=schema_columns, schema_column_datatypes=schema_column_datatypes, ignore_columns_for_unit=ignore_columns_for_unit, schema_id=schema_id, indirect_influencer_ontology=indirect_influencer_ontology, exclude_sources=exclude_sources, direct_influencer_ontology=direct_influencer_ontology, direct_influencer_ontology_targets=direct_influencer_ontology_targets, sources=sources) -@app.route('/add/columns', methods=['POST']) +@app.route('/api/semantic/add/columns', methods=['POST']) def add_columns(): data = request.get_json() schema_id = data['schema_id'] @@ -86,19 +114,19 @@ def add_columns(): ignore_columns_for_unit = data['ignore_columns_for_unit'] return so_matching_service.add_columns(schema_id, schema_name, new_columns, new_column_datatypes, also_indirect, also_direct, ignore_columns_for_unit=ignore_columns_for_unit, indirect_influencer_ontology=indirect_influencer_ontology, direct_influencer_ontology=direct_influencer_ontology, direct_influencer_ontology_targets=direct_influencer_ontology_targets) -@app.route('/remove/columns', methods=['POST']) +@app.route('/api/semantic/remove/columns', methods=['POST']) def remove_columns(): data = request.get_json() schema_id = data['schema_id'] schema_columns = data['schema_columns'] return so_matching_service.remove_columns(schema_id, schema_columns) -@app.route('/load/ontologies', methods=['POST']) +@app.route('/api/semantic/load/ontologies', methods=['POST']) def load_new_ontologies(): _ = request.get_json() return so_matching_service.load_new_ontologies() -@app.route('/remove/ontology', methods=['POST']) +@app.route('/api/semantic/remove/ontology', methods=['POST']) def remove_ontology(): data = request.get_json() ontology_name = data['ontology_name'] diff --git a/dbrepo-somapper/data/object_ontologies/building/building_BGEM3FlagModelDense_encode.h5 b/dbrepo-somapper/data/object_ontologies/building/building_BGEM3FlagModelDense_encode.h5 new file mode 100644 index 0000000000000000000000000000000000000000..9db57d71c216a89c03d99b7533e6071f0edb47b8 Binary files /dev/null and b/dbrepo-somapper/data/object_ontologies/building/building_BGEM3FlagModelDense_encode.h5 differ diff --git a/dbrepo-somapper/data/object_ontologies/car/car_BGEM3FlagModelDense_encode.h5 b/dbrepo-somapper/data/object_ontologies/car/car_BGEM3FlagModelDense_encode.h5 new file mode 100644 index 0000000000000000000000000000000000000000..aefffa600c031665972202a9d8180fbfe37def2b Binary files /dev/null and b/dbrepo-somapper/data/object_ontologies/car/car_BGEM3FlagModelDense_encode.h5 differ diff --git a/dbrepo-somapper/data/object_ontologies/fastfoodfacts/fastfoodfacts_BGEM3FlagModelDense_encode.h5 b/dbrepo-somapper/data/object_ontologies/fastfoodfacts/fastfoodfacts_BGEM3FlagModelDense_encode.h5 new file mode 100644 index 0000000000000000000000000000000000000000..fe45c3dddc56a67c3147e32ab1d3404db2bf6460 Binary files /dev/null and b/dbrepo-somapper/data/object_ontologies/fastfoodfacts/fastfoodfacts_BGEM3FlagModelDense_encode.h5 differ diff --git a/dbrepo-somapper/data/unit_ontologies/om2/om2_BGEM3FlagModelDense_encode_with_unit_keyword.h5 b/dbrepo-somapper/data/unit_ontologies/om2/om2_BGEM3FlagModelDense_encode_with_unit_keyword.h5 new file mode 100644 index 0000000000000000000000000000000000000000..246db65d617db9b1f2cb1c89f355ad80e321b186 Binary files /dev/null and b/dbrepo-somapper/data/unit_ontologies/om2/om2_BGEM3FlagModelDense_encode_with_unit_keyword.h5 differ diff --git 
a/dbrepo-somapper/data/unit_ontologies/qudt/qudt_BGEM3FlagModelDense_encode_with_unit_keyword.h5 b/dbrepo-somapper/data/unit_ontologies/qudt/qudt_BGEM3FlagModelDense_encode_with_unit_keyword.h5 new file mode 100644 index 0000000000000000000000000000000000000000..233d6ee72d84f93ef3434ec83ef7c61234655b6a Binary files /dev/null and b/dbrepo-somapper/data/unit_ontologies/qudt/qudt_BGEM3FlagModelDense_encode_with_unit_keyword.h5 differ diff --git a/dbrepo-somapper/data/unit_ontologies/ucum/ucum_BGEM3FlagModelDense_encode_with_unit_keyword.h5 b/dbrepo-somapper/data/unit_ontologies/ucum/ucum_BGEM3FlagModelDense_encode_with_unit_keyword.h5 new file mode 100644 index 0000000000000000000000000000000000000000..a4950c03c0b6449c5024776234027dd8ab55b282 Binary files /dev/null and b/dbrepo-somapper/data/unit_ontologies/ucum/ucum_BGEM3FlagModelDense_encode_with_unit_keyword.h5 differ diff --git a/dbrepo-somapper/logger.py b/dbrepo-somapper/logger.py deleted file mode 100644 index a5c4d10b1ca225e4ea0c80886001fcbc6a90420f..0000000000000000000000000000000000000000 --- a/dbrepo-somapper/logger.py +++ /dev/null @@ -1,37 +0,0 @@ -import logging -from enum import Enum - -class LoggerTypes(Enum): - LOAD_SERVICE = 1 - MATCH_SERVICE = 2 - RESULT_SERVICE = 3 - FIELD_SERVICE = 4 - CACHE_SERVICE = 5 - EMBEDDING_SERVICE = 6 - - -class Loggers(): - - formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') - exists = {} - - def create(type:LoggerTypes, level=logging.DEBUG, log_file_dictionary:str="./logs/"): - handler = logging.FileHandler(log_file_dictionary+type.name+".log", mode='w') - handler.setFormatter(Loggers.formatter) - logger = logging.getLogger(type.name) - logger.setLevel(level) - logger.addHandler(handler) - Loggers.exists[type.name] = True - return logger - - def info(type:LoggerTypes, message:str): - logging.getLogger(type.name).info(message) - - def debug(type:LoggerTypes, message:str): - logging.getLogger(type.name).debug(message) - - def error(type:LoggerTypes, message:str): - logging.getLogger(type.name).error(message) - - def check_existence(type): - return type.name in Loggers.exists.keys() and Loggers.exists[type.name] == True \ No newline at end of file diff --git a/dbrepo-somapper/matching/embeddings.py b/dbrepo-somapper/matching/embeddings.py index 984351a7a75510cfa8baf46099b488bc76f2e54d..3c0f18bfa317a36dc07dea21a689e85d0ec07acb 100644 --- a/dbrepo-somapper/matching/embeddings.py +++ b/dbrepo-somapper/matching/embeddings.py @@ -1,7 +1,7 @@ import torch from sentence_transformers import SentenceTransformer from FlagEmbedding import BGEM3FlagModel -from logger import LoggerTypes, Loggers +import logging class Embedding: # _data: {text:embedding} @@ -34,48 +34,48 @@ class Embedding: return missing_texts def add_embeddings(self, texts:[str], embeddings:[torch.Tensor]): - Loggers.info(LoggerTypes.LOAD_SERVICE, "Adding "+str(len(texts))+" texts.") + logging.debug("Adding "+str(len(texts))+" texts.") for text, embedding in zip(texts, embeddings): self._data[text] = embedding if self._empty_tensor is None: self._empty_tensor = torch.zeros(embeddings[0].size()) - Loggers.info(LoggerTypes.LOAD_SERVICE, "Empty tensor created with size "+str(embeddings[0].size())+".") + logging.debug("Empty tensor created with size "+str(embeddings[0].size())+".") def return_embeddings(self, texts:[str], asList:bool=False) -> torch.stack: text_embeddings = [] for text in texts: try: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Trying to find text "+str(text)+" in cache.") + logging.debug("Trying to find text 
"+str(text)+" in cache.") text_embeddings.append(self._data[text]) except KeyError: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Text "+str(text)+" not found in cache.") - Loggers.info(LoggerTypes.LOAD_SERVICE, "Existing texts "+str(list(self._data.keys()))) + logging.debug("Text "+str(text)+" not found in cache.") + logging.debug("Existing texts "+str(list(self._data.keys()))) if self._empty_tensor is not None: text_embeddings.append(self._empty_tensor) - Loggers.info(LoggerTypes.LOAD_SERVICE, "Empty tensor added for text "+str(text)+".") + logging.debug("Empty tensor added for text "+str(text)+".") else: - Loggers.error(LoggerTypes.LOAD_SERVICE, "No empty tensor available. There is no reference.") + logging.error("No empty tensor available. There is no reference.") raise ValueError("No empty tensor available.") else: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Text "+str(text)+" found in cache.") + logging.debug("Text "+str(text)+" found in cache.") return torch.stack(text_embeddings) if not asList else text_embeddings def return_embedding(self, text:str) -> torch.Tensor: try: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Trying to find text "+str(text)+" in cache.") + logging.debug("Trying to find text "+str(text)+" in cache.") tensor = self._data[text] except KeyError: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Text "+str(text)+" not found in cache.") - Loggers.info(LoggerTypes.LOAD_SERVICE, "Existing texts "+str(list(self._data.keys()))) + logging.debug("Text "+str(text)+" not found in cache.") + logging.debug("Existing texts "+str(list(self._data.keys()))) if self._empty_tensor is not None: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Empty tensor added for text "+str(text)+".") + logging.debug("Empty tensor added for text "+str(text)+".") tensor = self._empty_tensor else: - Loggers.error(LoggerTypes.LOAD_SERVICE, "No empty tensor available. There is no reference.") + logging.error("No empty tensor available. 
There is no reference.") raise ValueError("No empty tensor available.") else: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Text "+str(text)+" found in cache.") + logging.debug("Text "+str(text)+" found in cache.") return tensor def return_averaged_copy(self, source_texts:[str], target_embeddings:torch.stack, weights:[float]=None): @@ -112,7 +112,7 @@ class MiniLMEncoder(Encoder): def encode(self, embeddings:Embedding, texts:[str], **kwargs) -> Embedding: missing = embeddings.return_missing_texts(texts) if missing != []: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing)) + logging.debug("Missing texts "+str(missing)) embeddings.add_embeddings(missing, self.model.encode(missing, convert_to_tensor=True)) return embeddings @@ -130,7 +130,7 @@ class MiniLMEncoder(Encoder): if not found: missing_with_relations.append(text) if missing_with_relations != []: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing_with_relations)) + logging.debug("Missing texts "+str(missing_with_relations)) embeddings.add_embeddings(missing, self.model.encode(missing_with_relations, convert_to_tensor=True)) return embeddings @@ -138,7 +138,7 @@ class MiniLMEncoder(Encoder): missing = embeddings.return_missing_texts(texts) if missing != []: missing_with_keyword = [ms+" unit" for ms in missing] - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing_with_keyword)) + logging.debug("Missing texts "+str(missing_with_keyword)) emb = self.model.encode(missing, convert_to_tensor=True) embeddings.add_embeddings(missing, emb) return embeddings @@ -155,7 +155,7 @@ class NasaSMD(Encoder): def encode(self, embeddings:Embedding, texts:[str], **kwargs) -> Embedding: missing = embeddings.return_missing_texts(texts) if missing != []: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing)) + logging.debug("Missing texts "+str(missing)) embeddings.add_embeddings(missing, self.model.encode(missing, convert_to_tensor=True)) return embeddings @@ -163,7 +163,7 @@ class NasaSMD(Encoder): missing = embeddings.return_missing_texts(texts) if missing != []: missing_with_keyword = [ms+" unit" for ms in missing] - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing_with_keyword)) + logging.debug("Missing texts "+str(missing_with_keyword)) emb = self.model.encode(missing, convert_to_tensor=True) embeddings.add_embeddings(missing, emb) return embeddings @@ -181,7 +181,7 @@ class NasaSMD2(Encoder): def encode(self, embeddings:Embedding, texts:[str], **kwargs) -> Embedding: missing = embeddings.return_missing_texts(texts) if missing != []: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing)) + logging.debug("Missing texts "+str(missing)) embeddings.add_embeddings(missing, self.model.encode(missing, convert_to_tensor=True)) return embeddings @@ -189,7 +189,7 @@ class NasaSMD2(Encoder): missing = embeddings.return_missing_texts(texts) if missing != []: missing_with_keyword = [ms+" unit" for ms in missing] - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing_with_keyword)) + logging.debug("Missing texts "+str(missing_with_keyword)) emb = self.model.encode(missing, convert_to_tensor=True) embeddings.add_embeddings(missing, emb) return embeddings @@ -207,7 +207,7 @@ class BGEMDense(Encoder): def encode(self, embeddings:Embedding, texts:[str], **kwargs) -> Embedding: missing = embeddings.return_missing_texts(texts) if missing != []: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing)) + logging.debug("Missing texts 
"+str(missing)) emb = self.model.encode(missing,batch_size=12,max_length=512, return_dense=True, return_sparse=False, return_colbert_vecs=False)['dense_vecs'] embeddings.add_embeddings(missing, torch.from_numpy(emb).float()) return embeddings @@ -225,7 +225,7 @@ class BGEMDense(Encoder): break if not found: missing_with_relations.append(text) if missing_with_relations != []: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing_with_relations)) + logging.debug("Missing texts "+str(missing_with_relations)) emb = self.model.encode(missing_with_relations,batch_size=12,max_length=512, return_dense=True, return_sparse=False, return_colbert_vecs=False)['dense_vecs'] embeddings.add_embeddings(missing, torch.from_numpy(emb).float()) return embeddings @@ -234,7 +234,7 @@ class BGEMDense(Encoder): missing = embeddings.return_missing_texts(texts) if missing != []: missing_with_keyword = [ms+" unit" for ms in missing] - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing_with_keyword)) + logging.debug("Missing texts "+str(missing_with_keyword)) emb = self.model.encode(missing_with_keyword,batch_size=12,max_length=512, return_dense=True, return_sparse=False, return_colbert_vecs=False)['dense_vecs'] embeddings.add_embeddings(missing, torch.from_numpy(emb).float()) return embeddings @@ -252,7 +252,7 @@ class BGEMMultivector(Encoder): def encode(self, embeddings:Embedding, texts:[str], **kwargs) -> Embedding: missing = embeddings.return_missing_texts(texts) if missing != []: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Missing texts "+str(missing)) + logging.debug("Missing texts "+str(missing)) emb = self.model.encode(missing,batch_size=12,max_length=512, return_dense=False, return_sparse=False, return_colbert_vecs=True)['colbert_vecs'] embeddings.add_embeddings(missing, [torch.from_numpy(e).float() for e in emb]) return embeddings \ No newline at end of file diff --git a/dbrepo-somapper/matching/field.py b/dbrepo-somapper/matching/field.py index 9676a8befba33b0bdf77cd92764b2c3083bee5bc..3b4d33099d87825a2653c7effc88d6ab24934b20 100644 --- a/dbrepo-somapper/matching/field.py +++ b/dbrepo-somapper/matching/field.py @@ -3,7 +3,7 @@ from nltk.stem import WordNetLemmatizer import datetime from matching.read_inputs import SchemaData, OntologyData, OntologyClassTypes -from logger import LoggerTypes, Loggers +import logging class Field: @@ -50,7 +50,7 @@ class Field: if _filter_source in sources: available_filter_sources.append(_filter_source) else: - Loggers.error(LoggerTypes.FIELD_SERVICE, "Source "+str(_filter_source)+" not found in schema.") + logging.error("Source "+str(_filter_source)+" not found in schema.") sources = available_filter_sources if _filter_target != "": @@ -63,7 +63,7 @@ class Field: for source in sources: self._source_target_matrix[source] = targets.copy() - Loggers.info(LoggerTypes.FIELD_SERVICE, "Matrix built for sources as: "+str(sources)+" and targets as: "+str(targets)+".") + logging.debug("Matrix built for sources as: "+str(sources)+" and targets as: "+str(targets)+".") def merge_sources_of_field(self, field:"Field") -> "Field": new_columns = field._schema.return_column_names() @@ -81,7 +81,7 @@ class Field: del self._source_target_matrix[source] del self._type_constraint_matrix[source] except KeyError: - Loggers.error(LoggerTypes.FIELD_SERVICE, "Source "+str(source)+" not found in schema or type constraints.") + logging.error("Source "+str(source)+" not found in schema or type constraints.") pass self._schema.remove_column(source) return self @@ -126,7 
+126,7 @@ class Field: # Finds constraints as a target oriented manner. # True if entity doesnt have a constraint. def find_type_constraints(self, apply:bool=False): - Loggers.info(LoggerTypes.FIELD_SERVICE, "Finding type constraints.") + logging.debug("Finding type constraints.") datatype_sources = {} for source in list(self._source_target_matrix.keys()): try: @@ -136,16 +136,16 @@ class Field: for data_type in list(datatype_sources.keys()): sample_source = datatype_sources[data_type][0] - Loggers.info(LoggerTypes.FIELD_SERVICE, "Checking datatype "+str(data_type)+" for sample source "+str(sample_source)+".") + logging.debug("Checking datatype "+str(data_type)+" for sample source "+str(sample_source)+".") for target in self._source_target_matrix[sample_source]: target_datatypes = self._return_target_datatypes(target) if not self._check_type_constraints(data_type, target_datatypes): - Loggers.info(LoggerTypes.FIELD_SERVICE, "Datatype "+str(data_type)+" is not compatible with "+str(target_datatypes)+" same datatype sources will be set to false:" +str(datatype_sources[data_type])+".") + logging.debug("Datatype "+str(data_type)+" is not compatible with "+str(target_datatypes)+" same datatype sources will be set to false:" +str(datatype_sources[data_type])+".") for source in datatype_sources[data_type]: if apply: self._safe_remove_target(source, target) else: self._safe_append_constraint_matrix(source, target, False) else: - Loggers.info(LoggerTypes.FIELD_SERVICE, "Datatype "+str(data_type)+" is compatible with "+str(target_datatypes)+" same datatype sources will be set to true:" +str(datatype_sources[data_type])+".") + logging.debug("Datatype "+str(data_type)+" is compatible with "+str(target_datatypes)+" same datatype sources will be set to true:" +str(datatype_sources[data_type])+".") for source in datatype_sources[data_type]: self._safe_append_constraint_matrix(source, target, True) @@ -154,42 +154,42 @@ class Field: target_datatypes = [] if target_type == OntologyClassTypes.class_: target_datatypes = self._ontology.get_class_datatypes(target) - Loggers.info(LoggerTypes.FIELD_SERVICE, "Target "+str(target)+" is a class.") + logging.debug("Target "+str(target)+" is a class.") elif target_type == OntologyClassTypes.object_property: - Loggers.info(LoggerTypes.FIELD_SERVICE, "Target "+str(target)+" is an object property.") + logging.debug("Target "+str(target)+" is an object property.") proper_classes = self._ontology.get_objectproperty_classes(target) - Loggers.info(LoggerTypes.FIELD_SERVICE, "Target proper classes are "+str(proper_classes)+".") + logging.debug("Target proper classes are "+str(proper_classes)+".") for cls in proper_classes: target_datatypes.append(self._ontology.get_class_datatypes(cls)) elif target_type == OntologyClassTypes.data_property: target_datatypes = self._ontology.get_dataproperty_datatypes(target) - Loggers.info(LoggerTypes.FIELD_SERVICE, "Target "+str(target)+" is a data property.") + logging.debug("Target "+str(target)+" is a data property.") elif target_type == OntologyClassTypes.individual: target_datatypes = self._ontology.get_individual_datatypes(target) - Loggers.info(LoggerTypes.FIELD_SERVICE, "Target "+str(target)+" is an individual.") + logging.debug("Target "+str(target)+" is an individual.") else: - Loggers.error(LoggerTypes.FIELD_SERVICE, "Target "+str(target)+" is not a class, object property, data property or individual.") + logging.error("Target "+str(target)+" is not a class, object property, data property or individual.") - 
Loggers.info(LoggerTypes.FIELD_SERVICE, "Target "+str(target)+" have datatypes of "+str(target_datatypes)+".") + logging.debug("Target "+str(target)+" have datatypes of "+str(target_datatypes)+".") return target_datatypes # Speed can be increased. Datatypes sometimes contain specific datatypes ("hydrocarbone"). read_inputs can be adapted as such datatypes are defined as str. def _check_type_constraints(self, source_datatype, target_datatypes) -> bool: if source_datatype is None: - Loggers.info(LoggerTypes.FIELD_SERVICE, "Source datatype is None.") + logging.debug("Source datatype is None.") return False try: for _type in self._datatype_dbrepo_to_python[source_datatype]: for target_datatype in target_datatypes: if _type is target_datatype: - Loggers.info(LoggerTypes.FIELD_SERVICE, "Datatype "+str(source_datatype)+" is compatible with "+str(target_datatype)+".") + logging.debug("Datatype "+str(source_datatype)+" is compatible with "+str(target_datatype)+".") return True elif _type is type(target_datatype): - Loggers.info(LoggerTypes.FIELD_SERVICE, "Datatype "+str(source_datatype)+" is compatible with "+str(target_datatype)+".") + logging.debug("Datatype "+str(source_datatype)+" is compatible with "+str(target_datatype)+".") return True - Loggers.info(LoggerTypes.FIELD_SERVICE, "Datatype "+str(source_datatype)+" is not compatible with "+str(target_datatypes)+".") + logging.debug("Datatype "+str(source_datatype)+" is not compatible with "+str(target_datatypes)+".") return False except KeyError: - Loggers.error(LoggerTypes.FIELD_SERVICE, "Datatype "+str(source_datatype)+" not found in _datatype_dbrepo_to_python.") + logging.error("Datatype "+str(source_datatype)+" not found in _datatype_dbrepo_to_python.") return False @@ -202,10 +202,10 @@ class Field: def _safe_remove_target(self, source:str, target:str): try: - Loggers.info(LoggerTypes.FIELD_SERVICE, "Removing target "+str(target)+" from source "+str(source)+".") + logging.debug("Removing target "+str(target)+" from source "+str(source)+".") self._source_target_matrix[source].remove(target) except ValueError: - Loggers.error(LoggerTypes.FIELD_SERVICE, "Target "+str(target)+" not found in source "+str(source)+".") + logging.error("Target "+str(target)+" not found in source "+str(source)+".") pass # Not implemented. diff --git a/dbrepo-somapper/matching/match.py b/dbrepo-somapper/matching/match.py index 5bfb3b76443ad6404604510807315fc765ae577e..560e6d65a4f3b026addb1e68eace3e64ff4c5f82 100644 --- a/dbrepo-somapper/matching/match.py +++ b/dbrepo-somapper/matching/match.py @@ -3,7 +3,7 @@ from matching.scoring import * from matching.field import Field from matching.embeddings import Embedding from matching.results import Results -from logger import LoggerTypes, Loggers +import logging class Match: def _execute_using_text_based(self, _field:Field, _score: Scoring, target_batch_size:int=1) -> Results: @@ -11,7 +11,7 @@ class Match: target_batch = [] for source_name in _field.return_sources(): # For each source, iterate through targets. - Loggers.info(LoggerTypes.MATCH_SERVICE, "Scoring for "+str(source_name)) + logging.debug("Scoring for "+str(source_name)) for target_name in _field.return_targets(source_name): target_count += 1 target_batch.append(target_name) # Add target to batch. 
@@ -23,25 +23,25 @@ class Match: _score.score(source_name, target_batch) target_count = 0 target_batch = [] - Loggers.info(LoggerTypes.MATCH_SERVICE, "Scoring for "+str(source_name)+" done.") + logging.debug("Scoring for "+str(source_name)+" done.") return _score.return_results() def _execute_using_embeddings(self, source_embeddings:Embedding, target_embeddings:Embedding, _field:Field, _score: EmbeddingBased) -> Results: for source_name in _field.return_sources(): # For each source, iterate through targets. - Loggers.info(LoggerTypes.MATCH_SERVICE, "Scoring for "+str(source_name)) + logging.debug("Scoring for "+str(source_name)) _score.score(source_name, _field.return_targets(source_name), source_embeddings, target_embeddings) - Loggers.info(LoggerTypes.MATCH_SERVICE, "Scoring for "+str(source_name)+" done.") + logging.debug("Scoring for "+str(source_name)+" done.") return _score.return_results() def execute(self, _field:Field, _score: Scoring, source_embedding:Embedding=None, target_embeddings:Embedding=None) -> Results: if isinstance(_score, EmbeddingBased) and source_embedding is not None and target_embeddings is not None: - Loggers.info(LoggerTypes.MATCH_SERVICE, "Using embedding based scoring.") + logging.debug("Using embedding based scoring.") return self._execute_using_embeddings(source_embedding, target_embeddings, _field, _score) elif isinstance(_score, TextDistanceBased): - Loggers.info(LoggerTypes.MATCH_SERVICE, "Using text distance based scoring.") + logging.debug("Using text distance based scoring.") return self._execute_using_text_based(_field, _score) else: - Loggers.error(LoggerTypes.MATCH_SERVICE, "Scoring method not recognized or embeddings are missing.") + logging.error("Scoring method not recognized or embeddings are missing.") return Results() \ No newline at end of file diff --git a/dbrepo-somapper/matching/read_inputs.py b/dbrepo-somapper/matching/read_inputs.py index c589e499fee3a0b81815ad708f0a418305e1adac..c88bbe0be2c326fc8e618d687ceb05da46af21ac 100644 --- a/dbrepo-somapper/matching/read_inputs.py +++ b/dbrepo-somapper/matching/read_inputs.py @@ -1,7 +1,7 @@ import owlready2 as or2 import copy import matching.sparql_queries as sparql_queries -from logger import LoggerTypes, Loggers +import logging from enum import Enum class SchemaData: @@ -12,21 +12,21 @@ class SchemaData: _column_datatypes = None def __init__(self, _column_names:[str], _column_datatypes:dict=None, _schema_name:str=None) -> None: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Creating SchemaData object.") - Loggers.info(LoggerTypes.LOAD_SERVICE, "SchemaData: schema name "+ str(_schema_name)) - Loggers.info(LoggerTypes.LOAD_SERVICE, "SchemaData: columns "+ str(_column_names)) + logging.debug("Creating SchemaData object.") + logging.debug("SchemaData: schema name "+ str(_schema_name)) + logging.debug("SchemaData: columns "+ str(_column_names)) self._schema_name = _schema_name self._column_names = _column_names self._column_datatypes = _column_datatypes - Loggers.info(LoggerTypes.LOAD_SERVICE, "SchemaData object created.") + logging.debug("SchemaData object created.") def add_column(self, _column_name:str, _column_datatype:str): - Loggers.info(LoggerTypes.LOAD_SERVICE, "Adding column "+str(_column_name)+" with datatype "+str(_column_datatype)+".") + logging.debug("Adding column "+str(_column_name)+" with datatype "+str(_column_datatype)+".") self._column_names.append(_column_name) self._column_datatypes[_column_name] = _column_datatype def remove_column(self, _column_name:str): - 
Loggers.info(LoggerTypes.LOAD_SERVICE, "Removing column "+str(_column_name)+".") + logging.debug("Removing column "+str(_column_name)+".") self._column_names.remove(_column_name) self._column_datatypes.pop(_column_name) @@ -43,7 +43,7 @@ class SchemaData: if _ignored_columns is None or len(_ignored_columns) == 0: return self else: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Creating SchemaData object without columns "+str(_ignored_columns)+".") + logging.debug("Creating SchemaData object without columns "+str(_ignored_columns)+".") return SchemaData([column for column in self._column_names if column not in _ignored_columns], copy.deepcopy(self._column_datatypes), self._schema_name) class OntologyClassTypes(Enum): @@ -97,14 +97,14 @@ class OntologyData: try: return self._visible_subject_iris[name] except KeyError: - if not hide_errors: Loggers.info(LoggerTypes.LOAD_SERVICE, "Name "+name+" does not exist.") + if not hide_errors: logging.debug("Name "+name+" does not exist.") return "" def _get_name(self, iri:str)-> str: try: return self._iri_visible_subjects[iri] except KeyError: - Loggers.info(LoggerTypes.LOAD_SERVICE, "IRI "+iri+" does not exist.") + logging.debug("IRI "+iri+" does not exist.") return "" def _get_value_subclass_auxilary(self, _dictionary:dict, _key:str) -> [str]: @@ -142,7 +142,7 @@ class OntologyData: for range_ in direct_ranges: tmp += self._class_subclass[range_] except: - Loggers.error(LoggerTypes.LOAD_SERVICE, "Objectproperty "+str(objectproperty)+" does not have a subclass for range " + str(range_)) + logging.error("Objectproperty "+str(objectproperty)+" does not have a subclass for range " + str(range_)) pass return [self._get_name(t) for t in tmp] @@ -171,12 +171,12 @@ class OntologyData: def get_subject_type(self, subject:str, hide_errors:bool=False) -> str: iri = self._get_iri(subject, hide_errors) if iri == "": - if not hide_errors: Loggers.error(LoggerTypes.LOAD_SERVICE, "Subject "+subject+" does not exist.") + if not hide_errors: logging.error("Subject "+subject+" does not exist.") return OntologyClassTypes.unknown try: return self._subject_type[iri] except KeyError: - if not hide_errors: Loggers.error(LoggerTypes.LOAD_SERVICE, "Subject "+subject+" with IRI"+iri+" does not have a type.") + if not hide_errors: logging.error("Subject "+subject+" with IRI"+iri+" does not have a type.") return OntologyClassTypes.unknown # O(1) @@ -213,7 +213,7 @@ class OntologyData: def _return_non_duplicate_iri(self, _iri, _type): try: if self._subject_type[_iri] != _type: # If the IRI is already used with a different type. - Loggers.error(LoggerTypes.LOAD_SERVICE, "IRI "+_iri+" already exists with type: "+self._subject_type[_iri]+". New type is:"+_type+" is ignored.") + logging.error("IRI "+_iri+" already exists with type: "+self._subject_type[_iri]+". New type is:"+_type+" is ignored.") return None except KeyError: # If the IRI is not used before. return _iri @@ -221,13 +221,13 @@ class OntologyData: def _return_non_duplicate_name(self, _name, _iri): try: if self._visible_subject_iris[_name] != _iri: # If the name is already used with a different IRI. - Loggers.error(LoggerTypes.LOAD_SERVICE, "Name "+_name+" (IRI "+_iri+") already exists with IRI: "+self._visible_subject_iris[_name]+". New name is:\""+_name+" ("+str(_iri)+")\".") + logging.error("Name "+_name+" (IRI "+_iri+") already exists with IRI: "+self._visible_subject_iris[_name]+". New name is:\""+_name+" ("+str(_iri)+")\".") return _name+" ("+str(_iri)+")" except KeyError: # If the name is not used before. 
return _name def add_subject(self, _iri:str, _visible_subject:str, _type:OntologyClassTypes): - Loggers.info(LoggerTypes.LOAD_SERVICE, "add_subject "+ str(_iri)+" "+str(_visible_subject)+" "+ str(_type)) + logging.debug("add_subject "+ str(_iri)+" "+str(_visible_subject)+" "+ str(_type)) non_dup_iri = self._return_non_duplicate_iri(_iri, _type) if non_dup_iri is None: return non_dup_name = self._return_non_duplicate_name(_visible_subject, _iri) @@ -236,42 +236,42 @@ class OntologyData: self._visible_subject_iris[non_dup_name] = non_dup_iri # Note: Visible name is never used in setters. def safe_append_class_subclass(self, _class_iri:str, _subclass_iris:[str]): - Loggers.info(LoggerTypes.LOAD_SERVICE, "safe_append_class_subclass "+ str(_class_iri)+" "+str(_subclass_iris)) + logging.debug("safe_append_class_subclass "+ str(_class_iri)+" "+str(_subclass_iris)) try: self._class_subclass[_class_iri] += _subclass_iris except: self._class_subclass[_class_iri] = _subclass_iris def safe_append_class_individual(self, _class_iri:str, _individual_iris:[str]): - Loggers.info(LoggerTypes.LOAD_SERVICE, "safe_append_class_individual "+ str(_class_iri)+" "+str(_individual_iris)) + logging.debug("safe_append_class_individual "+ str(_class_iri)+" "+str(_individual_iris)) try: self._class_individual[_class_iri] += _individual_iris except: self._class_individual[_class_iri] = _individual_iris def safe_append_objectproperty_class(self, _objectproperty_iri:str, _class_iris:[str]): - Loggers.info(LoggerTypes.LOAD_SERVICE, "safe_append_objectproperty_class "+ str(_objectproperty_iri)+" "+str(_class_iris)) + logging.debug("safe_append_objectproperty_class "+ str(_objectproperty_iri)+" "+str(_class_iris)) try: self._objectproperty_class[_objectproperty_iri] += _class_iris except: self._objectproperty_class[_objectproperty_iri] = _class_iris def safe_append_dataproperty_range(self, _dataproperty_iri:str, _ranges:[str]): - Loggers.info(LoggerTypes.LOAD_SERVICE, "safe_append_dataproperty_range "+ str(_dataproperty_iri)+" "+str(_ranges)) + logging.debug("safe_append_dataproperty_range "+ str(_dataproperty_iri)+" "+str(_ranges)) try: self._dataproperty_range[_dataproperty_iri] += _ranges except: self._dataproperty_range[_dataproperty_iri] = _ranges def safe_append_dataproperty_datatype(self, _dataproperty_iri:str, _datatype_iris:[str]): - Loggers.info(LoggerTypes.LOAD_SERVICE, "safe_append_dataproperty_datatype "+ str(_dataproperty_iri)+" "+str(_datatype_iris)) + logging.debug("safe_append_dataproperty_datatype "+ str(_dataproperty_iri)+" "+str(_datatype_iris)) try: self._dataproperty_datatype[_dataproperty_iri] += _datatype_iris except: self._dataproperty_datatype[_dataproperty_iri] = _datatype_iris def safe_append_class_indirectdatatypes(self, _class_iri:str, _indirect_datatypes:[str]): - Loggers.info(LoggerTypes.LOAD_SERVICE, "safe_append_class_indirectdatatypes "+ str(_class_iri)+" "+str(_indirect_datatypes)) + logging.debug("safe_append_class_indirectdatatypes "+ str(_class_iri)+" "+str(_indirect_datatypes)) try: self._class_indirectdatatypes[_class_iri] += _indirect_datatypes except: @@ -293,11 +293,11 @@ class OntologyParser: def parse(self, _loc:str, _include_only:[OntologyClassTypes]=None, _use_reasoner=True, _only_local:bool=True) -> OntologyData: self._data = OntologyData() try: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Parsing ontology "+str(_loc)) + logging.debug("Parsing ontology "+str(_loc)) self._world = or2.World() self._onto = self._world.get_ontology(_loc).load(only_local=_only_local) except: - 
Loggers.error(LoggerTypes.LOAD_SERVICE, "Ontology cannot be parsed by OwlReady2.") + logging.error("Ontology cannot be parsed by OwlReady2.") return self._data if _include_only is None or type(_include_only) is list: @@ -308,27 +308,27 @@ class OntologyParser: self._onto.destroy() return self._data else: - Loggers.error(LoggerTypes.LOAD_SERVICE, "Provided _include_only:"+_include_only+"is not a list or None.") + logging.error("Provided _include_only:"+_include_only+"is not a list or None.") self._onto.destroy() return self._data def _start_reasoner(self): try: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Starting Pellet Reasoner.") + logging.debug("Starting Pellet Reasoner.") or2.sync_reasoner_pellet(self._onto, infer_property_values = True, infer_data_property_values = True) return except: - Loggers.error(LoggerTypes.LOAD_SERVICE, "Pellet Reasoner is failed. Trying with HermiT Reasoner.") + logging.error("Pellet Reasoner is failed. Trying with HermiT Reasoner.") try: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Starting HermiT Reasoner.") + logging.debug("Starting HermiT Reasoner.") or2.sync_reasoner(self._onto, infer_property_values = True) return except: - Loggers.error(LoggerTypes.LOAD_SERVICE, "HermiT Reasoner is failed. Reasoning is not applied.") + logging.error("HermiT Reasoner is failed. Reasoning is not applied.") def _fill_auxilary_dictionaries(self, _include_only:OntologyClassTypes=None): - Loggers.info(LoggerTypes.LOAD_SERVICE, "Filling direct definitions.") + logging.debug("Filling direct definitions.") if _include_only is None or OntologyClassTypes.individual in _include_only: for subject in self._onto.individuals(): self._fill_individual_data(subject) if _include_only is None or OntologyClassTypes.class_ in _include_only: @@ -340,7 +340,7 @@ class OntologyParser: self._fill_dataproperty_datatype() def _fill_individual_data(self, individual_:or2.EntityClass): - Loggers.info(LoggerTypes.LOAD_SERVICE, "Filling individual ("+str(self._get_name(individual_))+") definitions.") + logging.debug("Filling individual ("+str(self._get_name(individual_))+") definitions.") self._data.add_subject(self._get_iri(individual_), self._get_name(individual_), OntologyClassTypes.individual) try: self._data.safe_append_class_individual(self._get_iri(type(individual_)), [self._get_iri(individual_)]) @@ -349,7 +349,7 @@ class OntologyParser: self._data.safe_append_class_individual(self._get_iri(type_), [self._get_iri(individual_)]) # If more than one type, add to all types. 
def _fill_class_data(self, class_:or2.EntityClass): - Loggers.info(LoggerTypes.LOAD_SERVICE, "Filling class ("+str(self._get_name(class_))+") definitions.") + logging.debug("Filling class ("+str(self._get_name(class_))+") definitions.") self._data.add_subject(self._get_iri(class_), self._get_name(class_), OntologyClassTypes.class_) subclass_iris = [self._get_iri(subclass) for subclass in list(class_.descendants()) if subclass != class_ or subclass != or2.Thing] @@ -357,14 +357,14 @@ class OntologyParser: self._fill_indirect_class_data(class_) def _fill_objectproperty_data(self, objectproperty_:or2.ObjectPropertyClass): - Loggers.info(LoggerTypes.LOAD_SERVICE, "Filling object property ("+str(self._get_name(objectproperty_))+") definitions.") + logging.debug("Filling object property ("+str(self._get_name(objectproperty_))+") definitions.") self._data.add_subject(self._get_iri(objectproperty_), self._get_name(objectproperty_), OntologyClassTypes.object_property) ranges = [] for rn in objectproperty_.range: ranges += self._extract_range_construct_information(rn) self._data.safe_append_objectproperty_class(self._get_iri(objectproperty_), ranges) def _fill_dataproperty_data(self, dataproperty:or2.DataPropertyClass): - Loggers.info(LoggerTypes.LOAD_SERVICE, "Filling data property ("+str(self._get_name(dataproperty))+") definitions.") + logging.debug("Filling data property ("+str(self._get_name(dataproperty))+") definitions.") self._data.add_subject(self._get_iri(dataproperty), self._get_name(dataproperty), OntologyClassTypes.data_property) ranges = [] for rn in dataproperty.range: ranges += self._extract_range_construct_information(rn) @@ -372,9 +372,9 @@ class OntologyParser: def _fill_dataproperty_datatype(self): - Loggers.info(LoggerTypes.LOAD_SERVICE, "Filling dataproperty datatype definitions.") + logging.debug("Filling dataproperty datatype definitions.") for row in self._sparql(sparql_queries.dataproperty_datatype): - Loggers.info(LoggerTypes.LOAD_SERVICE, "Filling dataproperty ("+str(self._get_name(row[0]))+") datatype.") + logging.debug("Filling dataproperty ("+str(self._get_name(row[0]))+") datatype.") self._data.safe_append_dataproperty_datatype(self._get_iri(row[0]), self._extract_range_construct_information(row[1])) @@ -384,7 +384,7 @@ class OntologyParser: try: construct_values = self._extract_class_construct_information(indirect) # Extract the relation except: - Loggers.error(LoggerTypes.LOAD_SERVICE, "Indirect relation cannot be extracted.") + logging.error("Indirect relation cannot be extracted.") continue for property, value in construct_values: @@ -394,7 +394,7 @@ class OntologyParser: self._fill_indirect_class_individual(subject, property, value) self._fill_indirectly_defined_object_property_class(property, value) except: - Loggers.error(LoggerTypes.LOAD_SERVICE, "Indirect relation type is not recognized.") + logging.error("Indirect relation type is not recognized.") def _fill_indirect_class_datatype(self, subject, property, value): if isinstance(property, or2.DataPropertyClass): # Datatype @@ -420,83 +420,83 @@ class OntologyParser: # Class Subclass relations will require: data_property-datatype, object_property-class, one_of subject, not subject, inverse subject # They can be combined with logical operators. 
def _extract_class_construct_information(self, class_construct:or2.class_construct) -> [(type, str)]: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Extracting class construct information.") + logging.debug("Extracting class construct information.") if isinstance(class_construct, or2.Restriction): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(class_construct.value)+") Extracting restriction.") + logging.debug("("+str(class_construct.value)+") Extracting restriction.") if isinstance(class_construct.value, or2.Restriction): # property chain (op some (dp exactly 1 int)), UNNECESSARY SINCE THIS DEFINITION IS A VIOLATION! #return [(class_construct.property, class_construct.value.property)] + self._extract_class_construct_information(class_construct.value) # Gets all chained relations return self._extract_class_construct_information(class_construct.value) # Gets only chain end. else: return [(class_construct.property, class_construct.value)] elif isinstance(class_construct, or2.LogicalClassConstruct): # Complex construction with logical operators - Loggers.info(LoggerTypes.LOAD_SERVICE, "Extracting logical class construct.") + logging.debug("Extracting logical class construct.") tmp = [] for cls in class_construct.Classes: # Recurse until leaf restrictions are found. - Loggers.info(LoggerTypes.LOAD_SERVICE, "Recursing into "+str(cls)) + logging.debug("Recursing into "+str(cls)) tmp += self._extract_class_construct_information(cls) return tmp elif isinstance(class_construct, or2.EntityClass): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(class_construct)+") Extracting entity class.") + logging.debug("("+str(class_construct)+") Extracting entity class.") return [(type(class_construct), class_construct)] elif isinstance(class_construct, or2.OneOf): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(class_construct.instances)+") Extracting one of.") + logging.debug("("+str(class_construct.instances)+") Extracting one of.") tmp = [] for instance in class_construct.instances: tmp.append((type(instance), instance)) return tmp elif isinstance(class_construct, or2.Not): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(class_construct.Class)+") Extracting not.") + logging.debug("("+str(class_construct.Class)+") Extracting not.") # [(type(class_construct), class_construct.Class)] return [] elif isinstance(class_construct, or2.Inverse): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(class_construct.property)+") Extracting inverse.") + logging.debug("("+str(class_construct.property)+") Extracting inverse.") # [(type(class_construct) , class_construct.property)] return [] else: - Loggers.error(LoggerTypes.LOAD_SERVICE, "In class_construct_information unknown type:"+str(type(class_construct))) + logging.error("In class_construct_information unknown type:"+str(type(class_construct))) return [] # Does not support individuals (They are only OneOf). def _extract_range_construct_information(self, _construct:or2.LogicalClassConstruct) -> [str]: - Loggers.info(LoggerTypes.LOAD_SERVICE, "Extracting range information for construct ("+str(_construct)+")." ) + logging.debug("Extracting range information for construct ("+str(_construct)+")." 
) if isinstance(_construct, type) and not isinstance(_construct, or2.EntityClass): # Type but not class - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(_construct)+") is a type.") + logging.debug("("+str(_construct)+") is a type.") return [self._get_iri(_construct)] elif str(type(_construct)) == "rdf-schema.Datatype": # Custom datatype - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(self._get_name(_construct))+") is a custom datatype.") + logging.debug("("+str(self._get_name(_construct))+") is a custom datatype.") return [self._get_iri(_construct)] elif isinstance(_construct, or2.EntityClass): # Class - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(self._get_name(_construct))+") is a class.") + logging.debug("("+str(self._get_name(_construct))+") is a class.") return [self._get_iri(_construct)] elif isinstance(_construct, or2.ConstrainedDatatype): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(_construct.base_datatype)+") is a constrained datatype.") + logging.debug("("+str(_construct.base_datatype)+") is a constrained datatype.") return [self._get_iri(_construct.base_datatype)] elif isinstance(_construct, or2.OneOf): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(_construct.instances)+") is a one of.") + logging.debug("("+str(_construct.instances)+") is a one of.") tmp = [] for instance in _construct.instances: tmp.append(self._get_iri(instance)) return tmp elif isinstance(_construct, or2.Not): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(_construct.Class)+") is a not.") + logging.debug("("+str(_construct.Class)+") is a not.") # self._get_iri(_construct.Class) return [] elif isinstance(_construct, or2.Inverse): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(_construct.property)+") is an inverse.") + logging.debug("("+str(_construct.property)+") is an inverse.") # self._get_iri(_construct.property) return [] elif isinstance(_construct, or2.Restriction): - Loggers.info(LoggerTypes.LOAD_SERVICE, "("+str(_construct.value)+") Extracting restriction.") + logging.debug("("+str(_construct.value)+") Extracting restriction.") return [self._get_iri(_construct.value)] elif isinstance(_construct, or2.LogicalClassConstruct): # Complex construction with logical operators - Loggers.info(LoggerTypes.LOAD_SERVICE, "Extracting logical class construct.") + logging.debug("Extracting logical class construct.") tmp = [] for cls in _construct.Classes: # Recurse until leaf types are found. 
- Loggers.info(LoggerTypes.LOAD_SERVICE, "Recursing into "+str(cls)) + logging.debug("Recursing into "+str(cls)) tmp += self._extract_range_construct_information(cls) return tmp else: - Loggers.error(LoggerTypes.LOAD_SERVICE, "In range_construct_information unknown: \"" + str(_construct) + "\" type: \""+str(type(_construct)) + "\".") + logging.error("In range_construct_information unknown: \"" + str(_construct) + "\" type: \""+str(type(_construct)) + "\".") return [] def _get_name(self, _entity:or2) -> str: diff --git a/dbrepo-somapper/matching/results.py b/dbrepo-somapper/matching/results.py index 9e1a88455e3ac9d73b47be388f3dba2ad75a9698..a1929859a4b77c2f7ca838aad2b0e02224ba1807 100644 --- a/dbrepo-somapper/matching/results.py +++ b/dbrepo-somapper/matching/results.py @@ -2,7 +2,7 @@ import matplotlib.pyplot as plt import matplotlib.patches as mpatches import copy import re -from logger import LoggerTypes, Loggers +import logging class Results: _mean = None @@ -34,8 +34,8 @@ class Results: tmp_winner_text = _target_text self._winner_results[source] = (tmp_winner_text, tmp_winner_value) - Loggers.info(LoggerTypes.RESULT_SERVICE, "Winner results are "+str(self._winner_results)+".") - Loggers.info(LoggerTypes.MATCH_SERVICE, "Winner results are "+str(self._winner_results)+".") + logging.debug("Winner results are "+str(self._winner_results)+".") + logging.debug("Winner results are "+str(self._winner_results)+".") self._determine_statistics() def _determine_statistics(self): @@ -49,8 +49,8 @@ class Results: self._mean = sum(tmp_similarity_scores)/len(tmp_similarity_scores) self._std = sum([(_similarity_score - self._mean)**2 for _similarity_score in tmp_similarity_scores])/len(tmp_similarity_scores) - Loggers.info(LoggerTypes.RESULT_SERVICE, "Mean and standard deviation are "+str(self._mean)+" and "+str(self._std)+".") - Loggers.info(LoggerTypes.MATCH_SERVICE, "Mean and standard deviation are "+str(self._mean)+" and "+str(self._std)+".") + logging.debug("Mean and standard deviation are "+str(self._mean)+" and "+str(self._std)+".") + logging.debug("Mean and standard deviation are "+str(self._mean)+" and "+str(self._std)+".") def update(self, _source_text:str, _target_texts:[str], _similarity_scores:[float]): try: @@ -60,7 +60,7 @@ class Results: # Deprecated def _influence_similarity_score(self, original_score:float, influencable_target_similarity_score:float, coefficent:float) -> float: - Loggers.info(LoggerTypes.MATCH_SERVICE, "Influence score will be calculated using original_score:"+str(original_score)+" influencable_target_similarity_score:"+str(influencable_target_similarity_score)+" with coefficent:"+str(coefficent)) + logging.debug("Influence score will be calculated using original_score:"+str(original_score)+" influencable_target_similarity_score:"+str(influencable_target_similarity_score)+" with coefficent:"+str(coefficent)) if original_score >= influencable_target_similarity_score: return original_score else: @@ -83,10 +83,10 @@ class Results: self._results[source].append((target, new_similarity_score)) found = True if not found and _apply_influencable_constraints:# This might happen when influencer constraints remove a influencable target. 
- Loggers.info(LoggerTypes.MATCH_SERVICE, "Indirect influence using "+str(winner_influencer)+" from "+str(source)+" to "+str(target)+" with "+str(original_score)+" to 0.0") + logging.debug("Indirect influence using "+str(winner_influencer)+" from "+str(source)+" to "+str(target)+" with "+str(original_score)+" to 0.0") self._results[source].remove((target, original_score)) except KeyError: - Loggers.error(LoggerTypes.MATCH_SERVICE, "Source "+str(source)+" not found in results!") + logging.error("Source "+str(source)+" not found in results!") self._determine_winner() # Deprecated @@ -107,14 +107,14 @@ class Results: self._results[_source].append((target, new_similarity_score)) found = True if not found and _apply_influencable_constraints:# This might happen when influencer constraints remove a influencable target. - Loggers.info(LoggerTypes.MATCH_SERVICE, "Direct influence using "+str(_direct_influencer)+" from "+str(_source)+" to "+str(target)+" with "+str(original_score)+" to 0.0") + logging.debug("Direct influence using "+str(_direct_influencer)+" from "+str(_source)+" to "+str(target)+" with "+str(original_score)+" to 0.0") self._results[_source].remove((target, original_score)) except KeyError: - Loggers.info(LoggerTypes.MATCH_SERVICE, "Source "+str(_source)+" not found in results!") + logging.debug("Source "+str(_source)+" not found in results!") self._determine_winner() def return_statistics(self)->(float, float): - Loggers.info(LoggerTypes.RESULT_SERVICE, "Statistic mean is "+str(self._mean)+" standard deviation is "+str(self._std)+".") + logging.debug("Statistic mean is "+str(self._mean)+" standard deviation is "+str(self._std)+".") return (self._mean, self._std) def return_sources(self)->[str]: @@ -129,7 +129,7 @@ class Results: try: filtered_results[_filter_source] = copy.deepcopy(self._results[_filter_source]) except KeyError: - Loggers.error(LoggerTypes.RESULT_SERVICE, "Source "+str(_filter_source)+" column name not found in schema.") + logging.error("Source "+str(_filter_source)+" column name not found in schema.") pass elif ignore_sources is not None and len(ignore_sources) > 0: filtered_results = {source: self._results[source] for source in self._results.keys() if source not in ignore_sources} @@ -149,10 +149,10 @@ class Results: for source_filter in source_filters: if ignore_sources is None or source_filter not in ignore_sources: try: - Loggers.info(LoggerTypes.RESULT_SERVICE, "Winner results for "+str(source_filter)+" are "+str(self._winner_results[source_filter])+".") + logging.debug("Winner results for "+str(source_filter)+" are "+str(self._winner_results[source_filter])+".") tmp[source_filter] = self._winner_results[source_filter] except KeyError: - Loggers.info(LoggerTypes.RESULT_SERVICE, "Source "+str(source_filter)+" column name not found in winner results.") + logging.debug("Source "+str(source_filter)+" column name not found in winner results.") tmp[source_filter] = ("", 0.0) return tmp elif ignore_sources is not None and len(ignore_sources) > 0: @@ -169,14 +169,14 @@ class Results: def merge(self, _results:{str:[(str, float)]}) -> 'Results': for source in _results.keys(): - Loggers.info(LoggerTypes.RESULT_SERVICE, "Merging "+str(source)+" to "+str(_results[source])+".") + logging.debug("Merging "+str(source)+" to "+str(_results[source])+".") self._results[source] = _results[source] self._determine_winner() return self def merge_highests(self, _results:{str:[(str, float)]}) -> 'Results': for source in _results.keys(): - Loggers.info(LoggerTypes.RESULT_SERVICE, 
"Merging "+str(source)+" to "+str(_results[source])+".") + logging.debug("Merging "+str(source)+" to "+str(_results[source])+".") for target, score in _results[source]: if source in self._results: for _target, _score in self._results[source]: diff --git a/dbrepo-somapper/matching/scoring.py b/dbrepo-somapper/matching/scoring.py index 76ad8a91851912a7012c47fe538a23e6102cb668..043c6514400c4569664fc5b0ec6894b5789ba296 100644 --- a/dbrepo-somapper/matching/scoring.py +++ b/dbrepo-somapper/matching/scoring.py @@ -5,7 +5,7 @@ from torch import sum as torchsum import copy from matching.results import Results from matching.embeddings import Embedding -from logger import LoggerTypes, Loggers +import logging class Scoring: _results = None @@ -53,7 +53,7 @@ class Levenshtein(TextDistanceBased): for text2 in texts2: score = td.Levenshtein(qval=self._q_gram).normalized_similarity(text1, text2) scores.append(score) - Loggers.info(LoggerTypes.MATCH_SERVICE, "Levenshtein similarities between "+str(text1)+" and "+str(text2)+" is "+str(score)+".") + logging.debug("Levenshtein similarities between "+str(text1)+" and "+str(text2)+" is "+str(score)+".") self._results.update(text1, texts2, scores) class JaroWinkler(TextDistanceBased): @@ -69,7 +69,7 @@ class JaroWinkler(TextDistanceBased): for text2 in texts2: score = td.jaro_winkler.normalized_similarity(text1, text2) scores.append(score) - Loggers.info(LoggerTypes.MATCH_SERVICE, "Jaro-Winkler similarities between "+str(text1)+" and "+str(text2)+" is "+str(score)+".") + logging.debug("Jaro-Winkler similarities between "+str(text1)+" and "+str(text2)+" is "+str(score)+".") self._results.update(text1, texts2, scores) class Jaccard(TextDistanceBased): @@ -85,7 +85,7 @@ class Jaccard(TextDistanceBased): for text2 in texts2: score = td.jaccard.normalized_similarity(text1, text2) scores.append(score) - Loggers.info(LoggerTypes.MATCH_SERVICE, "Jaccard similarities between "+str(text1)+" and "+str(text2)+" is "+str(score)+".") + logging.debug("Jaccard similarities between "+str(text1)+" and "+str(text2)+" is "+str(score)+".") self._results.update(text1, texts2, scores) class Lcsseq(TextDistanceBased): @@ -101,7 +101,7 @@ class Lcsseq(TextDistanceBased): for text2 in texts2: score = td.lcsseq.normalized_similarity(text1, text2) scores.append(score) - Loggers.info(LoggerTypes.MATCH_SERVICE, "Lcsseq similarities between "+str(text1)+" and "+str(text2)+" is "+str(score)+".") + logging.debug("Lcsseq similarities between "+str(text1)+" and "+str(text2)+" is "+str(score)+".") self._results.update(text1, texts2, scores) class EmbeddingBased(Scoring): @@ -123,14 +123,14 @@ class CosineSimilarity(EmbeddingBased): source_text_embedding = source_embeddings.return_embedding(text1) target_text_embeddings = target_embeddings.return_embeddings(texts2) - Loggers.info(LoggerTypes.MATCH_SERVICE, "Embedding of "+str(text1)+" is "+str(source_text_embedding)+".") - Loggers.info(LoggerTypes.MATCH_SERVICE, "Embeddings of "+str(texts2)+" are "+str(target_text_embeddings)+".") + logging.debug("Embedding of "+str(text1)+" is "+str(source_text_embedding)+".") + logging.debug("Embeddings of "+str(texts2)+" are "+str(target_text_embeddings)+".") #Compute cosine-similarities cosine_scores = util.cos_sim(source_text_embedding, target_text_embeddings).tolist()[0] if (Loggers.check_existence(LoggerTypes.MATCH_SERVICE)): for text2, cosine_score in zip(texts2, cosine_scores): - Loggers.info(LoggerTypes.MATCH_SERVICE, "Cosine similarity between "+str(text1)+" and "+str(text2)+" is 
"+str(cosine_score)+".") + logging.debug("Cosine similarity between "+str(text1)+" and "+str(text2)+" is "+str(cosine_score)+".") self._results.update(text1, texts2, cosine_scores) @@ -145,8 +145,8 @@ class ColbertScore(EmbeddingBased): source_text_embedding = source_embeddings.return_embedding(text1) target_text_embeddings = target_embeddings.return_embeddings(texts2, asList=True) - Loggers.info(LoggerTypes.MATCH_SERVICE, "Embedding of "+str(text1)+" is "+str(source_text_embedding)+".") - Loggers.info(LoggerTypes.MATCH_SERVICE, "Embeddings of "+str(texts2)+" are "+str(target_text_embeddings)+".") + logging.debug("Embedding of "+str(text1)+" is "+str(source_text_embedding)+".") + logging.debug("Embeddings of "+str(texts2)+" are "+str(target_text_embeddings)+".") cb_scores = [] for i, target_text_embedding in enumerate(target_text_embeddings): @@ -156,6 +156,6 @@ class ColbertScore(EmbeddingBased): cb_scores.append(cb_score.item()) if (Loggers.check_existence(LoggerTypes.MATCH_SERVICE)): - Loggers.info(LoggerTypes.MATCH_SERVICE, "Colbert scores between "+str(text1)+" and "+str(texts2[i])+" is "+str(cb_score)+".") + logging.debug("Colbert scores between "+str(text1)+" and "+str(texts2[i])+" is "+str(cb_score)+".") self._results.update(text1, texts2, cb_scores) \ No newline at end of file diff --git a/dbrepo-somapper/models/ontology_files_information.py b/dbrepo-somapper/models/ontology_files_information.py index f0817d67ceed57d02aa17ef73c47de8abc5d208f..b57ca36e0f702d9d19caf887347710d66be20329 100644 --- a/dbrepo-somapper/models/ontology_files_information.py +++ b/dbrepo-somapper/models/ontology_files_information.py @@ -1,4 +1,4 @@ -from logger import LoggerTypes, Loggers +import logging import os.path import json @@ -16,16 +16,16 @@ class OntologyFilesInformation: try: with open(self._settings_location) as json_file: ontology_settings = json.load(json_file) - Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology settings are loaded from "+str(self._settings_location)+".") + logging.debug("Ontology settings are loaded from "+str(self._settings_location)+".") return ontology_settings except FileNotFoundError: - Loggers.error(LoggerTypes.LOAD_SERVICE, "Ontology settings are not loaded from "+str(self._settings_location)+". File not found.") + logging.error("Ontology settings are not loaded from "+str(self._settings_location)+". File not found.") return {} except json.JSONDecodeError: - Loggers.error(LoggerTypes.LOAD_SERVICE, "Ontology settings are not loaded from "+str(self._settings_location)+". JSON might be corrupted.") + logging.error("Ontology settings are not loaded from "+str(self._settings_location)+". JSON might be corrupted.") return {} except: - Loggers.error(LoggerTypes.LOAD_SERVICE, "Ontology settings are not loaded from "+str(self._settings_location)+".") + logging.error("Ontology settings are not loaded from "+str(self._settings_location)+".") return {} # Note: Ontology name will be the directory name. 
diff --git a/dbrepo-somapper/services/cache_service.py b/dbrepo-somapper/services/cache_service.py
index 8fdd9605522eb771fcadb1959cb3198f0edb1add..4afb28b1a2ead6c1d8d4e2ea4fea71de0de716e9 100644
--- a/dbrepo-somapper/services/cache_service.py
+++ b/dbrepo-somapper/services/cache_service.py
@@ -1,7 +1,7 @@
 from flask_caching import Cache
 from models.cache_model import CacheModel
 import secrets
-from logger import LoggerTypes, Loggers
+import logging
 from os import listdir, path, remove
 
 _cache = None
@@ -17,7 +17,7 @@ def set_cache_model(object_results, object_fields, unit_results, schema_embeddin
     schema_id = get_schema_hash()
     model = CacheModel(schema_id, object_results, object_fields, unit_results, schema_embeddings)
     _cache.set(schema_id, model)
-    Loggers.info(LoggerTypes.CACHE_SERVICE, "Cache model with hash \""+str(schema_id)+"\" is created.")
+    logging.debug("Cache model with hash \""+str(schema_id)+"\" is created.")
     return schema_id
 
 def update_cache_model(schema_id:str, model:CacheModel, object_results = None, object_fields = None, unit_results_original = None, unit_results = None, schema_embeddings = None):
@@ -27,7 +27,7 @@ def update_cache_model(schema_id:str, model:CacheModel, object_results = None, o
     if unit_results is not None: model.unit_results = unit_results
     if schema_embeddings is not None: model.schema_embeddings = schema_embeddings
     _cache.set(schema_id, model)
-    Loggers.info(LoggerTypes.CACHE_SERVICE, "Cache model with hash \""+str(schema_id)+"\" is updated.")
+    logging.debug("Cache model with hash \""+str(schema_id)+"\" is updated.")
     return schema_id
 
 def get_cache_model(schema_id:str) -> CacheModel:
@@ -40,12 +40,12 @@ def get_cache_model(schema_id:str) -> CacheModel:
 def delete_cache_model(schema_id:str, hard:bool=False):
     if hard:
         remove(path.join(_cache_dir, schema_id))
-        Loggers.info(LoggerTypes.CACHE_SERVICE, "Cache model with hash \""+str(schema_id)+"\" is deleted.")
+        logging.debug("Cache model with hash \""+str(schema_id)+"\" is deleted.")
     else:
         try:
             _cache.delete(schema_id)
         except:
-            Loggers.error(LoggerTypes.CACHE_SERVICE, "Failed to delete cache model with hash \""+str(schema_id)+"\".")
+            logging.error("Failed to delete cache model with hash \""+str(schema_id)+"\".")
 
 def delete_all_cache_models():
     for file in listdir(_cache_dir):
@@ -53,15 +53,15 @@ def delete_all_cache_models():
         try:
             remove(file)
         except:
-            Loggers.error(LoggerTypes.CACHE_SERVICE, "Failed to delete cache model with hash \""+str(file)+"\".")
+            logging.error("Failed to delete cache model with hash \""+str(file)+"\".")
 
 def get_schema_hash(retry_count=10) -> str:
     for _ in range(retry_count):
         hash = secrets.token_urlsafe(16)
         is_unique = is_unique_schema_hash(hash)
         if is_unique: break
-    if not is_unique: Loggers.error(LoggerTypes.CACHE_SERVICE, "Failed to create a unique schema matching cache hash. Overwriting existing cache with hash \""+str(hash)+"\".")
-    Loggers.info(LoggerTypes.CACHE_SERVICE, "New schema matching cache will have hash \""+str(hash)+"\".")
+    if not is_unique: logging.error("Failed to create a unique schema matching cache hash. Overwriting existing cache with hash \""+str(hash)+"\".")
+    logging.debug("New schema matching cache will have hash \""+str(hash)+"\".")
     return hash
 
 def is_unique_schema_hash(hash:str, cache_dir = _cache_dir) -> str:
diff --git a/dbrepo-somapper/services/embedding_service.py b/dbrepo-somapper/services/embedding_service.py
index 63e25e0a8982fa8fd91af14c56faccca91d41e77..3f210432e163fb73854918fba2766d2ae72aed01 100644
--- a/dbrepo-somapper/services/embedding_service.py
+++ b/dbrepo-somapper/services/embedding_service.py
@@ -1,5 +1,5 @@
 from matching.embeddings import Embedding
-from logger import LoggerTypes, Loggers
+import logging
 from matching.read_inputs import SchemaData, OntologyData
 from models.ontology_files_information import OntologyFilesInformation
 import os
@@ -30,10 +30,10 @@ def init_embedding_service(encoder_type, schema_encoding_method, object_encoding
 
 # Generate Schema Embeddings --------------------------------------------------------------------------------
 def generate_schema_embeddings(schema_data:SchemaData) -> Embedding:
-    Loggers.info(LoggerTypes.EMBEDDING_SERVICE, "Generating Schema embeddings for "+str(schema_data._schema_name)+" using "+_encoder._type()+" model with "+_schema_encoding_method+" method.")
+    logging.debug("Generating Schema embeddings for "+str(schema_data._schema_name)+" using "+_encoder._type()+" model with "+_schema_encoding_method+" method.")
     params = collect_schema_embedding_parameters(schema_data, _schema_encoding_method)
     schema_embedding = getattr(_encoder, _schema_encoding_method)(**params)
-    Loggers.info(LoggerTypes.EMBEDDING_SERVICE, "Source embeddings for "+str(schema_data._schema_name)+" generated.")
+    logging.debug("Source embeddings for "+str(schema_data._schema_name)+" generated.")
     return schema_embedding
 
 
@@ -58,32 +58,32 @@ def load_ontology_embeddings(path:str, files:[str], is_ontology_data_created:boo
         filename, file_extension = os.path.splitext(file)
         if file_extension == ".h5" and not is_ontology_data_created: # Search for hdf's exists and ontology_data is not created (but loaded).
             embedding_location = os.path.join(path, file)
-            Loggers.info(LoggerTypes.LOAD_SERVICE, "Embeddings for "+str(ontology_name)+" are found.")
+            logging.debug("Embeddings for "+str(ontology_name)+" are found.")
             embedding = load_embeddings_hdf(embedding_location, ontology_name, _encoder._type(), encoding_method)
             if embedding is not None:
-                Loggers.info(LoggerTypes.LOAD_SERVICE, "Embeddings for "+str(ontology_name)+" are loaded.")
+                logging.debug("Embeddings for "+str(ontology_name)+" are loaded.")
                 return embedding
             else: continue
 
-    Loggers.info(LoggerTypes.LOAD_SERVICE, "Embeddings for "+str(ontology_name)+" are not found or "+str(ontology_name)+" is created with certain entity types. Creating embeddings for "+str(ontology_name)+" using "+encoding_method+".")
+    logging.debug("Embeddings for "+str(ontology_name)+" are not found or "+str(ontology_name)+" is created with certain entity types. Creating embeddings for "+str(ontology_name)+" using "+encoding_method+".")
     params = collect_ontology_embedding_parameters(ontology_data, encoding_method)
     embedding = getattr(_encoder, encoding_method)(**params)
     embedding_location = os.path.join(path, ontology_name+"_"+_encoder._type()+"_"+encoding_method+".h5")
     save_embeddings_hdf(embedding, embedding_location, ontology_name, _encoder._type(), encoding_method)
-    Loggers.info(LoggerTypes.LOAD_SERVICE, "Embeddings for "+str(ontology_name)+" are created using "+encoding_method+" and saved as .h5.")
+    logging.debug("Embeddings for "+str(ontology_name)+" are created using "+encoding_method+" and saved as .h5.")
     return embedding
 
 def load_embeddings_hdf(embedding_location:str, ontology_name:str, encoder_type:str, encoding_method:str) -> Embedding:
     data = {}
-    Loggers.info(LoggerTypes.LOAD_SERVICE, "Loading embeddings from "+str(embedding_location)+".")
+    logging.debug("Loading embeddings from "+str(embedding_location)+".")
     with h5py.File(embedding_location, 'r') as h5file:
         file_ontology_name = h5file.attrs['ontology_name']
         file_encoder_type = h5file.attrs['encoder_type']
         file_encoding_method = h5file.attrs['encoding_method']
         if ontology_name != file_ontology_name or encoder_type != file_encoder_type or encoding_method != file_encoding_method:
-            Loggers.info(LoggerTypes.LOAD_SERVICE, "Embeddings from "+str(embedding_location)+" are not loaded. Ontology name "+str(ontology_name)+" != "+str(file_ontology_name)+" or encoder type "+str(encoder_type)+" != "+str(file_encoder_type)+" or encoding method"+str(encoding_method)+" != "+str(file_encoding_method)+".")
+            logging.debug("Embeddings from "+str(embedding_location)+" are not loaded. Ontology name "+str(ontology_name)+" != "+str(file_ontology_name)+" or encoder type "+str(encoder_type)+" != "+str(file_encoder_type)+" or encoding method"+str(encoding_method)+" != "+str(file_encoding_method)+".")
             return None
 
         for key in h5file['item']:
@@ -92,10 +92,10 @@ def load_embeddings_hdf(embedding_location:str, ontology_name:str, encoder_type:
                 else: real_key = key
                 data[real_key] = torch.FloatTensor(h5file['item/' + key][()])
             except:
-                Loggers.error(LoggerTypes.LOAD_SERVICE, "Embedding "+str(key)+" is not loaded. Key might be violating the h5 standards or value might be corrupted.")
+                logging.error("Embedding "+str(key)+" is not loaded. Key might be violating the h5 standards or value might be corrupted.")
                 return None
 
-    Loggers.info(LoggerTypes.LOAD_SERVICE, "Embeddings from "+str(embedding_location)+" are loaded with the keys "+str(list(data.keys()))+".")
+    logging.debug("Embeddings from "+str(embedding_location)+" are loaded with the keys "+str(list(data.keys()))+".")
     return Embedding(data)
 
 def save_embeddings_hdf(embeddings:Embedding, embedding_location:str, ontology_name:str, encoder_type:str, encoding_method:str):
@@ -108,7 +108,7 @@ def save_embeddings_hdf(embeddings:Embedding, embedding_location:str, ontology_n
     for key, item in data.items():
         if "/" in key: key = key.replace("/", "<h5safe>") # Replace / with <h5> for h5py.
         h5file[path + key] = item
-    Loggers.info(LoggerTypes.LOAD_SERVICE, "Embeddings are saved as h5 to "+str(embedding_location)+".")
+    logging.debug("Embeddings are saved as h5 to "+str(embedding_location)+".")
 
 
 def collect_schema_embedding_parameters(schema_data:SchemaData, encoding_method:str):
diff --git a/dbrepo-somapper/services/field_service.py b/dbrepo-somapper/services/field_service.py
index 0bfddcce41951cb4181cc536f5f1ea8dd77adf3b..33afad716df5e3e29fd42e0031c6c116e7413184 100644
--- a/dbrepo-somapper/services/field_service.py
+++ b/dbrepo-somapper/services/field_service.py
@@ -1,6 +1,6 @@
 from matching.read_inputs import SchemaData, OntologyData
 from matching.field import Field
-from logger import LoggerTypes, Loggers
+import logging
 from copy import deepcopy
 
 
@@ -56,27 +56,27 @@ def generate_mock_unit_fields(sources:[str]) -> {str:Field}:
 def generate_fields(schema_data:SchemaData, ontologies:{str:OntologyData}, find_constraints:bool=True, apply_constraints:bool=False) -> {str:Field}:
     user_specific_fields = {}
     for ontology_name, ontology_data in ontologies.items():
-        Loggers.info(LoggerTypes.FIELD_SERVICE, "Generating field for "+str(ontology_name))
+        logging.debug("Generating field for "+str(ontology_name))
         field = Field(schema_data, ontology_data)
         if find_constraints: field.find_type_constraints(apply_constraints)
         user_specific_fields[ontology_name] = field
-        Loggers.info(LoggerTypes.FIELD_SERVICE, "Field for "+str(ontology_name)+" generated.")
+        logging.debug("Field for "+str(ontology_name)+" generated.")
     return user_specific_fields
 
 def remove_columns_fields(fields:{str:Field}, columns:[str]) -> {str:Field}:
     fields_data = {}
     for ontology_name, field in fields.items():
-        Loggers.info(LoggerTypes.FIELD_SERVICE, "Removing columns "+str(columns)+" from "+str(ontology_name)+".")
+        logging.debug("Removing columns "+str(columns)+" from "+str(ontology_name)+".")
         fields_data[ontology_name] = deepcopy(field).remove_sources(columns)
     return fields_data
 
 def merge_fields(fields:{str:Field}, new_fields:{str:Field}) -> {str:Field}:
     fields_data = {}
     for ontology_name, field in fields.items():
-        Loggers.info(LoggerTypes.FIELD_SERVICE, "Merging fields for "+str(ontology_name)+".")
+        logging.debug("Merging fields for "+str(ontology_name)+".")
         try:
             fields_data[ontology_name] = deepcopy(field).merge_sources_of_field(new_fields[ontology_name])
         except KeyError:
-            Loggers.error(LoggerTypes.FIELD_SERVICE, "Ontology name "+str(ontology_name)+" not found in new_fields.")
+            logging.error("Ontology name "+str(ontology_name)+" not found in new_fields.")
     return fields_data
\ No newline at end of file
diff --git a/dbrepo-somapper/services/load_service.py b/dbrepo-somapper/services/load_service.py
index 2b12d8384bcb5657b9066bbf3e1b854e07471f54..808264c60b9f07adb71344470a60be9bd31eb617 100644
--- a/dbrepo-somapper/services/load_service.py
+++ b/dbrepo-somapper/services/load_service.py
@@ -2,7 +2,7 @@ from matching.read_inputs import SchemaData, OntologyData, OntologyParser, Ontol
 import hashlib as hashlib
 import os.path
 import pickle as pickle
-from logger import LoggerTypes, Loggers
+import logging
 from models.ontology_files_information import OntologyFilesInformation
 
 _object_ontology_files_information = None
@@ -69,13 +69,13 @@ def load_ontology_data(ontology_parser:OntologyParser, path:str, file:str, ontol
     pickle_location = os.path.join(path, os.path.splitext(file)[0]+'.pickle')
 
     if os.path.isfile(pickle_location):
-        Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology from "+str(file)+" is loading from pickle.")
+        logging.debug("Ontology from "+str(file)+" is loading from pickle.")
        ontology_data = load_pickle(pickle_location)
         if ontology_data.is_entity_types_equal(include_only_entities):
-            Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology from "+str(file)+" is loaded from pickle.")
+            logging.debug("Ontology from "+str(file)+" is loaded from pickle.")
             return ontology_data, False
         else:
-            Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology from "+str(file)+" is not loaded from pickle. Entity types "+str(ontology_data.return_available_entity_types())+" != "+str(include_only_entities)+" are not equal.")
+            logging.debug("Ontology from "+str(file)+" is not loaded from pickle. Entity types "+str(ontology_data.return_available_entity_types())+" != "+str(include_only_entities)+" are not equal.")
             return create_ontology_data(ontology_parser, path, file, pickle_location, include_only_entities, use_reasoner), True
     else:
         return create_ontology_data(ontology_parser, path, file, pickle_location, include_only_entities, use_reasoner), True
@@ -86,17 +86,17 @@ def read_ontology_settings(ontology_settings:{str:{str:[str], str:bool}}, ontolo
         for include_only_entity in ontology_settings[ontology_name]["include"]:
             include_only_entities.append(OntologyClassTypes[include_only_entity])
         if len(include_only_entities) == 0: include_only_entities = None
-        Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology "+str(ontology_name)+" will be loaded with only the entities of type "+str(include_only_entities)+".")
+        logging.debug("Ontology "+str(ontology_name)+" will be loaded with only the entities of type "+str(include_only_entities)+".")
     except KeyError:
         include_only_entities = None
-        Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology "+str(ontology_name)+" will be loaded with all entity types.")
+        logging.debug("Ontology "+str(ontology_name)+" will be loaded with all entity types.")
 
     try:
         use_reasoner = ontology_settings[ontology_name]["use_reasoner"]
-        Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology "+str(ontology_name)+" will be loaded with reasoner == "+str(use_reasoner)+".")
+        logging.debug("Ontology "+str(ontology_name)+" will be loaded with reasoner == "+str(use_reasoner)+".")
     except KeyError:
         use_reasoner = True
-        Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology "+str(ontology_name)+" will be loaded with reasoner == True.")
+        logging.debug("Ontology "+str(ontology_name)+" will be loaded with reasoner == True.")
 
     return include_only_entities, use_reasoner
 
@@ -104,16 +104,16 @@ def create_ontology_data(ontology_parser:OntologyParser, path:str, file:str, pic
     ontology_location = os.path.join(path, file)
     ontology_data = ontology_parser.parse("file://"+ontology_location, _include_only=include_only_entities, _use_reasoner=use_reasoner)
     save_pickle(ontology_data, pickle_location)
-    Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology from "+str(file)+" is initialized and saved as a pickle.")
+    logging.debug("Ontology from "+str(file)+" is initialized and saved as a pickle.")
     return ontology_data
 
 def save_pickle(ontology_data:OntologyData, pickle_location:str):
     with open(pickle_location, 'wb') as handle: # Save as pickle.
         pickle.dump(ontology_data, handle, protocol=pickle.HIGHEST_PROTOCOL)
-    Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology "+ str(pickle_location) +" saved as pickle.")
+    logging.debug("Ontology "+ str(pickle_location) +" saved as pickle.")
 
 def load_pickle(pickle_location:str) -> OntologyData:
     with open(pickle_location, 'rb') as handle:
         data = pickle.load(handle)
-    Loggers.info(LoggerTypes.LOAD_SERVICE, "Ontology "+str(pickle_location)+" loaded from pickle.")
+    logging.debug("Ontology "+str(pickle_location)+" loaded from pickle.")
     return data
diff --git a/dbrepo-somapper/services/match_service.py b/dbrepo-somapper/services/match_service.py
index 50afb0887a752c9190cad9243915fe6de8d4d075..7b1c3c8baaa2536dc087919bbd6b0ffab17f8505 100644
--- a/dbrepo-somapper/services/match_service.py
+++ b/dbrepo-somapper/services/match_service.py
@@ -5,7 +5,7 @@ from matching.embeddings import Embedding
 from matching.field import Field
 from matching.match import Match
 from copy import deepcopy
-from logger import LoggerTypes, Loggers
+import logging
 
 
 
@@ -16,10 +16,10 @@ def init_match_service(methods, object_scoring_type:str, unit_scoring_type:str):
     global _object_scoring_method
     global _unit_scoring_method
     _matcher = Match()
-    Loggers.info(LoggerTypes.MATCH_SERVICE, "Match is initialized.")
+    logging.debug("Match is initialized.")
     _object_scoring_method = methods[object_scoring_type]()
     _unit_scoring_method = methods[unit_scoring_type]()
-    Loggers.info(LoggerTypes.MATCH_SERVICE, "Scoring methods are initialized with types Object: "+str(object_scoring_type)+" and Unit:"+str(unit_scoring_type)+".")
+    logging.debug("Scoring methods are initialized with types Object: "+str(object_scoring_type)+" and Unit:"+str(unit_scoring_type)+".")
 
 
 
@@ -30,7 +30,7 @@ def dir_influence_results(target_results:{str:Results}, target_embeddings:{str:E
             tmp_results = _matcher.execute(field, _unit_scoring_method, source_embedding=averaged_embeddings, target_embeddings=target_embeddings[ontology_name])
             new_results[ontology_name] = deepcopy(target_results[ontology_name]).merge(tmp_results.return_results_data())
         except KeyError:
-            Loggers.error(LoggerTypes.MATCH_SERVICE, "Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
+            logging.error("Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
             pass
     return new_results
 
@@ -42,7 +42,7 @@ def indir_influence_results(target_results:{str:Results}, target_embeddings:{str
             tmp_results = _matcher.execute(field, _unit_scoring_method, source_embedding=weighted_average_embeddings, target_embeddings=target_embeddings[ontology_name])
             new_results[ontology_name] = deepcopy(target_results[ontology_name]).merge(tmp_results.return_results_data())
         except KeyError:
-            Loggers.error(LoggerTypes.MATCH_SERVICE, "Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
+            logging.error("Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
             pass
     return new_results
 
@@ -55,7 +55,7 @@ def dir_influence_results_schema_reuse(target_results:{str:Results}, target_embe
             tmp_results = _matcher.execute(field, _unit_scoring_method, source_embedding=mocked_embeddings, target_embeddings=target_embeddings[ontology_name])
             new_results[ontology_name] = deepcopy(target_results[ontology_name]).merge_highests(tmp_results.return_results_data())
         except KeyError:
-            Loggers.error(LoggerTypes.MATCH_SERVICE, "Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
+            logging.error("Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
             pass
     return new_results
 
@@ -67,7 +67,7 @@ def indir_influence_results_schema_reuse(target_results:{str:Results}, target_em
             tmp_results = _matcher.execute(field, _unit_scoring_method, source_embedding=mocked_embeddings, target_embeddings=target_embeddings[ontology_name])
             new_results[ontology_name] = deepcopy(target_results[ontology_name]).merge_highests(tmp_results.return_results_data())
         except KeyError:
-            Loggers.error(LoggerTypes.MATCH_SERVICE, "Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
+            logging.error("Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
             pass
     return new_results
 
@@ -92,17 +92,17 @@ def old_indirect_influence_results(influencing_results:{str:Results}, influencin
     proper_sources = list(set(sources) - set(exclude_sources_filter)) if exclude_sources_filter is not None else sources
     influencer_targets = SchemaData(influencer_results[specific_influencer_name].unsafe_return_winner_targets(sources_filter=proper_sources)) # S-O[O] winners
 
     for ontology_name in influencing_results.keys():
-        Loggers.info(LoggerTypes.MATCH_SERVICE, "Indirect influence for "+str(ontology_name))
+        logging.debug("Indirect influence for "+str(ontology_name))
         try:
             medium_field = Field(influencer_targets, influencing_ontologies[ontology_name]) # O winners - U match
-            Loggers.info(LoggerTypes.MATCH_SERVICE, "Indirect influence for "+str(ontology_name)+" with scoring method "+str(type(_unit_scoring_method))+" over "+specific_influencer_name+".")
+            logging.debug("Indirect influence for "+str(ontology_name)+" with scoring method "+str(type(_unit_scoring_method))+" over "+specific_influencer_name+".")
             medium_result = _matcher.execute(medium_field, _unit_scoring_method, source_embedding=influencer_embedding, target_embeddings=influencing_embeddings[ontology_name])
             new_result = deepcopy(influencing_results[ontology_name])
             new_result.get_indirect_influence(influencer_results[specific_influencer_name].return_winner_results_data(ignore_sources=exclude_sources_filter), medium_result.return_results_data()) # S-U[U] gets influence
             new_results[ontology_name] = new_result
         except KeyError:
-            Loggers.error(LoggerTypes.MATCH_SERVICE, "Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
+            logging.error("Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
             pass
     return new_results
 
@@ -117,7 +117,7 @@ def old_direct_influence_results(influencing_results:{str:Results}, influencing_
     influencer_targets = SchemaData(specific_influencer_targets) # S-O[O] selected
 
     for ontology_name in influencing_results.keys():
-        Loggers.info(LoggerTypes.MATCH_SERVICE, "Direct influence for "+str(ontology_name)+" with scoring method "+str(type(_unit_scoring_method))+" over "+str(specific_influencer_targets)+".")
+        logging.debug("Direct influence for "+str(ontology_name)+" with scoring method "+str(type(_unit_scoring_method))+" over "+str(specific_influencer_targets)+".")
         try:
             medium_field = Field(influencer_targets, influencing_ontologies[ontology_name]) # O selected - U match
             medium_result = _matcher.execute(medium_field, _unit_scoring_method, source_embedding=influencer_embedding, target_embeddings=influencing_embeddings[ontology_name])
@@ -125,7 +125,7 @@ def old_direct_influence_results(influencing_results:{str:Results}, influencing_
             new_result.get_direct_influence(specific_influencing_sources, specific_influencer_targets, medium_result.return_results_data()) # S-U[U] gets influence
             new_results[ontology_name] = new_result
         except KeyError:
-            Loggers.error(LoggerTypes.MATCH_SERVICE, "Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
+            logging.error("Ontology name "+str(ontology_name)+" not found in influencing_ontologies.")
             pass
     return new_results
 
@@ -136,16 +136,16 @@ def match_all(isObject:bool, user_specific_fields:{str:Field}, _filter_sources:[
     if isObject: scoring_methodology = _object_scoring_method
     else: scoring_methodology = _unit_scoring_method
     for ontology_name in user_specific_fields.keys():
-        Loggers.info(LoggerTypes.MATCH_SERVICE, "Matching for "+str(ontology_name))
+        logging.debug("Matching for "+str(ontology_name))
         results[ontology_name] = match_specific(scoring_methodology, user_specific_fields, ontology_name, _filter_sources = _filter_sources, _filter_target = _filter_target, source_embedding=source_embedding, target_embeddings=target_embeddings)
-        Loggers.info(LoggerTypes.MATCH_SERVICE, "Matching for "+str(ontology_name)+" is done.")
+        logging.debug("Matching for "+str(ontology_name)+" is done.")
     return results
 
 def match_specific(scoring_methodology:Scoring, user_specific_fields:{str:Field}, specific_field_name:str, _filter_sources:[str] = None, _filter_target:str = "", source_embedding:Embedding=None, target_embeddings:{str:Embedding}=None) -> Results:
     if _filter_sources is not None and _filter_target is not None:
         user_specific_fields[specific_field_name]._build_matrix(_filter_sources = _filter_sources, _filter_target = _filter_target)
-        Loggers.info(LoggerTypes.MATCH_SERVICE, "Matching for "+str(specific_field_name)+" with filter sources "+str(_filter_sources)+" filter targets "+str(_filter_target)+".")
+        logging.debug("Matching for "+str(specific_field_name)+" with filter sources "+str(_filter_sources)+" filter targets "+str(_filter_target)+".")
 
-    Loggers.info(LoggerTypes.MATCH_SERVICE, "Matching for "+str(specific_field_name)+" with scoring method "+str(type(scoring_methodology))+".")
+    logging.debug("Matching for "+str(specific_field_name)+" with scoring method "+str(type(scoring_methodology))+".")
     return _matcher.execute(user_specific_fields[specific_field_name], scoring_methodology, source_embedding=source_embedding, target_embeddings=target_embeddings[specific_field_name])
\ No newline at end of file
diff --git a/dbrepo-somapper/services/result_service.py b/dbrepo-somapper/services/result_service.py
index 00137652a41423fa946428aa614e9e1fce955a58..1fc691ec7e02ce799714d3a1e0e75a86e9830ed7 100644
--- a/dbrepo-somapper/services/result_service.py
+++ b/dbrepo-somapper/services/result_service.py
@@ -1,7 +1,7 @@
 from matching.scoring import Results
 from matching.field import Field
 from copy import deepcopy
-from logger import LoggerTypes, Loggers
+import logging
 
 
 
@@ -20,7 +20,7 @@ def return_results(results:{str:Results}, exclude_sources_filter:[str]=None, det
     else:
         results_data = {}
         for ontology_name in results.keys():
-            Loggers.info(LoggerTypes.RESULT_SERVICE, "Returning results for "+str(ontology_name)+" ignoring the sources"+str(exclude_sources_filter)+".")
+            logging.debug("Returning results for "+str(ontology_name)+" ignoring the sources"+str(exclude_sources_filter)+".")
             results_data[ontology_name] = deepcopy(results[ontology_name]).return_self(ignore_sources=exclude_sources_filter, determine_statistics=determine_statistics)
         return results_data
 
@@ -28,7 +28,7 @@ def return_results(results:{str:Results}, exclude_sources_filter:[str]=None, det
 def merge_results(results:{str:Results}, new_results:{str:Results}, exclude_sources_filter:[str]=None, include_sources_filter:[str]=None) -> {str:Results}:
     results_data = {}
     for ontology_name in results.keys():
-        Loggers.info(LoggerTypes.RESULT_SERVICE, "Merging results for "+str(ontology_name))
+        logging.debug("Merging results for "+str(ontology_name))
         if exclude_sources_filter is not None and include_sources_filter is None:
             proper_sources = list(set(results[ontology_name].return_sources()) - set(exclude_sources_filter))
         elif include_sources_filter is not None and exclude_sources_filter is None:
@@ -37,7 +37,7 @@ def merge_results(results:{str:Results}, new_results:{str:Results}, exclude_sour
             proper_sources = list(set(include_sources_filter) - set(exclude_sources_filter))
         else:
             proper_sources = None
-        Loggers.info(LoggerTypes.RESULT_SERVICE, "Proper influencing sources for "+str(ontology_name)+" : "+str(proper_sources)+" (=None means all sources).")
+        logging.debug("Proper influencing sources for "+str(ontology_name)+" : "+str(proper_sources)+" (=None means all sources).")
         results_data[ontology_name] = deepcopy(results[ontology_name]).merge(new_results[ontology_name].return_results_data(_filter_sources=proper_sources))
     return results_data
 
@@ -53,17 +53,17 @@ def return_type_constraints_information(user_specific_fields:{str:Field}, result
             if "results" in results_information[ontology_name].keys(): # If results_information is coming from results.
                 source_target_data = deepcopy(results_information[ontology_name]["results"])
                 type_data[ontology_name] = user_specific_fields[ontology_name].return_type_constraints(source_target_data)
-                Loggers.info(LoggerTypes.RESULT_SERVICE, "Datatypes of "+str(ontology_name)+" : "+str(type_data[ontology_name]))
+                logging.debug("Datatypes of "+str(ontology_name)+" : "+str(type_data[ontology_name]))
             elif "winner_results" in results_information[ontology_name].keys(): # If results_information is coming from winner_results.
                 source_target_data = deepcopy(results_information[ontology_name]["winner_results"]) # Convert to results format.
                 for source, target_score_data in source_target_data.items(): source_target_data[source] = [target_score_data]
                 type_data[ontology_name] = user_specific_fields[ontology_name].return_type_constraints(source_target_data)
-                Loggers.info(LoggerTypes.RESULT_SERVICE, "Datatypes of "+str(ontology_name)+" : "+str(type_data[ontology_name]))
+                logging.debug("Datatypes of "+str(ontology_name)+" : "+str(type_data[ontology_name]))
             else:
-                Loggers.info(LoggerTypes.RESULT_SERVICE, "No results or winner_results is not found in data.")
+                logging.debug("No results or winner_results is not found in data.")
         except KeyError:
-            Loggers.error(LoggerTypes.RESULT_SERVICE, "Ontology name "+str(ontology_name)+" not found in user_specific_fields.")
+            logging.error("Ontology name "+str(ontology_name)+" not found in user_specific_fields.")
             pass
     return type_data
 
@@ -81,10 +81,10 @@ def return_specific_result_information(results:{str:Results}, specific_ontology_
         result = results[specific_ontology_name]
         onto_data["results"] = result.return_results_data(_filter_sources=_source_filter, _filter_targets=_target_filter)
         onto_data["statistics"] = result.return_statistics()
-        Loggers.info(LoggerTypes.RESULT_SERVICE, "Returning results for "+str(specific_ontology_name)+" as "+str(onto_data)+".")
+        logging.debug("Returning results for "+str(specific_ontology_name)+" as "+str(onto_data)+".")
         return onto_data
     except KeyError:
-        Loggers.error(LoggerTypes.RESULT_SERVICE, "Ontology name "+str(specific_ontology_name)+" not found in results.")
+        logging.error("Ontology name "+str(specific_ontology_name)+" not found in results.")
         return onto_data
 
 def return_winner_results_information(results:{str:Results}, source_filters:[str]=None) -> {str:dict}:
@@ -96,11 +96,11 @@ def return_winner_results_information(results:{str:Results}, source_filters:[str
 def return_specific_winner_result_information(results:{str:Results}, specific_ontology_name:str, source_filters:[str]=None) -> {"winner_results":{str:(str, float)}, "statistics":(float, float)}:
     onto_data = {}
     try:
-        Loggers.info(LoggerTypes.RESULT_SERVICE, "Returning winner results for "+str(specific_ontology_name)+".")
+        logging.debug("Returning winner results for "+str(specific_ontology_name)+".")
         result = results[specific_ontology_name]
         onto_data["winner_results"] = result.return_winner_results_data(source_filters=source_filters)
         onto_data["statistics"] = result.return_statistics()
         return onto_data
     except KeyError:
-        Loggers.info(LoggerTypes.RESULT_SERVICE, "Ontology name "+str(specific_ontology_name)+" not found in results.")
+        logging.debug("Ontology name "+str(specific_ontology_name)+" not found in results.")
        return onto_data
\ No newline at end of file
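All of the services in this patch log through the root logger via module-level logging.debug and logging.error calls. A possible follow-up, sketched here purely as an illustration and not included in the diff, is a per-module named logger so that verbosity can be tuned for a single service without raising the root level; the logger name below is an assumption, not an existing identifier in the repository:

import logging

# Hypothetical per-module logger (the name is illustrative).
logger = logging.getLogger("services.match_service")

def example_usage(ontology_name: str) -> None:
    # Same kind of messages as the patch emits, rendered lazily and attributed to this module.
    logger.debug("Matching for %s", ontology_name)
    logger.error("Ontology name %s not found in influencing_ontologies.", ontology_name)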