Skip to content
Snippets Groups Projects
Commit c5813785 authored by Cornelia Michlits's avatar Cornelia Michlits
Browse files

rm database-service requests, no auth. needed, logging

parent 327db9c3
Branches
Tags
1 merge request!103241 modify bstat
This commit is part of merge request !103. Comments created here will be created in the context of that merge request.
......@@ -123,8 +123,8 @@ def determinepk():
res = {"success": False, "message": str(e)}
return Response(res, mimetype="application/json"), 500
@app.route('/api/analyse/update_mdb_col', methods=["POST"], endpoint='mdb_basicstatistic')
@swag_from('/as-yml/updatecol.yml')
@app.route('/api/mdb/update_mdb_col', methods=["POST"], endpoint='mdb_basicstatistic')
@swag_from('as-yml/bstatistic.yml')
def mdb_basicstatistic():
input_json = request.get_json()
try:
......
summary: "Add basic statistics, i.e., determine min, max, values of numerical columns..., and save to metadatabase"
description: "Updates entity mdb_columns and mdb_columns_num (columns with numerical values), mdb_columns_nom (columns
with nominal values) and mdb_columns_cat (columns with categorical values, e.g. ENUM datatypes) in metadatabase"
consumes:
- "application/json"
produces:
- "application/json"
parameters:
- in: "body"
name: "body"
description: "Updates entity mdb_columns, mdb_columns_nom (attribute max_length), mdb_columns_num (min, max, mean, sd, histogram) and mdb_columns_cat (num_cat, cat_array). The attribute 'histogram' describes an equi-width histogram with a fixed number of 10 buckets. The last value in this numeric array is the width of one bucket. The attribute cat_array contains an array with the names of the categories."
required: true
schema:
type: "object"
properties:
dbid:
type: "integer"
example: 1
tid:
type: "integer"
example: 1
responses:
200:
description: "OK"
405:
description: "Invalid input"
409:
description: "Conflict"
\ No newline at end of file
......@@ -5,84 +5,54 @@ import requests
import json
def update_bstatistic(dbid, tid):
logging.info("called update_bstatistic")
# Get database info
try:
s = requests.get(
"http://fda-database-service:9092/api/database/",
params = {"id":dbid},
headers=headers
).json()
logging.info("s", s)
except Exception as e:
logging.error("Error while trying to get database info",e)
# Get tablename by dbid and tid
try:
tbl_info = requests.get(
"http://fda-table-service:9094/api/database/{0}/table/{1}/".format(dbid,tid), headers=headers).json()
logging.info(tbl_info)
tbl_name = tbl_info['internalName']
except Exception as e:
logging.error("Error:", e)
logging.info("tbl name: " + tbl_name)
nomdtlist = ['text', 'character varying', 'varchar', 'char']
numdtlist = ['number','decimal', 'numeric','bigint', 'integer', 'smallint', 'real', 'double precision', 'timestamp']
numdtlist = ['number', 'decimal', 'numeric', 'bigint', 'integer', 'smallint', 'real', 'double precision',
'timestamp']
catdtlist = ['boolean', 'enum', 'date', 'user-defined']
# Get columnname
logging.info("called update_bstatistic")
logging.info("update nominal columns")
# Get database info
try:
# Connecting to metadatabase
conn = connect(
dbname="fda",
user="postgres",
host="fda-metadata-db",
password="postgres"
)
# Compare columns datatype
cursor = conn.cursor()
cursor.execute("SELECT cDBID, tID, ID FROM mdb_columns where cDBID=%s and tID=%s and lower(datatype) in %s ", dbid, tid, lst2mariadbarr(nomdtlist))
cursor.fetchall()
logging.info(cursor.rowcount())
for row in cursor:
logging.info("insert mdb_columns_nom: " + row)
cursor.execute("SELECT cDBID, tID, ID FROM mdb_columns where cDBID=%s and tID=%s and lower(datatype) in (%s, %s, %s, %s) ",
(dbid, tid, 'text', 'character varying', 'varchar', 'char'))
nom = cursor.fetchall()
logging.info(list(enumerate(nom)))
for num, row in enumerate(nom):
logging.info("insert mdb_columns_nom: "+ '('+str(row[0])+','+str(row[1])+','+str(row[2])+')')
insert_mdb_nomcol(row[0], row[1], row[2])
cursor.close()
cursor = conn.cursor()
cursor.execute("SELECT cDBID, tID, ID FROM mdb_columns where cDBID=%s and tID=%s and lower(datatype) in %s ", dbid, tid, lst2mariadbarr(numdtlist))
for row in cursor.fetchall():
logging.info("insert mdb_columns_num: " + row)
conn.commit()
logging.info("update numerical columns")
cursor.execute("""SELECT cDBID, tID, ID FROM mdb_columns where cDBID=%s and tID=%s and lower(datatype) in (%s,%s,%s,%s,%s,%s,%s)
and (lower(cname)!='id')""",
(dbid, tid, 'number', 'decimal', 'numeric', 'bigint', 'integer', 'smallint', 'real'))
numc = cursor.fetchall()
for num, row in enumerate(numc):
logging.info("insert mdb_columns_num: "+ '('+str(row[0])+','+str(row[1])+','+str(row[2])+')')
insert_mdb_numcol(row[0], row[1], row[2])
cursor.close()
cursor = conn.cursor()
cursor.execute("SELECT cDBID, tID, ID FROM mdb_columns where cDBID=%s and tID=%s and lower(datatype) in %s ", dbid, tid,
lst2mariadbarr(catdtlist))
conn.commit()
logging.info("update nominal columns")
cursor.execute("SELECT cDBID, tID, ID FROM mdb_columns where cDBID=%s and tID=%s and lower(datatype) in (%s,%s,%s,%s) ",
(dbid, tid, 'boolean', 'enum', 'date', 'user-defined'))
for row in cursor.fetchall():
logging.info("insert mdb_columns_nom_cat: " + row)
logging.info("insert mdb_columns_nom_cat: " + '('+str(row[0])+','+str(row[1])+','+str(row[2])+')')
insert_mdb_catcol(row[0], row[1], row[2])
conn.commit()
cursor.close()
cursor = conn.cursor()
cursor.execute("SELECT value FROM mdb_images_environment_item where key=%s",'ROOT')
value=cursor.fetchone()
except Exception as e:
print("Error while trying to get cname from mdb",e)
# Connecting to database
try:
engine = create_engine('mysql+pymysql://root:'+str(value)+'@dbrepo-userdb-'+s[0]['internalName'].replace('_', '-')+'/'+s[0]['internalName'])
sql = text("""SELECT column_name, columns.data_type, columns.ordinal_position, is_nullable
from information_schema.columns
where columns.table_name= :tblname and column_name=:colname""")
with engine.begin() as conn:
res = conn.execute(sql, tblname=tbl_name,colname=cname).fetchone()
except Exception as e:
print("Error while connecting to database.", e)
logging.error("error while trying to update_bstatistics",e)
def insert_mdb_nomcol(dbid, tid, cid):
# Connecting to metadatabase - to obtain column name
# Connecting to metadatabase - to obtain column name""
logging.info("get nominal columns")
try:
conn = connect(
dbname="fda",
......@@ -92,37 +62,39 @@ def insert_mdb_nomcol(dbid, tid, cid):
)
cursor = conn.cursor()
cursor.execute("select cDBID,tID,ID,internal_name from mdb_columns where cdbid = %s and tid = %s and cid =%s",
cursor.execute(
"SELECT internal_name FROM mdb_databases where ID=%s", (dbid,))
s = cursor.fetchone()
conn.commit()
cursor.execute("SELECT internal_name FROM mdb_tables where tDBID=%s and ID=%s",(dbid, tid))
t = cursor.fetchone()
conn.commit()
cursor.execute("select cDBID,tID,ID,internal_name from mdb_columns where cDBID = %s and tID = %s and ID =%s",
(dbid, tid, cid))
res = cursor.fetchall()
conn.commit()
cname = res[0][3]
cursor.execute("SELECT value FROM mdb_images_environment_item where key=%s", ('MARIADB_ROOT_PASSWORD',))
value = cursor.fetchone()
conn.commit()
cursor.close()
logging.info("nominal column: " + str(cname))
except Exception as e:
print("Error while connecting to metadatabase.", e)
print("error while inserting into mdb_columns_nom.", e)
logging.info(res)
# Connect to database - to obtain max_length
try:
s = requests.get(
"http://fda-database-service:9092/api/database/",
params={"id": dbid}
).json()
except Exception as e:
print("Error while trying to get database info", e)
try:
tbl_info = requests.get(
"http://fda-table-service:9094/api/database/{0}/table/{1}/".format(dbid, tid)).json()
tbl_name = tbl_info['internalName']
except Exception as e:
print("Error:", e)
try:
engine = create_engine('mysql+pymysql://root:'+str(value)+'@dbrepo-userdb-'+s[0]['internalName'].replace('_', '-')+'/'+s[0]['internalName'])
logging.info("determine max_length in :"+'userdb-'+str(s[0]))
engine = create_engine('mysql+pymysql://root:'+str(value[0])+'@dbrepo-userdb-'+s[0].replace('_', '-')+'/'+s[0])
sql = text("select max(char_length(" + cname + ")) from " + tbl_name)
sql = text("select max(char_length(" + cname + ")) from " + t[0])
with engine.begin() as conn:
res = conn.execute(sql).fetchone()
maxlen = res[0]
except Exception as e:
print("Error while connecting to database", e)
logging.error("error while connecting to userdb", e)
try:
conn = connect(
dbname="fda",
......@@ -139,13 +111,14 @@ def insert_mdb_nomcol(dbid, tid, cid):
ret = cursor.statusmessage
conn.commit()
cursor.close()
except Exception as e:
print("Error while inserting into metadatabase", e)
print("error while inserting into fda-metadata-db", e)
return json.dumps(ret)
def insert_mdb_numcol(dbid, tid, cid):
# Connecting to metadatabase to obtain columnname
logging.info("get numerical columns")
try:
conn = connect(
dbname="fda",
......@@ -155,25 +128,24 @@ def insert_mdb_numcol(dbid, tid, cid):
)
cursor = conn.cursor()
cursor.execute("select cDBID,tID,ID,cName from mdb_columns where cdbid = %s and tid = %s and cid =%s",
cursor.execute(
"SELECT internal_name FROM mdb_databases where ID=%s", (dbid,))
s = cursor.fetchone()
conn.commit()
cursor.execute("SELECT internal_name FROM mdb_tables where tDBID=%s and ID=%s", (dbid, tid))
t = cursor.fetchone()
conn.commit()
cursor.execute("select cDBID,tID,ID,internal_name from mdb_columns where cdbid = %s and tid = %s and id =%s",
(dbid, tid, cid))
res = cursor.fetchall()
cname = res[0][3]
cursor.execute("SELECT value FROM mdb_images_environment_item where key=%s", ('MARIADB_ROOT_PASSWORD',))
value = cursor.fetchone()
conn.commit()
cursor.close()
except Exception as e:
print("Error while connecting to metadatabase.", e)
try:
s = requests.get(
"http://fda-database-service:9092/api/database/",
params={"id": dbid}
).json()
except Exception as e:
print("Error while trying to get database info", e)
try:
tbl_info = requests.get(
"http://fda-table-service:9094/api/database/{0}/table/{1}/".format(dbid, tid)).json()
tbl_name = tbl_info['internalName']
except Exception as e:
print("Error:", e)
print("error while connecting to fda-metadata-db.", e)
# Determine min, max, ...
try:
# Postgres engine
......@@ -181,30 +153,29 @@ def insert_mdb_numcol(dbid, tid, cid):
# 'postgresql+psycopg2://postgres:postgres@fda-userdb-' + s[0]['internalName'].replace('_', '-') + '/' + s[0][
# 'internalName'])
# Mariadb engine
engine = create_engine(
'mysql+pymysql://root:'+str(value)+'@dbrepo-userdb-' + s[0]['internalName'].replace('_', '-') + '/' + s[0][
'internalName'])
engine = create_engine('mysql+pymysql://root:'+str(value[0])+'@dbrepo-userdb-'+s[0].replace('_', '-')+'/'+s[0])
logging.info("determine min, max, mean, ... in :" + 'userdb-' + str(s[0]))
# min
sql = text("select min(" + cname + ") from " + tbl_name)
sql = text("select min(" + cname + ") from " + t[0])
with engine.begin() as conn:
res = conn.execute(sql).fetchone()
minval = res[0]
# max
sql = text("select max(" + cname + ") from " + tbl_name)
sql = text("select max(" + cname + ") from " + t[0])
with engine.begin() as conn:
res = conn.execute(sql).fetchone()
maxval = res[0]
# mean
sql = text("select avg(" + cname + ") from " + tbl_name)
sql = text("select avg(" + cname + ") from " + t[0])
with engine.begin() as conn:
res = conn.execute(sql).fetchone()
meanval = res[0]
# sd
sql = text("select stddev(" + cname + ") from " + tbl_name)
sql = text("select stddev(" + cname + ") from " + t[0])
with engine.begin() as conn:
res = conn.execute(sql).fetchone()
sdval = float(res[0])
......@@ -215,7 +186,7 @@ def insert_mdb_numcol(dbid, tid, cid):
# sql = text("select " + cname + "from " + tbl_name + "where rand() <= 0.3")
width_bucket = (maxval - minval + 1) / num_buckets
for i in range(0, num_buckets):
sql = text("select count(*) from " + tbl_name + " where " + cname + " >= " + str(
sql = text("select count(*) from " + t[0] + " where " + cname + " >= " + str(
minval + i * width_bucket) + " and " + cname + " < " + str(minval + (i + 1) * width_bucket))
with engine.begin() as conn:
res = conn.execute(sql).fetchone()
......@@ -225,7 +196,7 @@ def insert_mdb_numcol(dbid, tid, cid):
histpgarr = lst2pgarr(hist_lst)
except Exception as e:
print("Error while connecting to database", e)
print("error while connecting to userdb", e)
# Insert / update values in metadata-db
try:
conn = connect(
......@@ -239,21 +210,21 @@ def insert_mdb_numcol(dbid, tid, cid):
cursor = conn.cursor()
cursor.execute("""Insert into mdb_columns_num (cdbid,tid,cid,minval,maxval,mean,sd,histogram,last_modified)
values (%s,%s,%s,%s,%s,%s,%s,%s,%s,current_timestamp)
values (%s,%s,%s,%s,%s,%s,%s,%s,current_timestamp)
ON CONFLICT (cdbid,tid,cid) do update set
(minval,maxval,mean,sd,histogram,last_modified) = (%s,%s,%s,%s,%s,%s,current_timestamp)""",
(minval,maxval,mean,sd,histogram,last_modified) = (%s,%s,%s,%s,%s,current_timestamp)""",
(dbid, tid, cid, minval, maxval, meanval, sdval, histpgarr, minval, maxval, meanval, sdval,
histpgarr,))
histpgarr))
ret = cursor.statusmessage
conn.commit()
except Exception as e:
print("Error while inserting into metadatabase", e)
print("error while inserting into fda-metadata-db", e)
return json.dumps(ret)
def insert_mdb_catcol(dbid, tid, cid):
# Connecting to metadatabase to obtain columnname
logging.info("get categorical columns")
try:
conn = connect(
dbname="fda",
......@@ -263,25 +234,24 @@ def insert_mdb_catcol(dbid, tid, cid):
)
cursor = conn.cursor()
cursor.execute("select cDBID,tID,ID,cname from mdb_columns where cdbid = %s and tid = %s and id =%s",
cursor.execute(
"SELECT internal_name FROM mdb_databases where ID=%s", (dbid,))
s = cursor.fetchone()
conn.commit()
cursor.execute("SELECT internal_name FROM mdb_tables where tDBID=%s and ID=%s", (dbid, tid))
t = cursor.fetchone()
conn.commit()
cursor.execute("select cDBID,tID,ID,internal_name from mdb_columns where cdbid = %s and tid = %s and id =%s",
(dbid, tid, cid))
res = cursor.fetchall()
cname = res[0][3]
cursor.execute("SELECT value FROM mdb_images_environment_item where key=%s", ('MARIADB_ROOT_PASSWORD',))
value = cursor.fetchone()
conn.commit()
cursor.close()
except Exception as e:
print("Error while connecting to metadatabase.", e)
try:
s = requests.get(
"http://fda-database-service:9092/api/database/",
params={"id": dbid}
).json()
except Exception as e:
print("Error while trying to get database info", e)
try:
tbl_info = requests.get(
"http://fda-table-service:9094/api/database/{0}/table/{1}/".format(dbid, tid)).json()
tbl_name = tbl_info['internalName']
except Exception as e:
print("Error:", e)
print("error while connecting to fda-metadata-db.", e)
# Determine number of categories, categories array
try:
# Postgres engine
......@@ -289,18 +259,17 @@ def insert_mdb_catcol(dbid, tid, cid):
# 'postgresql+psycopg2://postgres:postgres@fda-userdb-' + s[0]['internalName'].replace('_', '-') + '/' + s[0][
# 'internalName'])
# Mariadb engine
engine = create_engine(
'mysql+pymysql://root:'+str(value)+'@dbrepo-userdb-' + s[0]['internalName'].replace('_', '-') + '/' + s[0][
'internalName'])
engine = create_engine('mysql+pymysql://root:'+str(value[0])+'@dbrepo-userdb-'+s[0].replace('_', '-')+'/'+s[0])
logging.info("determine categories in :" + 'userdb-' + str(s[0]))
# num_categories
sql = text("select count( distinct " + cname + ") from " + tbl_name)
sql = text("select count( distinct " + cname + ") from " + t[0])
with engine.begin() as conn:
res = conn.execute(sql).fetchone()
num_cat = int(res[0])
# cat_array
sql = text("select distinct " + cname + " from " + tbl_name)
sql = text("select distinct " + cname + " from " + t[0])
with engine.begin() as conn:
res = conn.execute(sql).fetchall()
cat_arr = lst2pgarr(lstflat(res))
......@@ -325,13 +294,11 @@ def insert_mdb_catcol(dbid, tid, cid):
ret = cursor.statusmessage
conn.commit()
cursor.close()
except Exception as e:
print("Error while inserting into metadatabase", e)
print("error while inserting into fda-metadata-db", e)
return json.dumps(ret)
# Useful helper functions
def lst2mariadbarr(lst):
    """Render a list of strings as a parenthesised, single-quoted SQL list.

    Example: ``['text', 'char']`` becomes ``"('text','char')"``; an empty
    list becomes ``"()"``.

    The items are wrapped in single quotes verbatim -- no escaping is
    applied -- so this must only be fed trusted, constant values.

    :param lst: iterable of ``str`` items.
    :return: the rendered list as a single string.
    :raises TypeError: if an item is not a ``str``.
    """
    # Avoid shadowing the builtin ``str`` and skip the intermediate list:
    # ``join`` consumes the generator directly.
    quoted_items = ("'" + item + "'" for item in lst)
    return '(' + ','.join(quoted_items) + ')'
def lstflat(x):
    """Flatten one level of nesting.

    Every element of ``x`` must itself be iterable (e.g. the row tuples
    returned by ``fetchall()``); their items are concatenated in order.

    :param x: iterable of iterables.
    :return: a new flat ``list`` with the items of all sub-iterables.
    """
    return [item for sublst in x for item in sublst]
def lst2pgarr(lst):
    """Render a list as a PostgreSQL array literal such as ``{a,b,c}``.

    Each element is converted with ``str()`` so that non-string values
    (numbers, dates, booleans coming back from a database driver) no longer
    make ``str.join`` raise ``TypeError``. ``None`` is written as the
    unquoted keyword ``NULL``, which PostgreSQL reads as a null element.

    Elements are not quoted or escaped, so values containing commas,
    braces, quotes or backslashes are not supported.

    :param lst: iterable of values to place in the array.
    :return: the array literal as a string; ``'{}'`` for an empty input.
    """
    rendered = ('NULL' if item is None else str(item) for item in lst)
    return '{' + ','.join(rendered) + '}'
......@@ -403,7 +403,7 @@ CREATE TABLE IF NOT EXISTS mdb_COLUMNS_num
Mean NUMERIC,
Median NUMERIC,
Sd Numeric,
Histogram INTEGER[],
Histogram NUMERIC[],
last_modified timestamp without time zone,
created timestamp without time zone NOT NULL DEFAULT NOW(),
FOREIGN KEY (cDBID, tID, cID) REFERENCES mdb_COLUMNS (cDBID, tID, ID),
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment