Skip to content
Snippets Groups Projects
Verified Commit ac252a16 authored by Martin Weise's avatar Martin Weise
Browse files

Updated lib

parent 932bdc48
No related branches found
No related tags found
1 merge request!386WIP
This commit is part of merge request !386. Comments created here will be created in the context of that merge request.
......@@ -11,7 +11,7 @@ from dbrepo.api.dto import *
from dbrepo.api.exceptions import ResponseCodeError, NotExistsError, \
ForbiddenError, MalformedError, NameExistsError, QueryStoreError, ExternalSystemError, \
AuthenticationError, FormatNotAvailable, RequestError, ServiceError, ServiceConnectionError
from dbrepo.api.mapper import query_to_subset
from dbrepo.api.mapper import query_to_subset, dataframe_to_table_definition
logging.basicConfig(format='%(asctime)s %(name)-12s %(levelname)-6s %(message)s', level=logging.INFO,
stream=sys.stdout)
......@@ -463,9 +463,8 @@ class RestClient:
raise ResponseCodeError(
f'Failed to update database schema: response code: {response.status_code} is not 200 (OK)')
def create_table(self, database_id: str, name: str, is_public: bool, is_schema_public: bool,
columns: List[CreateTableColumn], constraints: CreateTableConstraints,
description: str = None) -> TableBrief:
def create_table(self, database_id: str, name: str, is_public: bool, is_schema_public: bool, dataframe: DataFrame,
description: str = None, with_data: bool = True) -> TableBrief:
"""
Creates a table in the database with given database id; columns and constraints are derived from the given dataframe.
......@@ -473,9 +472,9 @@ class RestClient:
:param name: The name of the created table.
:param is_public: The visibility of the data. If set to true the data will be publicly visible.
:param is_schema_public: The visibility of the schema metadata. If set to true the schema metadata will be publicly visible.
:param constraints: The constraints of the created table.
:param columns: The columns of the created table.
:param dataframe: The `pandas` dataframe.
:param description: The description of the created table. Optional.
:param with_data: If set to `True`, the data will be included in the new table. Optional. Default: `True`.
:returns: The table, if successful.
......@@ -488,12 +487,18 @@ class RestClient:
:raises ResponseCodeError: If something went wrong with the creation.
"""
url = f'/api/database/{database_id}/table'
columns, constraints = dataframe_to_table_definition(dataframe)
response = self._wrapper(method="post", url=url, force_auth=True,
payload=CreateTable(name=name, is_public=is_public, is_schema_public=is_schema_public,
description=description, columns=columns, constraints=constraints))
if response.status_code == 201:
body = response.json()
return TableBrief.model_validate(body)
table = TableBrief.model_validate(body)
if with_data:
self.import_table_data(database_id=database_id,
table_id=table.id,
dataframe=dataframe.reset_index())
return table
if response.status_code == 400:
raise MalformedError(f'Failed to create table: {response.text}')
if response.status_code == 403:
......@@ -919,9 +924,9 @@ class RestClient:
:raises ResponseCodeError: If something went wrong with the insert.
"""
url = f'/api/upload'
buffer = BytesIO()
dataframe.to_csv(path_or_buf=buffer, header=False, index=False)
dataframe.to_csv(path_or_buf=buffer, header=True, index=False)
url = f'/api/upload'
response = self._wrapper(method="post", url=url, force_auth=True,
files={'file': ('dataframe.csv', buffer.getvalue())})
if response.status_code == 201:
......@@ -949,8 +954,8 @@ class RestClient:
url = f'/api/database/{database_id}/table/{table_id}/data/import'
response = self._wrapper(method="post", url=url, force_auth=True,
payload=Import(location=self._upload(dataframe), separator=',', quote='"',
header=True, line_termination='\n'))
payload=Import(location=self._upload(dataframe), separator=',', quote='"', header=True,
line_termination='\n'))
if response.status_code == 202:
return
if response.status_code == 400:
......
from dbrepo.api.dto import Subset, QueryDefinition, Database, Table, Image, Filter, Order
import logging
import pandas
from numpy import dtype
from pandas import DataFrame, Series
from dbrepo.api.dto import Subset, QueryDefinition, Database, Table, Image, Filter, Order, CreateTableColumn, \
CreateTableConstraints, ColumnType
from dbrepo.api.exceptions import MalformedError
......@@ -38,3 +45,78 @@ def query_to_subset(database: Database, image: Image, query: QueryDefinition) ->
raise MalformedError(f'Failed to create view: order column name not found in database')
orders.append(Order(column_id=order_column_ids[0], direction=order.direction))
return Subset(table_id=tables[0].id, columns=filtered_column_ids, filter=filters, order=orders)
def dataframe_to_table_definition(dataframe: DataFrame) -> ([CreateTableColumn], CreateTableConstraints):
    """
    Derives a table definition (columns and constraints) from a `pandas` dataframe.

    The index of the dataframe becomes the primary key, so every index level must be named. The index levels
    are turned into regular columns before the column types are inferred from the dtypes of the columns.

    :param dataframe: The `pandas` dataframe with a named index.

    :returns: The tuple of column definitions and table constraints.

    :raises MalformedError: If the index (or one of its levels) has no name.
    """
    # `index.names` covers both a plain index and a multi index (`index.name` is not set for the latter)
    index_names = list(dataframe.index.names)
    if any(index_name is None for index_name in index_names):
        raise MalformedError(f'Failed to map dataframe: index not set')
    constraints = CreateTableConstraints(uniques=[],
                                         checks=[],
                                         foreign_keys=[],
                                         # same string conversion as applied to the column names below
                                         primary_key=[str(index_name) for index_name in index_names])
    dataframe = dataframe.reset_index()
    columns = []
    for name, series in dataframe.items():
        # every column starts out as text and is narrowed down depending on its dtype
        column = CreateTableColumn(name=str(name),
                                   type=ColumnType.TEXT,
                                   null_allowed=contains_null(series))
        if series.dtype == dtype('float64'):
            # for a float64 series this effectively checks that no value is NaN
            if pandas.to_numeric(series, errors='coerce').notnull().all():
                logging.debug(f"mapped column {name} from float64 to decimal")
                column.type = ColumnType.DECIMAL
                column.size = 40
                column.d = 20
            else:
                logging.debug(f"mapped column {name} from float64 to text")
                column.type = ColumnType.TEXT
        elif series.dtype == dtype('int64'):
            # only 0/1 values are treated as boolean flags unless the label looks like an identifier;
            # labels are not necessarily strings, empty columns have no min/max
            if not series.empty and 0 <= series.min() and series.max() <= 1 and 'id' not in str(name):
                logging.debug(f"mapped column {name} from int64 to bool")
                column.type = ColumnType.BOOL
            else:
                logging.debug(f"mapped column {name} from int64 to bigint")
                column.type = ColumnType.BIGINT
        elif series.dtype == dtype('O'):
            is_temporal = True
            has_time_part = False
            try:
                # best-effort probe: only the fact that parsing succeeds is used, not the parsed values
                pandas.to_datetime(series, format='mixed')
                # a colon is taken as the sign of a time part
                has_time_part = series.str.contains(':', na=False).any()
            except (ValueError, TypeError, AttributeError):
                # not parseable as date/time (or not string values at all): size it as a string below
                is_temporal = False
            if is_temporal and has_time_part:
                logging.debug(f"mapped column {name} from O to timestamp")
                column.type = ColumnType.TIMESTAMP
            elif is_temporal:
                logging.debug(f"mapped column {name} from O to date")
                column.type = ColumnType.DATE
            else:
                max_size = series.astype(str).map(len).max()
                if max_size <= 1:
                    logging.debug(f"mapped column {name} from O to char")
                    column.type = ColumnType.CHAR
                    column.size = 1
                elif max_size <= 255:
                    # `elif`: a separate `if` here would overwrite the char mapping above
                    logging.debug(f"mapped column {name} from O to varchar")
                    column.type = ColumnType.VARCHAR
                    column.size = 255
                else:
                    logging.debug(f"mapped column {name} from O to text")
                    column.type = ColumnType.TEXT
        elif series.dtype == dtype('bool'):
            logging.debug(f"mapped column {name} from bool to bool")
            column.type = ColumnType.BOOL
        elif pandas.api.types.is_datetime64_any_dtype(series.dtype):
            # pandas columns carry a unit (e.g. datetime64[ns]) and never equal the generic dtype('datetime64')
            logging.debug(f"mapped column {name} from datetime64 to datetime")
            column.type = ColumnType.DATETIME
        else:
            logging.warning(f'default to \'text\' for column {name} and type {series.dtype}')
        columns.append(column)
    return columns, constraints
def contains_null(dataframe: DataFrame) -> bool:
    """
    Checks whether the given data contains at least one missing value.

    A value counts as missing if it is the literal `\\N` marker or if `pandas` considers it null.

    :param dataframe: The `pandas` dataframe (a single series works as well) to inspect.

    :returns: `True` if at least one value is missing, `False` otherwise.
    """
    if '\\N' in dataframe.values:
        return True
    # `.any()` yields a `numpy.bool_`; cast it so that the declared builtin `bool` is actually returned
    return bool(dataframe.isnull().values.any())
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment