diff --git a/.docs/api/python.md b/.docs/api/python.md index ab6b2b69a6b6ca9606232eca534686bc6c0b71ab..3f1a8928542dd01b16e549482215f35fecf2a3ea 100644 --- a/.docs/api/python.md +++ b/.docs/api/python.md @@ -33,89 +33,78 @@ This package supports Python 3.11+. ## Quickstart -Create a table and import a .csv file from your computer. +Get public data from a table as pandas `DataFrame`: ```python from dbrepo.RestClient import RestClient -from dbrepo.api.dto import CreateTableColumn, ColumnType, CreateTableConstraints -client = RestClient(endpoint='https://test.dbrepo.tuwien.ac.at', username="foo", - password="bar") - -# analyse csv -analysis = client.analyse_datatypes(file_path="sensor.csv", separator=",") -print(f"Analysis result: {analysis}") -# -> columns=(date=date, precipitation=decimal, lat=decimal, lng=decimal), separator=, -# line_termination=\n - -# create table -table = client.create_table(database_id=1, - name="Sensor Data", - constraints=CreateTableConstraints(checks=['precipitation >= 0'], - uniques=[['precipitation']]), - columns=[CreateTableColumn(name="date", - type=ColumnType.DATE, - dfid=3, # YYYY-MM-dd - primary_key=True, - null_allowed=False), - CreateTableColumn(name="precipitation", - type=ColumnType.DECIMAL, - size=10, - d=4, - primary_key=False, - null_allowed=True), - CreateTableColumn(name="lat", - type=ColumnType.DECIMAL, - size=10, - d=4, - primary_key=False, - null_allowed=True), - CreateTableColumn(name="lng", - type=ColumnType.DECIMAL, - size=10, - d=4, - primary_key=False, - null_allowed=True)]) -print(f"Create table result {table}") -# -> (id=1, internal_name=sensor_data, ...) 
- -client.import_table_data(database_id=1, table_id=1, file_path="sensor.csv", separator=",", - skip_lines=1, line_encoding="\n") -print(f"Finished.") +client = RestClient(endpoint="https://dbrepo1.ec.tuwien.ac.at") +# Get a small data slice of just three rows +df = client.get_table_data(database_id=7, table_id=13, page=0, size=3, df=True) +print(df) +# x_coord component unit ... value stationid meantype +# 0 16.52617 Feinstaub (PM10) µg/m³ ... 21.0 01:0001 HMW +# 1 16.52617 Feinstaub (PM10) µg/m³ ... 23.0 01:0001 HMW +# 2 16.52617 Feinstaub (PM10) µg/m³ ... 26.0 01:0001 HMW +# +# [3 rows x 12 columns] ``` -The library is well-documented, please see the [full documentation](../sphinx) or -the [PyPI page](https://pypi.org/project/dbrepo/). +Import data into a table: -## Supported Features & Best-Practices +```python +import pandas as pd +from dbrepo.RestClient import RestClient -- Manage user account ([docs](../usage-overview/#create-user-account)) -- Manage databases ([docs](../usage-overview/#create-database)) -- Manage database access & visibility ([docs](../usage-overview/#private-database-access)) -- Import dataset ([docs](../usage-overview/#private-database-access)) -- Create persistent identifiers ([docs](../usage-overview/#assign-database-pid)) -- Execute queries ([docs](../usage-overview/#export-subset)) -- Get data from tables/views/subsets +client = RestClient(endpoint="https://dbrepo1.ec.tuwien.ac.at", username="foo", + password="bar") +df = pd.DataFrame(data={'x_coord': 16.52617, 'component': 'Feinstaub (PM10)', + 'unit': 'µg/m³', ...}) +client.import_table_data(database_id=7, table_id=13, file_name_or_data_frame=df) +``` -## Secrets +## Supported Features & Best-Practices -It is not recommended to store credentials directly in the notebook as they will be versioned with git, etc. 
Use
-environment variables instead:
+- Manage user
+  account ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#create-user-account))
+- Manage
+  databases ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo//usage-overview/#create-database))
+- Manage database access &
+  visibility ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#create-database))
+- Import
+  dataset ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#import-dataset))
+- Create persistent
+  identifiers ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#assign-database-pid))
+- Execute
+  queries ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#export-subset))
+- Get data from tables/views/subsets
 
-```properties title=".env"
-DBREPO_ENDPOINT=https://test.dbrepo.tuwien.ac.at
-DBREPO_USERNAME=foo
-DBREPO_PASSWORD=bar
-DBREPO_SECURE=True
+## Configure
+
+All credentials can optionally be set/overridden with environment variables. This is especially useful when sharing
+Jupyter Notebooks by creating an invisible `.env` file and loading it:
+
+``` title=".env"
+REST_API_ENDPOINT="https://dbrepo1.ec.tuwien.ac.at"
+REST_API_USERNAME="foo"
+REST_API_PASSWORD="bar"
+REST_API_SECURE="True"
+AMQP_API_HOST="https://dbrepo1.ec.tuwien.ac.at"
+AMQP_API_PORT="5672"
+AMQP_API_USERNAME="foo"
+AMQP_API_PASSWORD="bar"
+AMQP_API_VIRTUAL_HOST="dbrepo"
+REST_UPLOAD_ENDPOINT="https://dbrepo1.ec.tuwien.ac.at/api/upload/files"
 ```
 
-Then use the default constructor of the `RestClient` to e.g. analyse a CSV. Your secrets are automatically passed:
+You can reduce logging verbosity by raising the log level to e.g. `INFO` (this suppresses `DEBUG` messages; it does not disable logging entirely):
 
-```python title="analysis.py"
+```python
 from dbrepo.RestClient import RestClient
-
-client = RestClient()
-analysis = client.analyse_datatypes(file_path="sensor.csv", separator=",")
+import logging
+logging.getLogger().setLevel(logging.INFO)
+...
+client = RestClient(...)
``` ## Future diff --git a/.docs/examples/xps-data.md b/.docs/examples/xps-data.md index 539326347bbc6b3cce51f0733309399add6aa0fb..9b237c8c6c8ec2be919751475f810605a9ee1925 100644 --- a/.docs/examples/xps-data.md +++ b/.docs/examples/xps-data.md @@ -18,10 +18,11 @@ connecting material properties to compositions via XPS spectra becomes evident. We read XPS data from the VAMAS-encoded format and inserted it into a [database schema](https://gitlab.tuwien.ac.at/fairdata/xps/-/blob/e17860399b1b109c72b01888766f37193dde5870/sql/create_schema.sql) -that captures the VAMAS-schema. It can then be read using the [Python Library](../../api/python). +that captures the VAMAS-schema. It can then be read using the Python Library that executes a database query in SQL to +obtain only the experiment data (c.f. [subset page](https://dbrepo1.ec.tuwien.ac.at/database/27/subset/10/info)). <figure markdown> -{ .img-border } +{ .img-border } <figcaption>Figure 1: Jupyter Notebook accessing data on DBRepo using the Python Library.</figcaption> </figure> diff --git a/.docs/images/screenshots/xps-chart.png b/.docs/images/screenshots/xps-chart.png index b7b52a0805161779adb7cbcb6617c155702686e3..c064c2f2feefeafe831b3d628f5f9bdd63ac6136 100644 Binary files a/.docs/images/screenshots/xps-chart.png and b/.docs/images/screenshots/xps-chart.png differ diff --git a/.docs/images/screenshots/xps-notebook.png b/.docs/images/screenshots/xps-notebook.png new file mode 100644 index 0000000000000000000000000000000000000000..a1031b751991eb41d66c1c291f921f13f7cd5304 Binary files /dev/null and b/.docs/images/screenshots/xps-notebook.png differ diff --git a/lib/python/README.md b/lib/python/README.md index c8785a2e84153752e2e3020b6dfb56c3a119ce04..35fec904439b74e2d1f91d04a19f15615a0ca372 100644 --- a/lib/python/README.md +++ b/lib/python/README.md @@ -15,72 +15,50 @@ This package supports Python 3.11+. ## Quickstart -Create a table and import a .csv file from your computer. 
+Get public data from a table as pandas `DataFrame`: ```python from dbrepo.RestClient import RestClient -from dbrepo.api.dto import CreateTableColumn, ColumnType, CreateTableConstraints -client = RestClient(endpoint='https://test.dbrepo.tuwien.ac.at', username="foo", - password="bar") +client = RestClient(endpoint="https://dbrepo1.ec.tuwien.ac.at") +# Get a small data slice of just three rows +df = client.get_table_data(database_id=7, table_id=13, page=0, size=3, df=True) +print(df) +# x_coord component unit ... value stationid meantype +# 0 16.52617 Feinstaub (PM10) µg/m³ ... 21.0 01:0001 HMW +# 1 16.52617 Feinstaub (PM10) µg/m³ ... 23.0 01:0001 HMW +# 2 16.52617 Feinstaub (PM10) µg/m³ ... 26.0 01:0001 HMW +# +# [3 rows x 12 columns] +``` + +Import data into a table: + +```python +import pandas as pd +from dbrepo.RestClient import RestClient -# analyse csv -analysis = client.analyse_datatypes(file_path="sensor.csv", separator=",") -print(f"Analysis result: {analysis}") -# -> columns=(date=date, precipitation=decimal, lat=decimal, lng=decimal), separator=, -# line_termination=\n - -# create table -table = client.create_table(database_id=1, - name="Sensor Data", - constraints=CreateTableConstraints( - checks=['precipitation >= 0'], - uniques=[['precipitation']]), - columns=[CreateTableColumn(name="date", - type=ColumnType.DATE, - dfid=3, # YYYY-MM-dd - primary_key=True, - null_allowed=False), - CreateTableColumn(name="precipitation", - type=ColumnType.DECIMAL, - size=10, - d=4, - primary_key=False, - null_allowed=True), - CreateTableColumn(name="lat", - type=ColumnType.DECIMAL, - size=10, - d=4, - primary_key=False, - null_allowed=True), - CreateTableColumn(name="lng", - type=ColumnType.DECIMAL, - size=10, - d=4, - primary_key=False, - null_allowed=True)]) -print(f"Create table result {table}") -# -> (id=1, internal_name=sensor_data, ...) 
- -client.import_table_data(database_id=1, table_id=1, file_path="sensor.csv", separator=",", - skip_lines=1, line_encoding="\n") -print(f"Finished.") +client = RestClient(endpoint="https://dbrepo1.ec.tuwien.ac.at", username="foo", + password="bar") +df = pd.DataFrame(data={'x_coord': 16.52617, 'component': 'Feinstaub (PM10)', + 'unit': 'µg/m³', ...}) +client.import_table_data(database_id=7, table_id=13, file_name_or_data_frame=df) ``` ## Supported Features & Best-Practices - Manage user - account ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo//usage-overview/#create-user-account)) + account ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#create-user-account)) - Manage databases ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo//usage-overview/#create-database)) - Manage database access & - visibility ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo//usage-overview/#private-database-access)) + visibility ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#create-database)) - Import - dataset ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo//usage-overview/#private-database-access)) + dataset ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#import-dataset)) - Create persistent - identifiers ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo//usage-overview/#assign-database-pid)) + identifiers ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#assign-database-pid)) - Execute - queries ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo//usage-overview/#export-subset)) + queries ([docs](https://www.ifs.tuwien.ac.at/infrastructures/dbrepo/1.4.4/api/#export-subset)) - Get data from tables/views/subsets ## Configure @@ -89,16 +67,26 @@ All credentials can optionally be set/overridden with environment variables. 
Thi
Jupyter Notebooks by creating an invisible `.env` file and loading it:
 
 ```
-REST_API_ENDPOINT="https://test.dbrepo.tuwien.ac.at"
+REST_API_ENDPOINT="https://dbrepo1.ec.tuwien.ac.at"
 REST_API_USERNAME="foo"
 REST_API_PASSWORD="bar"
 REST_API_SECURE="True"
-AMQP_API_HOST="https://test.dbrepo.tuwien.ac.at"
+AMQP_API_HOST="https://dbrepo1.ec.tuwien.ac.at"
 AMQP_API_PORT="5672"
 AMQP_API_USERNAME="foo"
 AMQP_API_PASSWORD="bar"
-AMQP_API_VIRTUAL_HOST="/"
-REST_UPLOAD_ENDPOINT="https://test.dbrepo.tuwien.ac.at/api/upload/files"
+AMQP_API_VIRTUAL_HOST="dbrepo"
+REST_UPLOAD_ENDPOINT="https://dbrepo1.ec.tuwien.ac.at/api/upload/files"
+```
+
+You can reduce logging verbosity by raising the log level to e.g. `INFO` (this suppresses `DEBUG` messages; it does not disable logging entirely):
+
+```python
+from dbrepo.RestClient import RestClient
+import logging
+logging.getLogger().setLevel(logging.INFO)
+...
+client = RestClient(...)
 ```
 
 ## Roadmap
diff --git a/lib/python/dbrepo/RestClient.py b/lib/python/dbrepo/RestClient.py
index 54165642fb105cda117f10223b6fd6adffe01053..5b94a2e9a721f08285649f434ed9de0c9fca7652 100644
--- a/lib/python/dbrepo/RestClient.py
+++ b/lib/python/dbrepo/RestClient.py
@@ -1,6 +1,7 @@
 import os
 import sys
 import logging
+import time
 import requests
 from pydantic import TypeAdapter
@@ -1098,9 +1099,9 @@ class RestClient:
         raise ResponseCodeError(f'Failed to insert table data: response code: {response.status_code} is not '
                                 f'201 (CREATED): {response.text}')
 
-    def import_table_data(self, database_id: int, table_id: int, separator: str, file_path: str,
-                          quote: str = None, skip_lines: int = 0, false_encoding: str = None,
-                          true_encoding: str = None, null_encoding: str = None,
+    def import_table_data(self, database_id: int, table_id: int, separator: str,
+                          file_name_or_data_frame: str | DataFrame, quote: str = None, skip_lines: int = 0,
+                          false_encoding: str = None, true_encoding: str = None, null_encoding: str = None,
                           line_encoding: str = "\r\n") -> None:
         """
         Import a csv dataset from a file into a table in a database with given database id
and table id. @@ -1108,7 +1109,7 @@ class RestClient: :param database_id: The database id. :param table_id: The table id. :param separator: The csv column separator. - :param file_path: The path of the file that is imported on the storage service. + :param file_name_or_data_frame: The path of the file that is imported on the storage service or pandas dataframe. :param quote: The column data quotation character. Optional. :param skip_lines: The number of lines to skip. Optional. Default: 0. :param false_encoding: The encoding of boolean false. Optional. @@ -1123,6 +1124,12 @@ class RestClient: :raises ResponseCodeError: If something went wrong with the insert. """ client = UploadClient(endpoint=f"{self.endpoint}/api/upload/files") + if type(file_name_or_data_frame) is DataFrame: + file_path: str = f"./tmp-{time.time()}" + df: DataFrame = file_name_or_data_frame + df.to_csv(path_or_buf=file_path, index=False, header=False) + else: + file_path: str = file_name_or_data_frame filename = client.upload(file_path=file_path) url = f'/api/database/{database_id}/table/{table_id}/data/import' response = self._wrapper(method="post", url=url, force_auth=True,