updated table endpoint and feature extract commentary

b5a66e4c · Martin Weise · 75911770 · b5a66e4c · b5a66e4c · b5a66e4c
Commit b5a66e4c authored Jun 29, 2022 by Martin Weise
--- a/.invenio/api_query/api/table_data_endpoint_api.py
+++ b/.invenio/api_query/api/table_data_endpoint_api.py
@@ -259,7 +259,7 @@ class TableDataEndpointApi(object):
        auth_settings = []  # noqa: E501
        return self.api_client.call_api(
-            '/api/container/{id}/database/{databaseId}/table/{tableId}/data', 'HEAD',
+            '/api/container/{id}/database/{databaseId}/table/{tableId}/data', 'GET',
            path_params,
            query_params,
            header_params,
@@ -380,7 +380,7 @@ class TableDataEndpointApi(object):
        auth_settings = []  # noqa: E501
        return self.api_client.call_api(
-            '/api/container/{id}/database/{databaseId}/table/{tableId}/data', 'GET',
+            '/api/container/{id}/database/{databaseId}/table/{tableId}/data', 'HEAD',
            path_params,
            query_params,
            header_params,

--- a/.invenio/deposit.ipynb
+++ b/.invenio/deposit.ipynb
@@ -3,7 +3,9 @@
  {
   "cell_type": "markdown",
   "source": [
-    "# Deposit"
+    "# Deposit\n",
+    "\n",
+    "Please create an account at [http://localhost:3000/register](http://localhost:3000/register) with `user:user` before executing."
   ],
   "metadata": {
    "collapsed": false
@@ -81,10 +83,10 @@
      "{'access': {'embargo': {'active': False, 'reason': None, 'until': None},\n",
      "            'files': 'public',\n",
      "            'record': 'public'},\n",
-      " 'created': datetime.datetime(2022, 6, 28, 19, 47, 36, 289193, tzinfo=tzutc()),\n",
+      " 'created': datetime.datetime(2022, 6, 29, 10, 23, 1, 849279, tzinfo=tzutc()),\n",
-      " 'expires_at': datetime.datetime(2022, 6, 28, 19, 47, 36, 289218),\n",
+      " 'expires_at': datetime.datetime(2022, 6, 29, 10, 23, 1, 849305),\n",
      " 'files': {'default_preview': None, 'enabled': True},\n",
-      " 'id': 'a13x6-2wz51',\n",
+      " 'id': 'w37m8-dx896',\n",
      " 'is_published': False,\n",
      " 'metadata': {'creators': [{'affiliations': [{'id': None, 'name': 'TU Wien'}],\n",
      "                            'person_or_org': {'family_name': 'Weise',\n",
@@ -99,7 +101,7 @@
      "              'title': 'Jupyter Notebook Test'},\n",
      " 'pids': {'doi': None},\n",
      " 'revision_id': 4,\n",
-      " 'updated': datetime.datetime(2022, 6, 28, 19, 47, 36, 304744, tzinfo=tzutc()),\n",
+      " 'updated': datetime.datetime(2022, 6, 29, 10, 23, 1, 866243, tzinfo=tzutc()),\n",
      " 'versions': {'index': 1, 'is_latest': False, 'is_latest_draft': True}}\n"
     ]
    }
@@ -212,10 +214,10 @@
      "{'access': {'embargo': {'active': False, 'reason': None, 'until': None},\n",
      "            'files': 'public',\n",
      "            'record': 'public'},\n",
-      " 'created': datetime.datetime(2022, 6, 28, 19, 47, 42, 416073, tzinfo=tzutc()),\n",
+      " 'created': datetime.datetime(2022, 6, 29, 10, 23, 3, 488874, tzinfo=tzutc()),\n",
      " 'expires_at': None,\n",
      " 'files': {'default_preview': None, 'enabled': True},\n",
-      " 'id': 'a13x6-2wz51',\n",
+      " 'id': 'w37m8-dx896',\n",
      " 'is_published': True,\n",
      " 'metadata': {'creators': [{'affiliations': [{'id': None, 'name': 'TU Wien'}],\n",
      "                            'person_or_org': {'family_name': 'Weise',\n",
@@ -229,10 +231,10 @@
      "              'resource_type': {'id': 'other'},\n",
      "              'title': 'Jupyter Notebook Test'},\n",
      " 'pids': {'doi': {'client': 'datacite',\n",
-      "                  'identifier': '10.70124/a13x6-2wz51',\n",
+      "                  'identifier': '10.70124/w37m8-dx896',\n",
      "                  'provider': 'datacite'}},\n",
      " 'revision_id': 3,\n",
-      " 'updated': datetime.datetime(2022, 6, 28, 19, 47, 42, 490667, tzinfo=tzutc()),\n",
+      " 'updated': datetime.datetime(2022, 6, 29, 10, 23, 3, 555616, tzinfo=tzutc()),\n",
      " 'versions': {'index': 1, 'is_latest': True, 'is_latest_draft': True}}\n"
     ]
    }

 %% Cell type:markdown id: tags:
 # Deposit
+Please create an account at [http://localhost:3000/register](http://localhost:3000/register) with `user:user` before executing.
 %% Cell type:code id: tags:
 ``` python
 import os.path
 from os import listdir
 from os.path import isfile, join
 from api_document.api.document_endpoint_api import DocumentEndpointApi
 from api_document.api.file_endpoint_api import FileEndpointApi
 from api_authentication.api.authentication_endpoint_api import AuthenticationEndpointApi
 authentication = AuthenticationEndpointApi()
 document = DocumentEndpointApi()
 file = FileEndpointApi()
 ```
 %% Cell type:markdown id: tags:
 1. Obtain an authentication token
 %% Cell type:code id: tags:
 ``` python
 response = authentication.authenticate_user1({
    "username": "user",
    "password": "user"
 })
 document.api_client.default_headers = {"Authorization": "Bearer " + response.token}
 file.api_client.default_headers = {"Authorization": "Bearer " + response.token}
 ```
 %% Cell type:markdown id: tags:
 2. Create a metadata draft
 %% Cell type:code id: tags:
 ``` python
 response = document.create({
    "access": {
        "record": "public",
        "files": "public"
    },
    "files": {
        "enabled": True
    },
    "metadata": {
        "creators": [
            {
                "affiliations": [
                    {
                        "name": "TU Wien"
                    }
                ],
                "person_or_org": {
                    "type": "personal",
                    "name": "M., Weise",
                    "identifiers": [
                        {
                            "scheme": "orcid",
                            "identifier": "0000-0003-4216-302X"
                        }
                    ],
                    "given_name": "Martin",
                    "family_name": "Weise"
                }
            }
        ],
        "title": "Jupyter Notebook Test",
        "resource_type": {
            "id": "other"
        },
        "publication_date": "2022-06-28"
    }
 })
 document_id = response.id
 print(response)
 ```
 %% Output
    {'access': {'embargo': {'active': False, 'reason': None, 'until': None},
                'files': 'public',
                'record': 'public'},
-     'created': datetime.datetime(2022, 6, 28, 19, 47, 36, 289193, tzinfo=tzutc()),
+     'created': datetime.datetime(2022, 6, 29, 10, 23, 1, 849279, tzinfo=tzutc()),
-     'expires_at': datetime.datetime(2022, 6, 28, 19, 47, 36, 289218),
+     'expires_at': datetime.datetime(2022, 6, 29, 10, 23, 1, 849305),
     'files': {'default_preview': None, 'enabled': True},
-     'id': 'a13x6-2wz51',
+     'id': 'w37m8-dx896',
     'is_published': False,
     'metadata': {'creators': [{'affiliations': [{'id': None, 'name': 'TU Wien'}],
                                'person_or_org': {'family_name': 'Weise',
                                                  'given_name': 'Martin',
                                                  'identifiers': [{'identifier': '0000-0003-4216-302X',
                                                                   'scheme': 'orcid'}],
                                                  'name': 'Weise, Martin',
                                                  'type': 'personal'},
                                'role': None}],
                  'publication_date': datetime.datetime(2022, 6, 28, 0, 0),
                  'resource_type': {'id': 'other'},
                  'title': 'Jupyter Notebook Test'},
     'pids': {'doi': None},
     'revision_id': 4,
-     'updated': datetime.datetime(2022, 6, 28, 19, 47, 36, 304744, tzinfo=tzutc()),
+     'updated': datetime.datetime(2022, 6, 29, 10, 23, 1, 866243, tzinfo=tzutc()),
     'versions': {'index': 1, 'is_latest': False, 'is_latest_draft': True}}
 %% Cell type:markdown id: tags:
 3. Upload the files
 %% Cell type:code id: tags:
 ``` python
 files = [f for f in listdir("./audio") if isfile(join("./audio", f))]
 for f in files:
    print("... upload file", "/tmp/" + f)
    response = file.upload_file({
        "location": os.path.curdir + "/tmp/" + f
    }, document_id)
 ```
 %% Output
    ... upload file /tmp/colive.0044_20200518133554_1_m4a_1.wav
    ... upload file /tmp/colive.0044_20200518133554_2_m4a_1.wav
    ... upload file /tmp/colive.0066_20200611134530_1_m4a_0.wav
    ... upload file /tmp/colive.0066_20200611134530_2_m4a_0.wav
    ... upload file /tmp/colive.0066_20200612072315_1_m4a_0.wav
 %% Cell type:markdown id: tags:
 4. Publish the record
 %% Cell type:code id: tags:
 ``` python
 response = document.publish(document_id)
 print(response)
 ```
 %% Output
    {'access': {'embargo': {'active': False, 'reason': None, 'until': None},
                'files': 'public',
                'record': 'public'},
-     'created': datetime.datetime(2022, 6, 28, 19, 47, 42, 416073, tzinfo=tzutc()),
+     'created': datetime.datetime(2022, 6, 29, 10, 23, 3, 488874, tzinfo=tzutc()),
     'expires_at': None,
     'files': {'default_preview': None, 'enabled': True},
-     'id': 'a13x6-2wz51',
+     'id': 'w37m8-dx896',
     'is_published': True,
     'metadata': {'creators': [{'affiliations': [{'id': None, 'name': 'TU Wien'}],
                                'person_or_org': {'family_name': 'Weise',
                                                  'given_name': 'Martin',
                                                  'identifiers': [{'identifier': '0000-0003-4216-302X',
                                                                   'scheme': 'orcid'}],
                                                  'name': 'Weise, Martin',
                                                  'type': 'personal'},
                                'role': None}],
                  'publication_date': datetime.datetime(2022, 6, 28, 0, 0),
                  'resource_type': {'id': 'other'},
                  'title': 'Jupyter Notebook Test'},
     'pids': {'doi': {'client': 'datacite',
-                      'identifier': '10.70124/a13x6-2wz51',
+                      'identifier': '10.70124/w37m8-dx896',
                      'provider': 'datacite'}},
     'revision_id': 3,
-     'updated': datetime.datetime(2022, 6, 28, 19, 47, 42, 490667, tzinfo=tzutc()),
+     'updated': datetime.datetime(2022, 6, 29, 10, 23, 3, 555616, tzinfo=tzutc()),
     'versions': {'index': 1, 'is_latest': True, 'is_latest_draft': True}}

--- a/.invenio/feature_extract.ipynb
+++ b/.invenio/feature_extract.ipynb
@@ -19,12 +19,14 @@
    "  4. Create a mariadb container\n",
    "  5. Start the mariadb container\n",
    "  6. Create a database within the mariadb container\n",
-    "  7. Import the feature .csv (manually)"
+    "  7. Import the feature .csv (manually)\n",
+    "\n",
+    "Please create an account at [http://localhost:3000/register](http://localhost:3000/register) with `user:user` before executing."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 4,
   "outputs": [],
   "source": [
    "import os.path\n",
@@ -58,7 +60,7 @@
  {
   "cell_type": "markdown",
   "source": [
-    "  8. Download wav\n",
+    "  9. Download wav\n",
    "\n",
    "Resolve the DOI to URI"
   ],
@@ -68,7 +70,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
@@ -102,7 +104,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 6,
   "outputs": [
    {
     "name": "stdout",
@@ -114,11 +116,6 @@
      "... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200611134530_2_m4a_0.wav\n",
      "... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200612072315_1_m4a_0.wav\n",
      "... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200612072315_2_m4a_0.wav\n",
-      "... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200613082517_1_m4a_0.wav\n",
-      "... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200613082517_2_m4a_0.wav\n",
-      "... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200614080017_1_m4a_0.wav\n",
-      "... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200614080017_2_m4a_0.wav\n",
-      "... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200615070238_1_m4a_0.wav\n",
      "Generated a feature .csv in your home directory\n"
     ]
    }
@@ -127,6 +124,7 @@
    "response = rq.get(\"https://\" + host + \"/api/records/\" + id)\n",
    "record = response.json()\n",
    "\n",
+    "i = 0\n",
    "with open(os.path.expanduser(\"~/features.csv\"), \"w\") as f:\n",
    "    writer = csv.writer(f)\n",
    "    writer.writerow([\"key\", \"size\", \"link\"])\n",
@@ -134,6 +132,9 @@
    "        rq.get(file[\"links\"][\"self\"])\n",
    "        print(\"... feature extract from\", file[\"links\"][\"self\"])\n",
    "        writer.writerow([file[\"key\"], file[\"size\"], file[\"links\"][\"self\"]])\n",
+    "        i += 1\n",
+    "        if i > 5:\n",
+    "            break\n",
    "print(\"Generated a feature .csv in your home directory\")"
   ],
   "metadata": {
@@ -154,7 +155,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 7,
   "outputs": [],
   "source": [
    "response = authentication.authenticate_user1({\n",
@@ -183,17 +184,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 8,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "{'hash': 'f5a649a71aae3748e62228721c44627ffc866f665d677bb890c37b9111590ffa',\n",
+      "{'hash': 'a431232a07efc4027cf71beaf38e7465984985ded1d4a67a94d092b49bd4d65e',\n",
-      " 'id': 2,\n",
+      " 'id': 1,\n",
-      " 'internal_name': 'fda-userdb-mir-1010b964-f6fa-11ec-9f77-64bc58900b78',\n",
+      " 'internal_name': 'fda-userdb-mir-529d42b0-f796-11ec-ad95-64bc58900b78',\n",
      " 'is_public': None,\n",
-      " 'name': 'MIR 1010b964-f6fa-11ec-9f77-64bc58900b78'}\n"
+      " 'name': 'MIR 529d42b0-f796-11ec-ad95-64bc58900b78'}\n"
     ]
    }
   ],
@@ -224,17 +225,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 9,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "{'hash': 'f5a649a71aae3748e62228721c44627ffc866f665d677bb890c37b9111590ffa',\n",
+      "{'hash': 'a431232a07efc4027cf71beaf38e7465984985ded1d4a67a94d092b49bd4d65e',\n",
-      " 'id': 2,\n",
+      " 'id': 1,\n",
-      " 'internal_name': 'fda-userdb-mir-1010b964-f6fa-11ec-9f77-64bc58900b78',\n",
+      " 'internal_name': 'fda-userdb-mir-529d42b0-f796-11ec-ad95-64bc58900b78',\n",
      " 'is_public': None,\n",
-      " 'name': 'MIR 1010b964-f6fa-11ec-9f77-64bc58900b78'}\n"
+      " 'name': 'MIR 529d42b0-f796-11ec-ad95-64bc58900b78'}\n"
     ]
    }
   ],
@@ -263,7 +264,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 10,
   "outputs": [],
   "source": [
    "response = database.create({\n",

 %% Cell type:markdown id: tags:
 # Feature Extraction & Deposit
 In this notebook we show an example of creating a database from a .csv and performing feature extraction on audio files. The APIs are auto-generated from the Swagger endpoint documentation using [`generate.sh`](https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-docs/-/blob/master/swagger/generate.sh). Steps we perform:
  1. Download a music file from a public repository
  2. Perform feature extraction
  3. Obtain an authentication token
  4. Create a mariadb container
  5. Start the mariadb container
  6. Create a database within the mariadb container
  7. Import the feature .csv (manually)
+Please create an account at [http://localhost:3000/register](http://localhost:3000/register) with `user:user` before executing.
 %% Cell type:code id: tags:
 ``` python
 import os.path
 import uuid
 import time
 import re
 import csv
 import requests as rq
 from api_authentication.api.authentication_endpoint_api import AuthenticationEndpointApi
 from api_authentication.api.user_endpoint_api import UserEndpointApi
 from api_container.api.container_endpoint_api import ContainerEndpointApi
 from api_database.api.container_database_endpoint_api import ContainerDatabaseEndpointApi
 from api_table.api.table_endpoint_api import TableEndpointApi
 authentication = AuthenticationEndpointApi()
 user = UserEndpointApi()
 container = ContainerEndpointApi()
 database = ContainerDatabaseEndpointApi()
 table = TableEndpointApi()
 doi = "10.5281/zenodo.5649276"
 email = "some@example.com"
 ```
 %% Cell type:markdown id: tags:
-  8. Download wav
+  9. Download wav
 Resolve the DOI to URI
 %% Cell type:code id: tags:
 ``` python
 response = rq.get("https://doi.org/" + doi)
 id = re.findall("/([a-z0-9-]+)$", response.url)[0]
 host = re.findall("^https?:\/\/([a-z0-9]+\.[a-z]+)", response.url)[0]
 print("Resolved DOI to", host, "and record id", id)
 ```
 %% Output
    Resolved DOI to zenodo.org and record id 5649276
 %% Cell type:markdown id: tags:
 2. Perform feature extraction
 %% Cell type:code id: tags:
 ``` python
 response = rq.get("https://" + host + "/api/records/" + id)
 record = response.json()
+i = 0
 with open(os.path.expanduser("~/features.csv"), "w") as f:
    writer = csv.writer(f)
    writer.writerow(["key", "size", "link"])
    for file in record["files"]:
        rq.get(file["links"]["self"])
        print("... feature extract from", file["links"]["self"])
        writer.writerow([file["key"], file["size"], file["links"]["self"]])
+        i += 1
+        if i > 5:
+            break
 print("Generated a feature .csv in your home directory")
 ```
 %% Output
    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0044_20200518133554_1_m4a_1.wav
    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0044_20200518133554_2_m4a_1.wav
    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200611134530_1_m4a_0.wav
    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200611134530_2_m4a_0.wav
    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200612072315_1_m4a_0.wav
    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200612072315_2_m4a_0.wav
-    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200613082517_1_m4a_0.wav
-    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200613082517_2_m4a_0.wav
-    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200614080017_1_m4a_0.wav
-    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200614080017_2_m4a_0.wav
-    ... feature extract from https://zenodo.org/api/files/22d69a63-2aff-47ae-b818-be78a23e9889/colive.0066_20200615070238_1_m4a_0.wav
    Generated a feature .csv in your home directory
 %% Cell type:markdown id: tags:
 3. Obtain an authentication token
 %% Cell type:code id: tags:
 ``` python
 response = authentication.authenticate_user1({
    "username": "user",
    "password": "user"
 })
 container.api_client.default_headers = {"Authorization": "Bearer " + response.token}
 database.api_client.default_headers = {"Authorization": "Bearer " + response.token}
 table.api_client.default_headers = {"Authorization": "Bearer " + response.token}
 ```
 %% Cell type:markdown id: tags:
 4. Create a mariadb container
 %% Cell type:code id: tags:
 ``` python
 response = container.create1({
    "name": "MIR " + str(uuid.uuid1()),
    "repository": "mariadb",
    "tag": "10.5"
 })
 container_id = response.id
 print(response)
 ```
 %% Output
-    {'hash': 'f5a649a71aae3748e62228721c44627ffc866f665d677bb890c37b9111590ffa',
+    {'hash': 'a431232a07efc4027cf71beaf38e7465984985ded1d4a67a94d092b49bd4d65e',
-     'id': 2,
+     'id': 1,
-     'internal_name': 'fda-userdb-mir-1010b964-f6fa-11ec-9f77-64bc58900b78',
+     'internal_name': 'fda-userdb-mir-529d42b0-f796-11ec-ad95-64bc58900b78',
     'is_public': None,
-     'name': 'MIR 1010b964-f6fa-11ec-9f77-64bc58900b78'}
+     'name': 'MIR 529d42b0-f796-11ec-ad95-64bc58900b78'}
 %% Cell type:markdown id: tags:
 5. Start the mariadb container
 %% Cell type:code id: tags:
 ``` python
 response = container.modify({
    "action": "START"
 }, container_id)
 time.sleep(5)
 print(response)
 ```
 %% Output
-    {'hash': 'f5a649a71aae3748e62228721c44627ffc866f665d677bb890c37b9111590ffa',
+    {'hash': 'a431232a07efc4027cf71beaf38e7465984985ded1d4a67a94d092b49bd4d65e',
-     'id': 2,
+     'id': 1,
-     'internal_name': 'fda-userdb-mir-1010b964-f6fa-11ec-9f77-64bc58900b78',
+     'internal_name': 'fda-userdb-mir-529d42b0-f796-11ec-ad95-64bc58900b78',
     'is_public': None,
-     'name': 'MIR 1010b964-f6fa-11ec-9f77-64bc58900b78'}
+     'name': 'MIR 529d42b0-f796-11ec-ad95-64bc58900b78'}
 %% Cell type:markdown id: tags:
 6. Create a database within the mariadb container
 %% Cell type:code id: tags:
 ``` python
 response = database.create({
    "name": "MIR " + str(uuid.uuid1()),
    "description": "Music Information Retrieval",
    "is_public": True
 }, container_id)
 database_id = response.id
 ```
 %% Cell type:markdown id: tags:
 7. Import the feature .csv
 Now open [http://localhost:3000/](http://localhost:3000/), click on the database, and import the .csv file. Once the table has been created successfully, come back here.