Verified Commit ee0fb83f authored by Martin Weise

Partly implemented; still needs the data db to import them

parent d0cc05bf
@@ -13,11 +13,13 @@ ENV FLASK_RUN_HOST=0.0.0.0
 ENV PORT_APP=5000
 ENV FLASK_ENV=production
 ENV HOSTNAME=analyse-service
-ENV SHARED_FILESYSTEM=/tmp
+ENV UPLOAD_ENDPOINT=http://upload-service:1080/api/upload/files
 COPY ./as-yml/ ./as-yml/
 COPY ./*.py ./
+RUN mkdir -p /data
 EXPOSE $PORT_APP
 ENTRYPOINT [ "python", "./pywsgi.py" ]
@@ -13,6 +13,7 @@ import json
 import csv
 import logging
 import os
+import urllib.request
 import messytables, pandas as pd
 from messytables import CSVTableSet, type_guess, \
@@ -23,7 +24,10 @@ def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=None) -
     # Use option enum=True for searching Postgres ENUM Types in CSV file. Remark
     # Enum is not SQL standard, hence, it might not be supported by all db-engines.
     # However, it can be used in Postgres and MySQL.
-    path = os.path.join(os.getenv('SHARED_FILESYSTEM', '/tmp'), filename)
+    path = "/data/" + filename
+    api_path = os.getenv('UPLOAD_ENDPOINT', 'http://127.0.0.1:1080/api/upload/files') + "/" + filename
+    logging.info('retrieve api_path: %s and save it to path: %s', api_path, path)
+    urllib.request.urlretrieve(api_path, path)
     if separator is None:
         with open(path) as csvfile:
             dialect = csv.Sniffer().sniff(csvfile.readline())
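In effect, the analyse service no longer reads uploads from a shared filesystem: it now retrieves the file from the Upload Service over HTTP before sniffing the CSV dialect. A minimal sketch of the new flow, assuming an upload named `sample.csv` and the endpoint default from the hunk above (both illustrative, not part of the commit):

```python
import csv
import os
import urllib.request

# Mirror of the new retrieval step in determine_datatypes(): download the
# uploaded file into /data before analysing it.
filename = "sample.csv"  # hypothetical upload
api_path = os.getenv("UPLOAD_ENDPOINT",
                     "http://127.0.0.1:1080/api/upload/files") + "/" + filename
path = "/data/" + filename
urllib.request.urlretrieve(api_path, path)

# As in the hunk above: guess the CSV dialect from the first line only.
with open(path) as csvfile:
    dialect = csv.Sniffer().sniff(csvfile.readline())
```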
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# SQLite db
*.db
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Data Database
\ No newline at end of file
FROM python:3.10-alpine
RUN apk add bash curl jq && adduser -D alpine
WORKDIR /home/alpine
COPY Pipfile Pipfile.lock ./
RUN pip install pipenv && \
pipenv install gunicorn && \
pipenv install --system --deploy
COPY ./ds-yml ./ds-yml
COPY ./app.py ./app.py
ENV UPLOAD_ENDPOINT="http://upload-service:1080/api/upload/files"
RUN chown -R alpine:alpine ./
USER alpine
EXPOSE 5000
ENTRYPOINT [ "gunicorn", "-w", "4", "-b", ":5000", "app:app" ]
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
flasgger = "*"
flask = "~=2.0"
flask-cors = "~=4.0"
flask-jwt-extended = "~=4.5"
flask-sqlalchemy = "~=3.0"
prometheus-flask-exporter = "*"
python-dotenv = "~=1.0"
sqlalchemy-utils = "*"
gunicorn = "*"
[dev-packages]
[requires]
python_version = "3.10"
# Data Database Sidecar
Sidecar that downloads the .csv from the Upload Service and deposits it on the same pod as the data database.
## Endpoints
* Prometheus metrics [`/metrics`](http://localhost:5000/metrics)
* Health check [`/health`](http://localhost:5000/health)
* Swagger API [`/swagger-ui/`](http://localhost:5000/swagger-ui/)
\ No newline at end of file
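For illustration, the import endpoint accepts the payload defined in `#/components/schemas/Import` (see `ds-yml/import.yml` below) and answers `202` once the file has been fetched from the Upload Service. A minimal sketch, assuming the sidecar is reachable on `localhost:5000` (the compose file maps it to host port `3600`):

```python
import json
import urllib.request

# Ask the sidecar to pull sample.csv (hypothetical file) from the Upload Service.
req = urllib.request.Request(
    "http://localhost:5000/sidecar/import",
    data=json.dumps({"filepath": "sample.csv"}).encode("utf-8"),
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    print(resp.status)  # 202 on success; a 503 raises urllib.error.HTTPError
```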
import json
import logging
import os
from urllib.error import URLError, ContentTooShortError, HTTPError
from flasgger import LazyJSONEncoder, Swagger
from flask import Flask, request, Response
from flasgger.utils import swag_from
import urllib.request
from prometheus_flask_exporter import PrometheusMetrics
logging.basicConfig(level=logging.DEBUG)
from logging.config import dictConfig
# logging configuration
dictConfig({
'version': 1,
'formatters': {
'default': {
'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
},
'simple': {
'format': '[%(asctime)s] %(levelname)s: %(message)s',
},
},
'handlers': {'wsgi': {
'class': 'logging.StreamHandler',
'stream': 'ext://flask.logging.wsgi_errors_stream',
'formatter': 'simple' # default
}},
'root': {
'level': 'DEBUG',
'handlers': ['wsgi']
}
})
# create app object
app = Flask(__name__)
metrics = PrometheusMetrics(app)
metrics.info("app_info", "Application info", version="0.0.1")
app.config["SWAGGER"] = {"openapi": "3.0.1", "title": "Swagger UI", "uiversion": 3}
swagger_config = {
"headers": [],
"specs": [
{
"endpoint": "api-sidecar",
"route": "/api-sidecar.json",
"rule_filter": lambda rule: rule.endpoint.startswith('actuator') or rule.endpoint.startswith('sidecar'),
"model_filter": lambda tag: True, # all in
}
],
"static_url_path": "/flasgger_static",
"swagger_ui": True,
"specs_route": "/swagger-ui/",
}
template = {
"openapi": "3.0.0",
"info": {
"title": "Database Repository Data Database sidecar API",
"description": "Sidecar that downloads the import .csv file",
"version": "1.3.0",
"contact": {
"name": "Prof. Andreas Rauber",
"email": "andreas.rauber@tuwien.ac.at"
},
"license": {
"name": "Apache 2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
},
},
"externalDocs": {
"description": "Sourcecode Documentation",
"url": "https://gitlab.phaidra.org/fair-data-austria-db-repository/fda-services"
},
"servers": [
{
"url": "http://localhost:5000",
"description": "Generated server url"
},
{
"url": "https://test.dbrepo.tuwien.ac.at",
"description": "Sandbox"
}
]
}
swagger = Swagger(app, config=swagger_config, template=template)
# https://flask-jwt-extended.readthedocs.io/en/stable/options/
app.config["JWT_ALGORITHM"] = "HS256"
app.config["JWT_DECODE_ISSUER"] = os.getenv("JWT_ISSUER")
app.config["JWT_PUBLIC_KEY"] = os.getenv("JWT_PUBKEY")
app.json_encoder = LazyJSONEncoder
@app.route("/health", methods=["GET"], endpoint="actuator_health")
@swag_from("ds-yml/health.yml")
def health():
return Response({"status": "UP"}, mimetype="application/json"), 200
@app.route("/sidecar/import", methods=["POST"], endpoint="sidecar_import")
@swag_from("ds-yml/import.yml")
def import_csv():
logging.debug('endpoint import csv, body=%s', request)
input_json = request.get_json()
filepath = str(input_json['filepath'])
api = os.getenv("UPLOAD_ENDPOINT", "http://localhost:1080/api/upload/files")
try:
urllib.request.urlretrieve(api + "/" + filepath, "/tmp/" + filepath)
except URLError as e:
logging.error('Failed to import .csv: %s', e)
return Response(), 503
return Response(), 202
summary: Return a healthcheck
description: |
Return UP if the instance is ready to serve connections.
consumes:
- application/json
produces:
- application/json
parameters: [ ]
responses:
200:
description: OK, service is up and running
content:
application/json:
schema:
$ref: "#/components/schemas/Health"
404:
description: Service is not yet ready
tags:
- actuator
components:
schemas:
Health:
title: Status object
type: object
properties:
status:
type: string
example: UP
required:
- status
\ No newline at end of file
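This is the same check that the docker-compose healthcheck further below performs with curl and jq; an equivalent probe in Python (host and port assumed) would be:

```python
import json
import urllib.request

# Equivalent of: curl -sSL 127.0.0.1:5000/health | jq .status | grep "UP"
with urllib.request.urlopen("http://127.0.0.1:5000/health") as resp:
    assert json.load(resp)["status"] == "UP"
```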
summary: Imports a .csv from the Upload Service
description: |
Imports a specific .csv file from the Upload Service via HTTP
consumes:
- application/json
produces:
- application/json
parameters:
- in: "body"
name: "body"
description: "Payload to import the .csv"
required: true
schema:
$ref: "#/components/schemas/Import"
responses:
202:
description: Imported the .csv
content: { }
503:
description: The Upload Service could not be contacted or .csv was not found.
tags:
- sidecar
components:
schemas:
Import:
type: "object"
properties:
filepath:
type: "string"
example: "sample.csv"
\ No newline at end of file
@@ -87,7 +87,7 @@ services:
       - "--base-path=/api/upload/files/"
     volumes:
       - upload-service-data:/data
-      - "${SHARED_FILESYSTEM:-/tmp}:/srv/tusd-data/data"
+      # - "${SHARED_FILESYSTEM:-/tmp}:/srv/tusd-data/data"
     logging:
       driver: json-file
@@ -196,7 +196,7 @@ services:
     ports:
       - "5000:5000"
     environment:
-      SHARED_FILESYSTEM: "${SHARED_FILESYSTEM:-/tmp}"
+      UPLOAD_ENDPOINT: "${UPLOAD_ENDPOINT:-http://upload-service:1080/api/upload/files}"
     volumes:
       - "${SHARED_FILESYSTEM:-/tmp}:/tmp"
     healthcheck:
@@ -267,6 +267,25 @@ services:
       FLASK_DEBUG: ${SEARCH_DEBUG_MODE:-true}
       OPENSEARCH_HOST: ${OPENSEARCH_HOST:-dbrepo-search-db}
+  dbrepo-data-db-sidecar:
+    restart: "no"
+    container_name: dbrepo-data-db-sidecar
+    hostname: data-db-sidecar
+    build: ./dbrepo-data-db/sidecar
+    image: dbrepo-data-db-sidecar:latest
+    ports:
+      - "3600:5000"
+    environment:
+      FLASK_DEBUG: ${SEARCH_DEBUG_MODE:-true}
+      UPLOAD_ENDPOINT: "${UPLOAD_ENDPOINT:-http://upload-service:1080/api/upload/files}"
+    volumes:
+      - "${SHARED_FILESYSTEM:-/tmp}:/tmp"
+    healthcheck:
+      test: curl -sSL 127.0.0.1:5000/health | jq .status | grep "UP" || exit 1
+      interval: 10s
+      timeout: 5s
+      retries: 12

   dbrepo-ui:
     restart: "no"
     container_name: dbrepo-ui