Unverified commit 2fcd8230 authored by Martin Weise

Analysis works with minIO

parent ee0fb83f
Part of 4 merge requests:
!231 CI: Remove build for log-service
!228 Better error message handling in the frontend
!223 Release of version 1.4.0
!212 Resolve "Modify storage solutions in regards to cloud readiness"
Showing 646 additions and 86 deletions
......@@ -13,7 +13,9 @@ ENV FLASK_RUN_HOST=0.0.0.0
ENV PORT_APP=5000
ENV FLASK_ENV=production
ENV HOSTNAME=analyse-service
ENV UPLOAD_ENDPOINT=http://upload-service:1080/api/upload/files
ENV S3_STORAGE_ENDPOINT="http://storage-service:9000"
ENV S3_ACCESS_KEY_ID="minioadmin"
ENV S3_SECRET_ACCESS_KEY="minioadmin"
COPY ./as-yml/ ./as-yml/
COPY ./*.py ./
......
......@@ -105,19 +105,20 @@ def determinedt():
logging.debug('endpoint determine datatype, body=%s', request)
input_json = request.get_json()
try:
filepath = str(input_json['filepath'])
filename = str(input_json['filename'])
enum = False
if 'enum' in input_json:
enum = bool(input_json['enum'])
print(enum)
logging.info("Enum is present in payload and set to %s", enum)
enum_tol = 0.001
if 'enum_tol' in input_json:
enum_tol = float(input_json['enum_tol'])
print(enum_tol)
logging.info("Enum toleration is present in payload and set to %s", enum_tol)
separator = None
if 'separator' in input_json:
separator = str(input_json['separator'])
res = determine_datatypes(filepath, enum, enum_tol, separator)
logging.info("Seperator is present in payload and set to %s", separator)
res = determine_datatypes(filename, enum, enum_tol, separator)
logging.debug('determine datatype resulted in datatypes %s', res)
return Response(res, mimetype="application/json"), 200
except OSError as e:
......
......@@ -12,9 +12,9 @@ parameters:
schema:
type: "object"
properties:
filepath:
filename:
type: "string"
example : "/data/testdt08.csv"
example : "sample.csv"
enum:
type: "boolean"
example: true
......
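The request body of /api/analyse/determinedt now carries the object key as filename rather than a filesystem path. A minimal sketch of calling the endpoint with the new payload, assuming the gateway path used by the frontend service further below and a locally running instance (host, port and the example values are assumptions, not part of this change):

import requests

# Assumption: the service is reachable through the gateway on localhost;
# only the payload keys (filename, enum, enum_tol, separator) come from this change.
payload = {
    'filename': 'sample.csv',  # object key in the dbrepo-upload bucket
    'enum': True,              # optionally detect ENUM candidate columns
    'enum_tol': 0.001,         # optional tolerance used by the ENUM detection
    'separator': ','           # optional, skips separator sniffing
}
response = requests.post('http://localhost/api/analyse/determinedt',
                         json=payload,
                         headers={'Accept': 'application/json'})
print(response.status_code, response.json())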
......@@ -13,8 +13,9 @@ import json
import csv
import logging
import os
import urllib.request
import io
import boto3
import messytables, pandas as pd
from messytables import CSVTableSet, type_guess, \
headers_guess, headers_processor, offset_processor
......@@ -24,18 +25,26 @@ def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=None) -
# Use option enum=True for searching Postgres ENUM Types in CSV file. Remark
# Enum is not SQL standard, hence, it might not be supported by all db-engines.
# However, it can be used in Postgres and MySQL.
path = "/data/" + filename
api_path = os.getenv('UPLOAD_ENDPOINT', 'http://127.0.0.1:1080/api/upload/files') + "/" + filename
logging.info('retrieve api_path: %s and save it to path: %s', api_path, path)
urllib.request.urlretrieve(api_path, path)
endpoint_url = os.getenv('S3_STORAGE_ENDPOINT', 'http://localhost:9000')
aws_access_key_id = os.getenv('S3_ACCESS_KEY_ID', 'minioadmin')
aws_secret_access_key = os.getenv('S3_SECRET_ACCESS_KEY', 'minioadmin')
s3_client = boto3.client(service_name='s3', endpoint_url=endpoint_url, aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key)
logging.info("retrieve file from S3, endpoint_url=%s, aws_access_key_id=%s, aws_secret_access_key=(hidden)",
endpoint_url, aws_access_key_id)
    response = s3_client.get_object(Bucket='dbrepo-upload', Key=filename)
    stream = response['Body']
    # read the object once: the S3 body stream can only be consumed a single time
    data = stream.read()
    if separator is None:
        with open(path) as csvfile:
            dialect = csv.Sniffer().sniff(csvfile.readline())
        logging.info('Attempt to guess separator from first line')
        with io.BytesIO(data) as fh:
            line = fh.readline().decode('utf-8')
            dialect = csv.Sniffer().sniff(line)
            separator = dialect.delimiter
        logging.debug('determined separator: %s', separator)
        logging.info('determined separator: %s', separator)
    # Load a file object:
    with open(path, 'rb') as fh:
    with io.BytesIO(data) as fh:
logging.info('Analysing corpus with separator: %s', separator)
table_set = CSVTableSet(fh, delimiter=separator)
# A table set is a collection of tables:
......@@ -55,7 +64,7 @@ def determine_datatypes(filename, enum=False, enum_tol=0.0001, separator=None) -
# list of rows
if enum == True:
            rows = pd.read_csv(path, sep=separator, header=offset)
            fh.seek(0)
            rows = pd.read_csv(fh, sep=separator, header=offset)
n = len(rows)
for i in range(0, (len(types))):
......
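Since the analyse service now reads its input from the dbrepo-upload bucket instead of fetching it from the upload service, a file has to be staged in MinIO before determine_datatypes can analyse it. A minimal sketch using the same defaults as above; the bucket-creation step and the local file path are assumptions:

import boto3

# Defaults taken from the service above; override via S3_STORAGE_ENDPOINT,
# S3_ACCESS_KEY_ID and S3_SECRET_ACCESS_KEY in other environments.
s3 = boto3.client(service_name='s3',
                  endpoint_url='http://localhost:9000',
                  aws_access_key_id='minioadmin',
                  aws_secret_access_key='minioadmin')

# Assumption: on a fresh MinIO instance the bucket may not exist yet.
buckets = [b['Name'] for b in s3.list_buckets().get('Buckets', [])]
if 'dbrepo-upload' not in buckets:
    s3.create_bucket(Bucket='dbrepo-upload')

# Upload a local CSV under the key that is later passed as `filename`.
s3.upload_file('sample.csv', 'dbrepo-upload', 'sample.csv')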
......@@ -47,3 +47,4 @@ xlrd==2.0.1
zipp==3.15.0
zope.event==4.6
zope.interface==6.0
boto3==1.28.82
\ No newline at end of file
......@@ -13,7 +13,7 @@ RUN pip install pipenv && \
COPY ./ds-yml ./ds-yml
COPY ./app.py ./app.py
ENV UPLOAD_ENDPOINT="http://upload-service:1080/api/upload/files"
ENV UPLOAD_ENDPOINT="http://upload-service:1080/api/upload"
RUN chown -R alpine:alpine ./
USER alpine
......
This diff is collapsed.
......@@ -108,9 +108,23 @@ def import_csv():
logging.debug('endpoint import csv, body=%s', request)
input_json = request.get_json()
filepath = str(input_json['filepath'])
api = os.getenv("UPLOAD_ENDPOINT", "http://localhost:1080/api/auth/files")
api = os.getenv("UPLOAD_ENDPOINT", "http://localhost:1080/api/upload")
try:
urllib.request.urlretrieve(api + "/" + filepath, "/tmp/" + filepath)
urllib.request.urlretrieve(api + "/files/" + filepath, "/tmp/" + filepath)
except URLError as e:
logging.error('Failed to import .csv: %s', e)
return Response(), 503
return Response(), 202
@app.route("/sidecar/export/<string:filename>", methods=["PUT"], endpoint="sidecar_export")
@swag_from("ds-yml/export.yml")
def export_csv(filename):
    logging.debug('endpoint export csv, filename=%s, body=%s', filename, request)
    api = os.getenv("UPLOAD_ENDPOINT", "http://localhost:1080/api/upload")
    try:
        # fetch the file from the upload endpoint into /tmp
        urllib.request.urlretrieve(api + "/files/" + filename, "/tmp/" + filename)
    except URLError as e:
        logging.error('Failed to export .csv: %s', e)
        return Response(), 503
......
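The sidecar gains a PUT /sidecar/export/&lt;filename&gt; route alongside the existing CSV import endpoint. A minimal sketch of triggering it; the sidecar base URL is a placeholder, only the route and the 503 error case come from the handler above:

import os
import requests

# Placeholder base URL; the sidecar host and port are not part of this change.
base = os.getenv('SIDECAR_BASE', 'http://localhost:8080')
filename = 'sample.csv'

response = requests.put(f'{base}/sidecar/export/{filename}')
# A 503 means the handler could not reach the UPLOAD_ENDPOINT.
print(response.status_code)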
......@@ -34,6 +34,10 @@ upstream search-db-dashboard {
server search-db-dashboard:5601;
}
upstream storage-service {
server storage-service:9001;
}
server {
listen 80 default_server;
server_name _;
......@@ -56,6 +60,22 @@ server {
proxy_read_timeout 90;
}
location /admin/storage/ {
rewrite /admin/storage/(.*) /$1 break;
# http
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
chunked_transfer_encoding off;
# proxy
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_pass http://storage-service;
proxy_read_timeout 90;
}
location /api/broker {
rewrite /api/broker/(.*) /admin/broker/api/$1 break;
proxy_set_header Host $host;
......
package at.tuwien.api.database;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotBlank;
import lombok.*;
import lombok.extern.jackson.Jacksonized;
@Getter
@Setter
@Builder
@NoArgsConstructor
@AllArgsConstructor
@Jacksonized
@ToString
public class LoadFileDto {
@NotBlank(message = "filepath is required")
@Schema(example = "sample.csv")
private String filepath;
}
......@@ -14,10 +14,10 @@ COPY ./yarn.lock ./
# Install yarn dependencies
RUN yarn install --frozen-lockfile
COPY ./nuxt.config.js ./
COPY ./ava.config.cjs ./
COPY ./babel.config.js ./
COPY ./config.js ./
COPY ./nuxt.config.js ./nuxt.config.js
COPY ./ava.config.cjs ./ava.config.cjs
COPY ./babel.config.js ./babel.config.js
COPY ./config.js ./config.js
COPY ./assets ./assets
COPY ./api ./api
COPY ./components ./components
......@@ -44,9 +44,14 @@ ENV BROKER_PASSWORD="fda"
ENV BROKER_LOGIN_URL="/admin/broker/"
ENV KEYCLOAK_LOGIN_URL="/api/auth/"
ENV OPENSEARCH_LOGIN_URL="/admin/dashboard/"
ENV MINIO_LOGIN_URL="/admin/storage/"
ENV LOGO="/logo.png"
ENV SEARCH_USERNAME="admin"
ENV SEARCH_PASSWORD="admin"
ENV S3_STORAGE_HOSTNAME="storage-service"
ENV S3_STORAGE_PORT="9000"
ENV S3_ACCESS_KEY_ID="minioadmin"
ENV S3_SECRET_ACCESS_KEY="minioadmin"
ENV VERSION="${TAG}"
ENV TITLE="Database Repository"
ENV ICON="/favicon.ico"
......
......@@ -2,9 +2,13 @@ import Vue from 'vue'
import api from '@/api'
class AnalyseService {
determineDataTypes (filepath) {
determineDataTypes (filename, separator) {
return new Promise((resolve, reject) => {
api.post('/api/analyse/determinedt', { filepath }, { headers: { Accept: 'application/json' } })
const payload = {
filename,
separator
}
api.post('/api/analyse/determinedt', payload, { headers: { Accept: 'application/json' } })
.then((response) => {
const analysis = response.data
console.debug('response analysis', analysis)
......
......@@ -18,6 +18,25 @@ class MiddlewareService {
})
})
}
upload (file) {
return new Promise((resolve, reject) => {
const formData = new FormData()
formData.append('file', file, file.name)
axios.post('/server-middleware/upload', formData, { headers: { 'Content-Type': 'multipart/form-data' } })
.then((response) => {
const metadata = response.data
console.debug('response metadata', metadata)
resolve(metadata)
})
.catch((error) => {
const { code, message } = error
console.error('Failed to upload file', error)
Vue.$toast.error(`[${code}] Failed to upload file: ${message}`)
reject(error)
})
})
}
}
export default new MiddlewareService()
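The frontend now sends uploads through the Nuxt server middleware as multipart/form-data instead of talking to the tus endpoint directly. A rough equivalent of that request for reference; the base URL is an assumption, while the route and the file field name come from the FormData call above:

import requests

# Assumption: the UI is served on localhost; the middleware route is /server-middleware/upload.
with open('sample.csv', 'rb') as fh:
    response = requests.post('http://localhost/server-middleware/upload',
                             files={'file': ('sample.csv', fh, 'text/csv')})
print(response.status_code, response.json())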
import Vue from 'vue'
const tus = require('tus-js-client')
class UploadService {
upload (file) {
return new Promise((resolve, reject) => {
const endpoint = `${location.protocol}//${location.host}/api/upload/files`
console.debug('upload file to endpoint:', endpoint)
const upload = new tus.Upload(file, {
endpoint,
retryDelays: [0, 3000, 5000, 10000, 20000],
metadata: {
filename: file.name,
filetype: file.type
},
onError (error) {
console.error('Failed because: ' + error)
},
onProgress (bytesUploaded, bytesTotal) {
const percentage = ((bytesUploaded / bytesTotal) * 100).toFixed(2)
console.debug(bytesUploaded, bytesTotal, percentage + '%')
},
onSuccess () {
console.info('Download %s from %s', upload.file.name, upload.url)
Vue.$toast.success('Successfully uploaded file')
const matches = upload.url.match(/files\/([a-z0-9]+)/gi)
if (matches.length !== 1) {
console.error('Failed to match file name', matches)
reject(new Error('Failed to match file name'))
}
upload.path = matches[0].replace('files/', '')
resolve(upload)
}
})
upload.findPreviousUploads().then(function (previousUploads) {
/* Found previous uploads so we select the first one */
if (previousUploads.length) {
upload.resumeFromPreviousUpload(previousUploads[0])
}
upload.start()
})
})
}
}
export default new UploadService()
......@@ -132,7 +132,7 @@
<script>
import QueryService from '@/api/query.service'
import UploadService from '@/api/upload.service'
import MiddlewareService from '@/api/middleware.service'
export default {
props: {
......@@ -311,11 +311,11 @@ export default {
if (!file) {
return
}
UploadService.upload(file)
.then((file) => {
console.debug('uploaded file', file)
MiddlewareService.upload(file)
.then((metadata) => {
console.debug('uploaded file', metadata)
this.localDisplay[column.internal_name] = this.localTuple[column.internal_name]
this.localTuple[column.internal_name] = file.path
this.localTuple[column.internal_name] = metadata.path
})
.catch((error) => {
console.error(`Failed to set column value: ${column.internal_name}`, error)
......
......@@ -13,4 +13,10 @@ config.clientId = process.env.NODE_ENV !== 'development' ? process.env.DBREPO_CL
config.clientSecret = process.env.NODE_ENV !== 'development' ? process.env.DBREPO_CLIENT_SECRET : 'MUwRc7yfXSJwX8AdRMWaQC3Nep1VjwgG'
config.defaultPublisher = process.env.NODE_ENV !== 'development' ? process.env.DEFAULT_PID_PUBLISHER : ''
config.doiUrl = process.env.NODE_ENV !== 'development' ? process.env.DOI_URL : 'https://doi.org'
config.minIoUrl = process.env.NODE_ENV !== 'development' ? process.env.MINIO_LOGIN_URL : '/admin/storage/'
config.s3storageHostname = process.env.NODE_ENV !== 'development' ? process.env.S3_STORAGE_HOSTNAME : 'storage-service'
config.s3storagePort = process.env.NODE_ENV !== 'development' ? Number(process.env.S3_STORAGE_PORT) : 9000
config.s3accessKeyId = process.env.NODE_ENV !== 'development' ? process.env.S3_ACCESS_KEY_ID : 'minioadmin'
config.s3secretAccessKey = process.env.NODE_ENV !== 'development' ? process.env.S3_SECRET_ACCESS_KEY : 'minioadmin'
config.forceSsl = process.env.NODE_ENV !== 'development' ? process.env.FORCE_SSL === 'true' : false
module.exports = config
......@@ -463,6 +463,11 @@ export default {
this.$store.commit('SET_SEARCH_USERNAME', this.$config.searchUsername)
this.$store.commit('SET_SEARCH_PASSWORD', this.$config.searchPassword)
this.$store.commit('SET_DOI_URL', this.$config.doiUrl)
this.$store.commit('SET_S3_STORAGE_HOSTNAME', this.$config.s3storageHostname)
this.$store.commit('SET_S3_STORAGE_PORT', this.$config.s3storagePort)
this.$store.commit('SET_S3_ACCESS_KEY_ID', this.$config.s3accessKeyId)
this.$store.commit('SET_S3_SECRET_ACCESS_KEY', this.$config.s3secretAccessKey)
this.$store.commit('SET_FORCE_SSL', this.$config.forceSsl)
console.debug('runtime config', this.$config)
},
advancedSearch () {
......
import path from 'path'
import colors from 'vuetify/es5/util/colors'
import { icon, clientSecret, title, logo, version, defaultPublisher, doiUrl, clientId, searchUsername, searchPassword, brokerLoginUrl, keycloakLoginUrl, openSearchUrl } from './config'
import { forceSsl, icon, clientSecret, title, logo, version, defaultPublisher, doiUrl, minIoUrl, clientId, searchUsername, searchPassword, brokerLoginUrl, keycloakLoginUrl, openSearchUrl, s3storageHostname, s3storagePort, s3accessKeyId, s3secretAccessKey } from './config'
const proxy = {}
......@@ -29,8 +29,6 @@ const meta = [
{ name: 'viewport', content: 'width=device-width, initial-scale=1' }
]
const forceSsl = process.env.FORCE_SSL === 'true'
if (forceSsl) {
console.info('Flag FORCE_SSL is set: http-equiv Content-Security-Policy header is set to upgrade-insecure-requests')
meta.push({ 'http-equiv': 'Content-Security-Policy', content: 'upgrade-insecure-requests' })
......@@ -109,7 +107,12 @@ export default {
openSearchUrl,
searchUsername,
searchPassword,
doiUrl
doiUrl,
minIoUrl,
s3storageHostname,
s3storagePort,
s3accessKeyId,
s3secretAccessKey
},
serverMiddleware: [
......@@ -140,6 +143,12 @@ export default {
// https://github.com/nuxt/nuxt/issues/7722
build: {
extend (config, { isDev, isClient }) {
/* AWS S3 depends on this, we need to tell it that we are a client, not a server */
config.node = {
fs: 'empty'
}
},
babel: {
presets (env, [preset, options]) {
return [
......
......@@ -41,6 +41,7 @@
"jwt-decode": "^3.1.2",
"knex": "^0.95.6",
"lodash": "^4.17.21",
"minio": "7.0.18",
"moment": "^2.29.1",
"multer": "^1.4.2",
"node-fetch": "^2.6.1",
......
......@@ -104,7 +104,7 @@
<script>
import TableService from '@/api/table.service'
import QueryService from '@/api/query.service'
import UploadService from '@/api/upload.service'
import MiddlewareService from '@/api/middleware.service'
const { isNonNegativeInteger } = require('@/utils')
export default {
......@@ -187,12 +187,13 @@ export default {
isNonNegativeInteger,
uploadAndImport () {
this.loading = true
UploadService.upload(this.fileModel)
MiddlewareService.upload(this.fileModel)
.then((file) => {
console.debug('uploaded file', file)
this.tableImport.location = file.path
QueryService.importCsv(this.$route.params.database_id, this.$route.params.table_id, this.tableImport)
.then(() => {
.then((metadata) => {
console.debug('successfully imported data', metadata)
this.$toast.success('Successfully imported data')
this.$router.push(`/database/${this.$route.params.database_id}/table/${this.$route.params.table_id}`)
})
......