Skip to content
Snippets Groups Projects
Verified Commit 87412e52 authored by Martin Weise's avatar Martin Weise
Browse files

Tremendously increased the speed for loading large databases

parent 98dfd436
No related branches found
No related tags found
5 merge requests!322Master,!321Dev,!319Dev,!318Dev,!315Dev
......@@ -23,9 +23,21 @@ types), semantic concepts (i.e. ontologies) and relational metadata (databases,
## Generation
Most of the metadata available in DBRepo is generated automatically, leveraging the available information and taking
the burden away from researchers, data stewards, etc. For example, the schema (names, constraints, data length) of
generated tables and views is obtained from the `information_schema` database maintained by MariaDB internally.
DBRepo generates metadata for managed tables automatically by querying MariaDB's internal structures
(e.g. `information_schema`).
!!! info "Managed Tables"
DBRepo only manages system-versioned tables; other tables are not supported. Such tables are ignored by
DBRepo and can therefore co-exist in the same database. If you want a non-system-versioned table `my_table` to be
managed by DBRepo, make it system-versioned:
```sql
ALTER TABLE `my_table` ADD SYSTEM VERSIONING;
```
Then, refresh the managed table index by navigating to your database > Settings > Schema > Refresh. This action can
only be performed by the database owner.
## Identifiers
......
......
---
author: Martin Weise
---
## tl;dr
[:fontawesome-solid-database:  Dataset](https://dbrepo1.ec.tuwien.ac.at/pid/52){ .md-button .md-button--primary target="_blank" }
## Description
This dataset contains anonymous behavioural and ocular recordings from healthy participants during performance of a
decision-making task between action movements in which we studied the influence of social facilitation and social
pressure. The original dataset is available on [Kaggle](https://doi.org/10.34740/kaggle/ds/4292829).
## Solution
TBD
## DBRepo Features
- [x] System versioning
- [x] Subset exploration
- [x] Large dataset (3 tables, largest contains >35 million rows)
- [x] External data access for analysis
......@@ -25,6 +25,7 @@ import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import io.swagger.v3.oas.annotations.responses.ApiResponses;
import io.swagger.v3.oas.annotations.security.SecurityRequirement;
import jakarta.servlet.http.HttpServletRequest;
import jakarta.validation.Valid;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
......@@ -32,6 +33,7 @@ import lombok.extern.log4j.Log4j2;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.InputStreamResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.security.access.prepost.PreAuthorize;
......@@ -189,6 +191,7 @@ public class TableEndpoint {
@RequestParam(required = false) Instant timestamp,
@RequestParam(required = false) Long page,
@RequestParam(required = false) Long size,
@NotNull HttpServletRequest request,
Principal principal)
throws DatabaseUnavailableException, RemoteUnavailableException, TableNotFoundException,
TableMalformedException, PaginationException, QueryMalformedException, MetadataServiceException,
......@@ -222,6 +225,11 @@ public class TableEndpoint {
headers.set("Access-Control-Expose-Headers", "X-Count");
try {
headers.set("X-Count", "" + tableService.getCount(table, timestamp));
if (request.getMethod().equals("HEAD")) {
return ResponseEntity.ok()
.headers(headers)
.build();
}
final QueryResultDto dto = tableService.getData(table, timestamp, page, size);
return ResponseEntity.ok()
.headers(headers)
......
......
......@@ -9,6 +9,7 @@ import at.tuwien.exception.*;
import at.tuwien.gateway.MetadataServiceGateway;
import at.tuwien.service.TableService;
import at.tuwien.test.AbstractUnitTest;
import jakarta.servlet.http.HttpServletRequest;
import lombok.extern.log4j.Log4j2;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
......@@ -40,6 +41,9 @@ public class TableEndpointUnitTest extends AbstractUnitTest {
@Autowired
private TableEndpoint tableEndpoint;
@Autowired
private HttpServletRequest httpServletRequest;
@MockBean
private TableService tableService;
......@@ -152,7 +156,7 @@ public class TableEndpointUnitTest extends AbstractUnitTest {
.thenReturn(TABLE_8_DATA_DTO);
/* test */
final ResponseEntity<QueryResultDto> response = tableEndpoint.getData(DATABASE_3_ID, TABLE_8_ID, null, null, null, null);
final ResponseEntity<QueryResultDto> response = tableEndpoint.getData(DATABASE_3_ID, TABLE_8_ID, null, null, null, httpServletRequest, null);
assertEquals(HttpStatus.OK, response.getStatusCode());
assertNotNull(response.getHeaders().get("X-Count"));
assertEquals(1, response.getHeaders().get("X-Count").size());
......@@ -175,7 +179,7 @@ public class TableEndpointUnitTest extends AbstractUnitTest {
/* test */
assertThrows(TableNotFoundException.class, () -> {
tableEndpoint.getData(DATABASE_3_ID, TABLE_8_ID, null, null, null, null);
tableEndpoint.getData(DATABASE_3_ID, TABLE_8_ID, null, null, null, httpServletRequest, null);
});
}
......
......
......@@ -171,7 +171,7 @@ public class PrometheusEndpointMvcTest extends AbstractUnitTest {
/* mock */
try {
tableEndpoint.getData(DATABASE_1_ID, TABLE_1_ID, null, null, null, null);
tableEndpoint.getData(DATABASE_1_ID, TABLE_1_ID, null, null, null, httpServletRequest, null);
} catch (Exception e) {
/* ignore */
}
......
......
......@@ -71,8 +71,9 @@ export const useTableService = (): any => {
const axios = useAxiosInstance()
console.debug('get data for table with id', tableId, 'in database with id', databaseId);
return new Promise<QueryResultDto>((resolve, reject) => {
axios.get<QueryResultDto>(`/api/database/${databaseId}/table/${tableId}/data`, { params: mapFilter(timestamp, page, size), timeout: 30_000 })
axios.get<QueryResultDto>(`/api/database/${databaseId}/table/${tableId}/data`, { params: mapFilter(timestamp, page, size), timeout: 60_000 })
.then((response) => {
response.data.count = Number(response.headers['x-count'])
console.info('Got data for table with id', tableId, 'in database with id', databaseId)
resolve(response.data)
})
......@@ -87,7 +88,7 @@ export const useTableService = (): any => {
const axios = useAxiosInstance()
console.debug('get data count for table with id', tableId, 'in database with id', databaseId);
return new Promise<number>((resolve, reject) => {
axios.head<void>(`/api/database/${databaseId}/table/${tableId}/data`, { params: mapFilter(timestamp, null, null), timeout: 30_000 })
axios.head<void>(`/api/database/${databaseId}/table/${tableId}/data`, { params: mapFilter(timestamp, null, null), timeout: 60_000 })
.then((response: AxiosResponse<void>) => {
const count: number = Number(response.headers['x-count'])
console.info('Found' + count + 'in table with id', tableId, 'in database with id', databaseId)
......@@ -191,7 +192,7 @@ export const useTableService = (): any => {
const axios = useAxiosInstance()
console.debug('suggest semantic entities for table column with id', columnId, 'of table with id', tableId, 'of database with id', databaseId)
return new Promise<TableColumnEntityDto[]>((resolve, reject) => {
axios.get<TableColumnEntityDto[]>(`/api/database/${databaseId}/table/${tableId}/column/${columnId}/suggest`, {timeout: 10000})
axios.get<TableColumnEntityDto[]>(`/api/database/${databaseId}/table/${tableId}/column/${columnId}/suggest`, {timeout: 60_000})
.then((response) => {
console.info('Suggested semantic entities for table column with id', columnId, 'of table with id', tableId, 'of database with id', databaseId)
resolve(response.data)
......
......
......@@ -292,7 +292,6 @@ export default {
}
},
mounted () {
this.reload()
this.loadProperties()
},
methods: {
......@@ -429,7 +428,6 @@ export default {
reload () {
this.lastReload = new Date()
this.loadData({ page: this.options.page, itemsPerPage: this.options.itemsPerPage, sortBy: null})
this.loadCount()
},
loadData ({ page, itemsPerPage, sortBy }) {
this.options.page = page
......@@ -438,6 +436,7 @@ export default {
this.loadingData = true
tableService.getData(this.$route.params.database_id, this.$route.params.table_id, (page - 1), itemsPerPage, (this.versionISO || this.lastReload.toISOString()))
.then((data) => {
this.total = data.count
this.rows = data.result.map((row) => {
for (const col in row) {
const column = this.table.columns.filter(c => c.internal_name === col)[0]
......@@ -464,23 +463,6 @@ export default {
toast.error(this.$t(code) + ": " + message)
})
},
loadCount () {
const tableService = useTableService()
this.loadingCount = true
tableService.getCount(this.$route.params.database_id, this.$route.params.table_id, (this.versionISO || this.lastReload.toISOString()))
.then((count) => {
this.total = count
this.loadingCount = false
})
.catch(({code, message}) => {
this.loadingCount = false
const toast = useToastInstance()
if (typeof code !== 'string' || typeof message !== 'string') {
return
}
toast.error(this.$t(code) + ": " + message)
})
},
isFileField (column) {
return ['blob', 'longblob', 'mediumblob', 'tinyblob'].includes(column.column_type)
},
......
......
......@@ -1082,28 +1082,30 @@ export function timestampsToHumanDifference(date1: string, date2: string) {
return moment.duration(other.diff(date)).humanize(true)
}
export function sizeToHumanLabel(num: number) {
export function sizeToHumanLabel(num: number): string {
let number = Number(num)
if (!number) {
return '0 B'
}
if (number < 1000) {
return `${Math.floor(number)} B`
return `${roundTwoDecimals(number)} B`
}
number = number / 1000
if (number < 1000) {
return `${Math.floor(number)} kB`
return `${roundTwoDecimals(number)} kB`
}
number = number / 1000
if (number < 1000) {
return `${Math.floor(number)} MB`
return `${roundTwoDecimals(number)} MB`
}
number = number / 1000
if (number < 1000) {
return `${number} GB`
return `${roundTwoDecimals(number)} GB`
}
number = number / 1000
if (number < 1000) {
return `${number} TB`
return `${roundTwoDecimals(number)} TB`
}
/**
 * Rounds a number to at most two decimal places.
 *
 * `Number.EPSILON` is added before scaling so that values whose binary
 * representation lies just below a `.xx5` boundary (e.g. 1.005) round up
 * instead of down.
 *
 * @param num the value to round
 * @returns the value rounded to two decimals
 */
export function roundTwoDecimals(num: number): number {
  const nudged = num + Number.EPSILON
  const hundredths = Math.round(nudged * 100)
  return hundredths / 100
}
......@@ -47,6 +47,7 @@ nav:
- Air Quality Data: examples/air.md
- COVID-19 Data: examples/covid-19.md
- Hazard Data: examples/hazard.md
- Health Data: examples/health.md
- Industry 4.0 Power Data: examples/power.md
- Survey Data: examples/survey.md
- Lute Data: examples/lute-data.md
......
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment