diff --git a/.docs/api/data-service.md b/.docs/api/data-service.md index ff27c035863fc300dfd1aab6d45faee050595df4..66089a1cd69492c4f47f3048de1bcb31ac53c8a3 100644 --- a/.docs/api/data-service.md +++ b/.docs/api/data-service.md @@ -46,12 +46,18 @@ everytime e.g. a sensor measurement is inserted. By default, this information is administrators can disable this behavior by setting `CREDENTIAL_CACHE_TIMEOUT=0` (cache is deleted after 0 seconds). -## Upload +## Storage The Data Service also is capable to upload files to the S3 backend. The default limit of [`Tomcat`](https://spring.io/guides/gs/uploading-files#_tuning_file_upload_limits) in Spring Boot is configured to be `2GB`. You can provide your own limit with setting `MAX_UPLOAD_SIZE`. +By default, the Data Service removes datasets older than 24 hours; the cleanup job runs every 60 minutes. You can set +`S3_MAX_AGE` (in seconds) and `S3_STALE_CRON` to fit your use-case. You can disable this feature by setting `S3_STALE_CRON` +to `-`, but stale datasets will then accumulate and the storage will eventually run out of space. Note +that [Spring Boot uses its own flavor](https://spring.io/blog/2020/11/10/new-in-spring-5-3-improved-cron-expressions#usage) +of cron syntax. 
+ ## Limitations * Views in DBRepo can only have 63-character length (it is assumed only internal views have the maximum length of 64 diff --git a/dbrepo-auth-service/listeners/target/create-event-listener.jar b/dbrepo-auth-service/listeners/target/create-event-listener.jar index c45fcf9fa8e58ddb816cd6f9afb13cfd470c001b..c4222b60517827f2c2834533c8aa0d2a51a90548 100644 Binary files a/dbrepo-auth-service/listeners/target/create-event-listener.jar and b/dbrepo-auth-service/listeners/target/create-event-listener.jar differ diff --git a/dbrepo-data-service/rest-service/src/main/java/at/tuwien/DbrepoDataServiceApplication.java b/dbrepo-data-service/rest-service/src/main/java/at/tuwien/DataServiceApplication.java similarity index 57% rename from dbrepo-data-service/rest-service/src/main/java/at/tuwien/DbrepoDataServiceApplication.java rename to dbrepo-data-service/rest-service/src/main/java/at/tuwien/DataServiceApplication.java index 1f38a7920a020f53591ea2bd19bfdc199d9c1d87..95a70f0bb299dda8756f93fa76b2499fa7bd03a1 100644 --- a/dbrepo-data-service/rest-service/src/main/java/at/tuwien/DbrepoDataServiceApplication.java +++ b/dbrepo-data-service/rest-service/src/main/java/at/tuwien/DataServiceApplication.java @@ -3,13 +3,15 @@ package at.tuwien; import lombok.extern.log4j.Log4j2; import org.springframework.boot.SpringApplication; import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.scheduling.annotation.EnableScheduling; @Log4j2 +@EnableScheduling @SpringBootApplication -public class DbrepoDataServiceApplication { +public class DataServiceApplication { public static void main(String[] args) { - SpringApplication.run(DbrepoDataServiceApplication.class, args); + SpringApplication.run(DataServiceApplication.class, args); } } diff --git a/dbrepo-data-service/rest-service/src/main/resources/application.yml b/dbrepo-data-service/rest-service/src/main/resources/application.yml index 
f008cde99b234ea0fe798d4e16199e8aad5e363d..a22eb40a207fc436339c4cccdfb788a7e5b513ec 100644 --- a/dbrepo-data-service/rest-service/src/main/resources/application.yml +++ b/dbrepo-data-service/rest-service/src/main/resources/application.yml @@ -64,6 +64,8 @@ dbrepo: accessKeyId: "${S3_ACCESS_KEY_ID:seaweedfsadmin}" secretAccessKey: "${S3_SECRET_ACCESS_KEY:seaweedfsadmin}" bucket: "${S3_BUCKET:dbrepo}" + maxAge: "${S3_MAX_AGE:86400}" + cron: "${S3_STALE_CRON:0 */60 * * * *}" system: username: "${SYSTEM_USERNAME:admin}" password: "${SYSTEM_PASSWORD:admin}" diff --git a/dbrepo-data-service/rest-service/src/test/java/at/tuwien/service/StorageServiceIntegrationTest.java b/dbrepo-data-service/rest-service/src/test/java/at/tuwien/service/StorageServiceIntegrationTest.java index dd563deb70cd7021e0a721075669d755328ff0a8..9d923542a141e25fa30467708d73b291283c52ca 100644 --- a/dbrepo-data-service/rest-service/src/test/java/at/tuwien/service/StorageServiceIntegrationTest.java +++ b/dbrepo-data-service/rest-service/src/test/java/at/tuwien/service/StorageServiceIntegrationTest.java @@ -1,12 +1,12 @@ package at.tuwien.service; import at.ac.tuwien.ifs.dbrepo.core.api.ExportResourceDto; -import at.tuwien.config.S3Config; import at.ac.tuwien.ifs.dbrepo.core.exception.MalformedException; import at.ac.tuwien.ifs.dbrepo.core.exception.StorageNotFoundException; import at.ac.tuwien.ifs.dbrepo.core.exception.StorageUnavailableException; import at.ac.tuwien.ifs.dbrepo.core.exception.TableMalformedException; import at.ac.tuwien.ifs.dbrepo.core.test.BaseTest; +import at.tuwien.config.S3Config; import lombok.extern.log4j.Log4j2; import org.apache.commons.io.FileUtils; import org.apache.spark.sql.Dataset; @@ -31,6 +31,7 @@ import org.testcontainers.junit.jupiter.Testcontainers; import software.amazon.awssdk.core.sync.RequestBody; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.CreateBucketRequest; +import 
software.amazon.awssdk.services.s3.model.ListObjectsRequest; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import java.io.*; @@ -232,6 +233,35 @@ public class StorageServiceIntegrationTest extends BaseTest { assertEquals("", lines.get(0)); } + @Test + public void deleteStaleObjects_none_succeeds() { + + /* mock */ + s3Client.putObject(PutObjectRequest.builder() + .key("s3key") + .bucket(s3Config.getS3Bucket()) + .build(), RequestBody.fromFile(new File("src/test/resources/csv/weather_aus.csv"))); + + /* test */ + storageService.deleteStaleObjects(); + assertEquals(1, s3Client.listObjects(ListObjectsRequest.builder().bucket(s3Config.getS3Bucket()).build()).contents().size()); + } + + @Test + public void deleteStaleObjects_succeeds() throws InterruptedException { + + /* mock */ + s3Client.putObject(PutObjectRequest.builder() + .key("s3key") + .bucket(s3Config.getS3Bucket()) + .build(), RequestBody.fromFile(new File("src/test/resources/csv/weather_aus.csv"))); + + /* test */ + Thread.sleep(4000); + storageService.deleteStaleObjects(); + assertEquals(0, s3Client.listObjects(ListObjectsRequest.builder().bucket(s3Config.getS3Bucket()).build()).contents().size()); + } + @ParameterizedTest @Disabled("cannot fix") @MethodSource("loadDataset_arguments") diff --git a/dbrepo-data-service/rest-service/src/test/resources/application.properties b/dbrepo-data-service/rest-service/src/test/resources/application.properties index a0bb7de2bbbdfb31d85a3980355112185e51ca98..f1d57b67866e26b0b9ede1a51bbf7e3dd4444d45 100644 --- a/dbrepo-data-service/rest-service/src/test/resources/application.properties +++ b/dbrepo-data-service/rest-service/src/test/resources/application.properties @@ -33,3 +33,4 @@ spring.rabbitmq.password=guest # s3 dbrepo.s3.accessKeyId=minioadmin dbrepo.s3.secretAccessKey=minioadmin +dbrepo.s3.maxAge=3 diff --git a/dbrepo-data-service/services/src/main/java/at/tuwien/config/S3Config.java 
b/dbrepo-data-service/services/src/main/java/at/tuwien/config/S3Config.java index c5aeb968d52468416c336264232e016e136bfc1a..726692e55db5dc5d2c13f78275df41ff9ea685a4 100644 --- a/dbrepo-data-service/services/src/main/java/at/tuwien/config/S3Config.java +++ b/dbrepo-data-service/services/src/main/java/at/tuwien/config/S3Config.java @@ -30,6 +30,10 @@ public class S3Config { @Value("${dbrepo.s3.bucket}") private String s3Bucket; + /* maximum age in seconds after which an object in the bucket is considered stale */ + @Value("${dbrepo.s3.maxAge}") + private Integer maxAge; + @Bean public S3Client s3client() { final AwsCredentialsProvider credentialsProvider = StaticCredentialsProvider.create( diff --git a/dbrepo-data-service/services/src/main/java/at/tuwien/service/StorageService.java b/dbrepo-data-service/services/src/main/java/at/tuwien/service/StorageService.java index 65896d53e32c9024af39ac34cf2ed41029b0a045..0e126e27a6aa58bb24aed6569a07a6cf5b667091 100644 --- a/dbrepo-data-service/services/src/main/java/at/tuwien/service/StorageService.java +++ b/dbrepo-data-service/services/src/main/java/at/tuwien/service/StorageService.java @@ -9,6 +9,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import java.io.InputStream; +import java.time.Instant; import java.util.List; public interface StorageService { @@ -47,6 +48,19 @@ public interface StorageService { */ byte[] getBytes(String bucket, String key) throws StorageUnavailableException, StorageNotFoundException; + /** + * Deletes the object with the given key from the given bucket of the Storage Service. + * + * @param bucket The bucket name. + * @param key The object key. + */ + void deleteObject(String bucket, String key); + + /** + * Deletes all stale objects of the configured bucket, i.e. objects last modified longer ago than the configured maximum age. + */ + void deleteStaleObjects(); + /** * Loads an object of the default export bucket from the Storage Service into an export resource. 
* diff --git a/dbrepo-data-service/services/src/main/java/at/tuwien/service/impl/StorageServiceS3Impl.java b/dbrepo-data-service/services/src/main/java/at/tuwien/service/impl/StorageServiceS3Impl.java index 76bfb60c4de4b0facc647809b17db95e784a84cd..bb75d7bac20794bd16d2d47a3337cabbcad051c2 100644 --- a/dbrepo-data-service/services/src/main/java/at/tuwien/service/impl/StorageServiceS3Impl.java +++ b/dbrepo-data-service/services/src/main/java/at/tuwien/service/impl/StorageServiceS3Impl.java @@ -1,11 +1,11 @@ package at.tuwien.service.impl; import at.ac.tuwien.ifs.dbrepo.core.api.ExportResourceDto; -import at.tuwien.config.S3Config; import at.ac.tuwien.ifs.dbrepo.core.exception.MalformedException; import at.ac.tuwien.ifs.dbrepo.core.exception.StorageNotFoundException; import at.ac.tuwien.ifs.dbrepo.core.exception.StorageUnavailableException; import at.ac.tuwien.ifs.dbrepo.core.exception.TableMalformedException; +import at.tuwien.config.S3Config; import at.tuwien.service.StorageService; import lombok.extern.log4j.Log4j2; import org.apache.commons.lang3.RandomStringUtils; @@ -17,13 +17,12 @@ import org.springframework.core.io.InputStreamResource; import org.springframework.stereotype.Service; import software.amazon.awssdk.core.sync.RequestBody; import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.GetObjectRequest; -import software.amazon.awssdk.services.s3.model.NoSuchKeyException; -import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.s3.model.*; import java.io.*; import java.nio.charset.Charset; +import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.Arrays; import java.util.LinkedHashMap; import java.util.List; @@ -91,6 +90,40 @@ public class StorageServiceS3Impl implements StorageService { } } + /** + * Deletes the object with the given key from the given bucket. + */ + @Override + public void deleteObject(String bucket, String key) { + log.trace("delete
object with key {} from bucket: {}", key, bucket); + s3Client.deleteObject(DeleteObjectRequest.builder() + .bucket(bucket) + .key(key) + .build()); + } + + /** + * Deletes every object in the configured bucket whose last modification is older than the configured maximum + * age (in seconds). + */ + @Override + public void deleteStaleObjects() { + log.trace("list stale objects in bucket: {}", s3Config.getS3Bucket()); + /* compute the cutoff once so that all objects are compared against the same instant */ + final Instant cutoff = Instant.now().minus(s3Config.getMaxAge(), ChronoUnit.SECONDS); + /* a single list call returns at most 1000 keys, the paginator fetches all remaining pages */ + final List<String> keys = s3Client.listObjectsV2Paginator(ListObjectsV2Request.builder() + .bucket(s3Config.getS3Bucket()) + .build()) + .contents() + .stream() + .filter(o -> o.lastModified().isBefore(cutoff)) + .map(S3Object::key) + .toList(); + keys.forEach(key -> deleteObject(s3Config.getS3Bucket(), key)); + log.info("Deleted {} stale object(s) in bucket: {}", keys.size(), s3Config.getS3Bucket()); + } + @Override public ExportResourceDto getResource(String key) throws StorageNotFoundException, StorageUnavailableException { return getResource(s3Config.getS3Bucket(), key); diff --git a/dbrepo-data-service/services/src/main/java/at/tuwien/timer/StaleObjectTimer.java b/dbrepo-data-service/services/src/main/java/at/tuwien/timer/StaleObjectTimer.java new file mode 100644 index 0000000000000000000000000000000000000000..de30299f3d0b4ccace1a8181e678e5e3ebd2dd87 --- /dev/null +++ b/dbrepo-data-service/services/src/main/java/at/tuwien/timer/StaleObjectTimer.java @@ -0,0 +1,25 @@ +package at.tuwien.timer; + +import at.tuwien.service.StorageService; +import lombok.extern.log4j.Log4j2; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; + +@Log4j2 +@Component +public class StaleObjectTimer { + + private final StorageService storageService; + + @Autowired + public StaleObjectTimer(StorageService storageService) { + this.storageService = storageService; + } + + @Scheduled(cron = "${dbrepo.s3.cron}") + public void deleteStaleObjects() { + storageService.deleteStaleObjects(); + } + +} diff --git 
a/dbrepo-metadata-service/services/src/main/java/at/ac/tuwien/ifs/dbrepo/service/impl/StorageServiceS3Impl.java b/dbrepo-metadata-service/services/src/main/java/at/ac/tuwien/ifs/dbrepo/service/impl/StorageServiceS3Impl.java index 10e04eb291ede8938581316e21beb01979072d4c..345529486aaee6b1af596dc89da29298b060baff 100644 --- a/dbrepo-metadata-service/services/src/main/java/at/ac/tuwien/ifs/dbrepo/service/impl/StorageServiceS3Impl.java +++ b/dbrepo-metadata-service/services/src/main/java/at/ac/tuwien/ifs/dbrepo/service/impl/StorageServiceS3Impl.java @@ -8,7 +8,9 @@ import lombok.extern.log4j.Log4j2; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import software.amazon.awssdk.services.s3.S3Client; -import software.amazon.awssdk.services.s3.model.*; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.NoSuchKeyException; +import software.amazon.awssdk.services.s3.model.S3Exception; import java.io.IOException; import java.io.InputStream; diff --git a/helm/dbrepo/files/create-event-listener.jar b/helm/dbrepo/files/create-event-listener.jar index c45fcf9fa8e58ddb816cd6f9afb13cfd470c001b..c4222b60517827f2c2834533c8aa0d2a51a90548 100644 Binary files a/helm/dbrepo/files/create-event-listener.jar and b/helm/dbrepo/files/create-event-listener.jar differ