From 7367f442e8a314b5d1a5b41ccbd9526850b4fc52 Mon Sep 17 00:00:00 2001
From: Niko <nikolaos.papadopoulos@univie.ac.at>
Date: Tue, 19 Nov 2024 16:57:35 +0100
Subject: [PATCH] submission code

---
 08-submission/.DS_Store          | Bin 0 -> 6148 bytes
 08-submission/README.md          |  10 ++++++++++
 08-submission/convert_to_embl.sh |  21 +++++++++++++++++++++
 08-submission/txome_manifest.py  |  20 ++++++++++++++++++++
 4 files changed, 51 insertions(+)
 create mode 100644 08-submission/.DS_Store
 create mode 100644 08-submission/README.md
 create mode 100644 08-submission/convert_to_embl.sh
 create mode 100644 08-submission/txome_manifest.py

diff --git a/08-submission/.DS_Store b/08-submission/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6
GIT binary patch
literal 6148
zcmZQzU|@7AO)+F(5MW?n;9!8z45|!R0Z1N%F(jFgL>QrFAPJ2!M?+vV1V%$(Gz3ON
zU^D~<VF)ln+{D2Rp-0Kl5Eu=C(GY-#0H}OW0QD6Z7#JL&bOVG2Nii@oFo3%Nj0_Ac
zFio(203!nfNGnJUNGpg2X=PvpvA|}4wK6b5wK9UcAq)(R;4TS>25V<v1ltVagS9g-
zf^BACV1#IAV1(Mt2<@RTf_gL{^C8+97{Ru~TsKOOhQMeDz(Rl-!Vmz}|E>%SxcdJP
zRior+2#kinunYl47MEZbCs3t{!+W4QHvuXKVuPw;Mo^s$(F3lEVT}ML$bg~*R5_@+
b2Uo?6kTwK}57Iu`5P${HC_Nei0}uiLNUI8I

literal 0
HcmV?d00001

diff --git a/08-submission/README.md b/08-submission/README.md
new file mode 100644
index 0000000..fbcdcea
--- /dev/null
+++ b/08-submission/README.md
@@ -0,0 +1,10 @@
+# Submission
+
+Utility scripts for data upload to ENA.
+
+- [EMBL format conversion](convert_to_embl.sh) for an annotated genome (FASTA + GFF3), inspired
+  by a [prokka issue](https://github.com/tseemann/prokka/issues/145) that led to
+  [GFF3toEMBL](https://github.com/sanger-pathogens/gff3toembl) and eventually to the real solution,
+  [EMBLmyGFF3](https://github.com/NBISweden/EMBLmyGFF3).
+- A [Python script](txome_manifest.py) to batch-create manifest files for de novo transcriptome
+  submissions to ENA.
\ No newline at end of file
diff --git a/08-submission/convert_to_embl.sh b/08-submission/convert_to_embl.sh
new file mode 100644
index 0000000..892017c
--- /dev/null
+++ b/08-submission/convert_to_embl.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#
+# Convert the annotated genome (FASTA + GFF3) to an EMBL flat file for ENA
+# submission using EMBLmyGFF3. The output is written to $RESDIR/result.embl.
+
+module load conda
+conda activate emblmygff3
+
+GENOME=/lisc/project/zoology/pycnogonum/paper/results/draft.fasta
+GFF=/lisc/project/zoology/pycnogonum/paper/results/merged_sorted.gff3
+RESDIR=/lisc/scratch/zoology/pycnogonum/genome/submission
+
+# Work inside the results directory so that result.embl lands there; abort with
+# a non-zero status if it cannot be entered.
+cd "$RESDIR" || exit 1
+
+# Expansions are quoted so that paths containing spaces or glob characters are
+# passed to EMBLmyGFF3 as single arguments.
+EMBLmyGFF3 "$GFF" "$GENOME" \
+        --topology linear \
+        --molecule_type 'genomic DNA' \
+        --transl_table 1  \
+        --species 'Pycnogonum litorale' \
+        --taxonomy INV \
+        --locus_tag VPG \
+        --project_id PRJEB80537 \
+        --verbose \
+        -o result.embl
\ No newline at end of file
diff --git a/08-submission/txome_manifest.py b/08-submission/txome_manifest.py
new file mode 100644
index 0000000..81d06c2
--- /dev/null
+++ b/08-submission/txome_manifest.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+"""Batch-create ENA manifest files for the de novo transcriptome assemblies.
+
+One ``<stage>.manifest`` file is written to ``out_dir`` per developmental stage.
+"""
+
+study = "PRJEB80537"
+
+# samples, assembly_names and file_locs are parallel: index i of each describes the same stage.
+# NOTE(review): assumes the sample accessions follow the order of assembly_names - confirm against ENA.
+samples = ["ERS21095029", "ERS21095030", "ERS21095031", "ERS21095032", "ERS21095033", "ERS21095034", "ERS21095035", "ERS21095036", "ERS21095037"]
+assembly_names = ["RNA_EMBRYO3", "RNA_INSTAR1", "RNA_INSTAR2", "RNA_INSTAR3", "RNA_INSTAR4", "RNA_INSTAR5", "RNA_INSTAR6", "RNA_JUV1", "RNA_SUBADULT"]
+base = "/lisc/scratch/zoology/pycnogonum/transcriptome/development"
+# Kept in developmental order (not alphabetical `ls` order) so that every directory lines up
+# with its assembly name, e.g. RNA_INSTAR1 <-> instar_I-protonymphon.
+file_locs = ["embryonic_stage3-4", "instar_I-protonymphon", "instar_II", "instar_III", "instar_IV", "instar_V", "instar_VI", "juvenile_I", "subadult"]
+out_dir = "/lisc/scratch/zoology/pycnogonum/genome/submission/transcriptomes"
+
+# zip() silently truncates to the shortest input, so fail loudly if an entry is missing.
+if not len(samples) == len(assembly_names) == len(file_locs):
+    raise ValueError("samples, assembly_names and file_locs must have the same length")
+
+for sample, name, loc in zip(samples, assembly_names, file_locs):
+    manifest_path = f"{out_dir}/{loc}.manifest"
+    with open(manifest_path, "w") as manifest:
+        manifest.write(f"STUDY   {study}\n")
+        manifest.write(f"SAMPLE  {sample}\n")
+        manifest.write(f"ASSEMBLYNAME    {name}\n")
+        manifest.write("ASSEMBLY_TYPE isolate\n")
+        manifest.write("PROGRAM Trinity\n")
+        manifest.write("PLATFORM    ILLUMINA\n")
+        manifest.write(f"FASTA   {base}/{loc}/Trinity.fasta.gz\n")
-- 
GitLab