From 7367f442e8a314b5d1a5b41ccbd9526850b4fc52 Mon Sep 17 00:00:00 2001 From: Niko <nikolaos.papadopoulos@univie.ac.at> Date: Tue, 19 Nov 2024 16:57:35 +0100 Subject: [PATCH] submission code --- 08-submission/.DS_Store | Bin 0 -> 6148 bytes 08-submission/README.md | 10 ++++++++++ 08-submission/convert_to_embl.sh | 21 +++++++++++++++++++++ 08-submission/txome_manifest.py | 20 ++++++++++++++++++++ 4 files changed, 51 insertions(+) create mode 100644 08-submission/.DS_Store create mode 100644 08-submission/README.md create mode 100644 08-submission/convert_to_embl.sh create mode 100644 08-submission/txome_manifest.py diff --git a/08-submission/.DS_Store b/08-submission/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..5008ddfcf53c02e82d7eee2e57c38e5672ef89f6 GIT binary patch literal 6148 zcmZQzU|@7AO)+F(5MW?n;9!8z45|!R0Z1N%F(jFgL>QrFAPJ2!M?+vV1V%$(Gz3ON zU^D~<VF)ln+{D2Rp-0Kl5Eu=C(GY-#0H}OW0QD6Z7#JL&bOVG2Nii@oFo3%Nj0_Ac zFio(203!nfNGnJUNGpg2X=PvpvA|}4wK6b5wK9UcAq)(R;4TS>25V<v1ltVagS9g- zf^BACV1#IAV1(Mt2<@RTf_gL{^C8+97{Ru~TsKOOhQMeDz(Rl-!Vmz}|E>%SxcdJP zRior+2#kinunYl47MEZbCs3t{!+W4QHvuXKVuPw;Mo^s$(F3lEVT}ML$bg~*R5_@+ b2Uo?6kTwK}57Iu`5P${HC_Nei0}uiLNUI8I literal 0 HcmV?d00001 diff --git a/08-submission/README.md b/08-submission/README.md new file mode 100644 index 0000000..fbcdcea --- /dev/null +++ b/08-submission/README.md @@ -0,0 +1,10 @@ +# Submission + +Utility scripts for data upload to ENA. + +- [EMBL format conversion](convert_to_embl.sh) for an annotated genome (Fasta + GFF3) - inspired + from a [prokka issue](https://github.com/tseemann/prokka/issues/145), leading to + [GFF3toEMBL](https://github.com/sanger-pathogens/gff3toembl) and eventually to the real solution, + [EMBLmyGFF3](https://github.com/NBISweden/EMBLmyGFF3). +- A [Python script](txome_manifest.py) to batch-create manifest files for de novo transcriptome + submissions to ENA. 
#!/usr/bin/env python
"""Batch-create manifest files for de novo transcriptome submissions to ENA.

One ``<file_loc>.manifest`` file is written into ``out_dir`` for every
(sample, assembly name, file location) triple defined below. Each manifest
points at ``<base>/<file_loc>/Trinity.fasta.gz``.
"""

import os

study = "PRJEB80537"

samples = [
    "ERS21095029",
    "ERS21095030",
    "ERS21095031",
    "ERS21095032",
    "ERS21095033",
    "ERS21095034",
    "ERS21095035",
    "ERS21095036",
    "ERS21095037",
]
assembly_names = [
    "RNA_EMBRYO3",
    "RNA_INSTAR1",
    "RNA_INSTAR2",
    "RNA_INSTAR3",
    "RNA_INSTAR4",
    "RNA_INSTAR5",
    "RNA_INSTAR6",
    "RNA_JUV1",
    "RNA_SUBADULT",
]
base = "/lisc/scratch/zoology/pycnogonum/transcriptome/development"
# NOTE(review): the three lists are paired purely by position. As written,
# "RNA_INSTAR1" is paired with "instar_II", "RNA_INSTAR2" with "instar_III" and
# "RNA_INSTAR3" with "instar_I-protonymphon" -- this list looks like it is in
# directory-listing order while `assembly_names` is in developmental order.
# The order is deliberately left untouched here; confirm the intended
# sample/name/location pairing before submitting.
file_locs = [
    "embryonic_stage3-4",
    "instar_II",
    "instar_III",
    "instar_I-protonymphon",
    "instar_IV",
    "instar_V",
    "instar_VI",
    "juvenile_I",
    "subadult",
]
out_dir = "/lisc/scratch/zoology/pycnogonum/genome/submission/transcriptomes"


def manifest_text(study_acc, sample, name, fasta):
    """Return the full content of one manifest file.

    Args:
        study_acc: Study accession written on the ``STUDY`` line.
        sample: Sample accession written on the ``SAMPLE`` line.
        name: Assembly name written on the ``ASSEMBLYNAME`` line.
        fasta: Path of the assembly file written on the ``FASTA`` line.

    Returns:
        The manifest as a single string, one ``KEY value`` pair per line,
        each line terminated by a newline.
    """
    fields = (
        ("STUDY", study_acc),
        ("SAMPLE", sample),
        ("ASSEMBLYNAME", name),
        ("ASSEMBLY_TYPE", "isolate"),
        ("PROGRAM", "Trinity"),
        ("PLATFORM", "ILLUMINA"),
        ("FASTA", fasta),
    )
    return "".join(f"{key} {value}\n" for key, value in fields)


def write_manifests(target_dir=None):
    """Write one manifest per configured sample and return the written paths.

    Args:
        target_dir: Directory to write into; defaults to the module-level
            ``out_dir``. It is created if it does not exist yet.

    Returns:
        List of the manifest file paths, in the order they were written.

    Raises:
        ValueError: If ``samples``, ``assembly_names`` and ``file_locs`` do
            not all have the same length.
    """
    if target_dir is None:
        target_dir = out_dir

    # zip() would silently drop the surplus entries of the longer lists, which
    # here would mean a sample quietly not being submitted.
    if not len(samples) == len(assembly_names) == len(file_locs):
        raise ValueError(
            "samples, assembly_names and file_locs must have the same length "
            f"(got {len(samples)}, {len(assembly_names)}, {len(file_locs)})"
        )

    os.makedirs(target_dir, exist_ok=True)

    written = []
    for sample, name, loc in zip(samples, assembly_names, file_locs):
        path = f"{target_dir}/{loc}.manifest"
        fasta = f"{base}/{loc}/Trinity.fasta.gz"
        with open(path, "w") as manifest:
            # Use the configured `study` rather than a second hard-coded copy
            # of the accession, so the two can never disagree.
            manifest.write(manifest_text(study, sample, name, fasta))
        written.append(path)
    return written


if __name__ == "__main__":
    write_manifests()