#!/usr/bin/env bash
#
# Convert the annotated genome (FASTA + GFF3) into an EMBL flat file with
# EMBLmyGFF3, for upload to ENA. The result is written to
# "$RESDIR/result.embl".

module load conda
conda activate emblmygff3

GENOME=/lisc/project/zoology/pycnogonum/paper/results/draft.fasta
GFF=/lisc/project/zoology/pycnogonum/paper/results/merged_sorted.gff3
RESDIR=/lisc/scratch/zoology/pycnogonum/genome/submission

# Stop right away if the output directory is not reachable, so that
# result.embl is never written into whatever directory we started from.
cd "$RESDIR" || { echo "cannot change into $RESDIR" >&2; exit 1; }

# Paths are quoted so that whitespace or glob characters in them cannot be
# split or expanded by the shell.
EMBLmyGFF3 "$GFF" "$GENOME" \
    --topology linear \
    --molecule_type 'genomic DNA' \
    --transl_table 1 \
    --species 'Pycnogonum litorale' \
    --taxonomy INV \
    --locus_tag VPG \
    --project_id PRJEB80537 \
    --verbose \
    -o result.embl
#!/usr/bin/env python
"""Batch-create manifest files for de novo transcriptome submissions to ENA.

For every (sample, assembly name, file location) triple defined below, one
``<file location>.manifest`` file is written into ``out_dir``.
"""

import os

study = "PRJEB80537"

samples = [
    "ERS21095029",
    "ERS21095030",
    "ERS21095031",
    "ERS21095032",
    "ERS21095033",
    "ERS21095034",
    "ERS21095035",
    "ERS21095036",
    "ERS21095037",
]
assembly_names = [
    "RNA_EMBRYO3",
    "RNA_INSTAR1",
    "RNA_INSTAR2",
    "RNA_INSTAR3",
    "RNA_INSTAR4",
    "RNA_INSTAR5",
    "RNA_INSTAR6",
    "RNA_JUV1",
    "RNA_SUBADULT",
]
base = "/lisc/scratch/zoology/pycnogonum/transcriptome/development"
# NOTE(review): the three lists are paired by position. `assembly_names` is in
# numeric instar order, but here "instar_I-protonymphon" comes *after*
# "instar_II" and "instar_III", so RNA_INSTAR1 is paired with instar_II,
# RNA_INSTAR2 with instar_III and RNA_INSTAR3 with instar_I-protonymphon.
# The original pairing is kept unchanged; confirm it against the registered
# samples before submitting.
file_locs = [
    "embryonic_stage3-4",
    "instar_II",
    "instar_III",
    "instar_I-protonymphon",
    "instar_IV",
    "instar_V",
    "instar_VI",
    "juvenile_I",
    "subadult",
]
out_dir = "/lisc/scratch/zoology/pycnogonum/genome/submission/transcriptomes"


def build_manifest(sample, name, loc, study=study, base=base):
    """Return the manifest text for a single transcriptome assembly.

    Args:
        sample: Sample accession written to the ``SAMPLE`` field.
        name: Value of the ``ASSEMBLYNAME`` field.
        loc: Sub-directory of *base* that holds ``Trinity.fasta.gz``.
        study: Study accession written to the ``STUDY`` field.
        base: Directory containing one sub-directory per assembly.

    Returns:
        The manifest content, one ``KEY value`` pair per line, each line
        terminated by a newline.
    """
    fields = [
        ("STUDY", study),
        ("SAMPLE", sample),
        ("ASSEMBLYNAME", name),
        ("ASSEMBLY_TYPE", "isolate"),
        ("PROGRAM", "Trinity"),
        ("PLATFORM", "ILLUMINA"),
        ("FASTA", f"{base}/{loc}/Trinity.fasta.gz"),
    ]
    return "".join(f"{key} {value}\n" for key, value in fields)


def write_manifests(out_dir=out_dir):
    """Write one manifest per assembly into *out_dir*.

    The directory is created if it does not exist yet.

    Args:
        out_dir: Directory that receives the ``<loc>.manifest`` files.

    Returns:
        The paths of the written manifest files, in input order.

    Raises:
        ValueError: If ``samples``, ``assembly_names`` and ``file_locs`` do
            not all have the same length.
    """
    # zip() silently stops at the shortest input; a forgotten entry would
    # otherwise drop manifests without any warning.
    if not len(samples) == len(assembly_names) == len(file_locs):
        raise ValueError(
            "samples, assembly_names and file_locs must have the same length"
        )

    os.makedirs(out_dir, exist_ok=True)

    written = []
    for sample, name, loc in zip(samples, assembly_names, file_locs):
        path = os.path.join(out_dir, f"{loc}.manifest")
        with open(path, "w", encoding="utf-8") as manifest:
            manifest.write(build_manifest(sample, name, loc))
        written.append(path)
    return written


def main():
    """Create all manifest files in the configured output directory."""
    write_manifests()


if __name__ == "__main__":
    main()