Skip to content
Snippets Groups Projects
Commit b387ad91 authored by Niko (Nikolaos) Papadopoulos's avatar Niko (Nikolaos) Papadopoulos
Browse files

fixed paths, added validate script

parent 0bdd277c
No related branches found
No related tags found
No related merge requests found
......@@ -4,10 +4,7 @@ compose:
bash gff-01-compose_gff.sh
annotate:
module load conda
# a conda environment that has pandas and can run jupyter notebooks
conda activate jupyterhub-5.1.0
jupyter nbconvert --to notebook --execute gff-02-annotate.ipynb
bash gff-02-functional_annot.sh
conform:
bash gff-03-ENA_conform.sh
......@@ -16,4 +13,10 @@ embl:
bash gff-04-convert_to_embl.sh
validate:
bash gff-05-submit_to_ENA.sh validate
\ No newline at end of file
bash gff-05-submit_to_ENA.sh -validate
# Submit to ENA: runs the submission script in -submit mode.
submit:
bash gff-05-submit_to_ENA.sh -submit
# Run the pipeline steps listed as prerequisites, then report completion.
#
# Shell redirection is only meaningful inside a recipe. Written in the
# prerequisite list, "2>&1" is taken as a prerequisite name and "|" starts the
# order-only prerequisites, so make fails looking for a rule to build "2>&1".
# To keep a log of a full run, redirect on the command line instead:
#     make all 2>&1 | tee log.txt
.PHONY: all
all: compose annotate conform embl validate
	@echo Done.
\ No newline at end of file
......@@ -13,15 +13,16 @@ RESULT=/lisc/scratch/zoology/pycnogonum/genome/submission
cd $RESULT || exit
# define inputs and outputs
GFF=$BASE/merged_sorted_named.gff3
DEDUP=$BASE/merged_sorted_named_dedup.gff3
SHORT_INTRONS=$BASE/short_introns.tsv
KILL_LIST=$BASE/kill_list.tsv
GFF=./merged_sorted_named.gff3
DEDUP=./merged_sorted_named_dedup.gff3
SHORT_INTRONS=./short_introns.tsv
KILL_LIST=./kill_list.tsv
FILTERED_GFF=$BASE/merged_sorted_named_dedup_filtered.gff3
FILTERED_GFF=./merged_sorted_named_dedup_filtered.gff3
FILTERED_mRNA=./short_introns.gff3
# the python script that will generate the kill list
KILLSCRIPT=/lisc/user/papadopoulos/repos/plit-genome/08-submission/gff-04-build_kill_list.py
KILLSCRIPT=/lisc/user/papadopoulos/repos/plit-genome/08-submission/gff-03-build_kill_list.py
# first remove duplicate features
agat_sp_fix_features_locations_duplicated.pl --gff "$GFF" -o "$DEDUP"
......@@ -31,7 +32,11 @@ agat_sp_list_short_introns.pl --gff "$DEDUP" --size 10 --out "$SHORT_INTRONS"
# this table contains the locus (chromosome), gene, start position, and length of
# map the short introns to mRNAs in the GFF3 file
# this was written in Python 3.12 but Python >3 should be fine; we only use default libraries
python $KILLSCRIPT "$SHORT_INTRONS" "$DEDUP" > "$KILL_LIST"
# use the kill list to filter the offending mRNAs out of the GFF3
agat_sp_filter_feature_from_kill_list.pl --gff "$DEDUP" --kill_list "$KILL_LIST" -p mRNA -o "$FILTERED_GFF"
\ No newline at end of file
agat_sp_filter_feature_from_kill_list.pl --gff "$DEDUP" --kill_list "$KILL_LIST" -p mRNA -o "$FILTERED_GFF"
# also make a supplementary GFF with only the short introns:
agat_sp_filter_feature_from_keep_list.pl --gff "$DEDUP" --keep_list "$KILL_LIST" -p mRNA -o "$FILTERED_mRNA"
\ No newline at end of file
......@@ -3,8 +3,8 @@
module load conda
conda activate emblmygff3
GENOME=/lisc/project/zoology/pycnogonum/paper/results/draft.fasta
GFF=/lisc/project/zoology/pycnogonum/paper/results/merged_sorted_dedup.gff3
GENOME=/lisc/project/zoology/pycnogonum/paper/zenodo/results/draft.fasta
GFF=/lisc/scratch/zoology/pycnogonum/genome/submission/merged_sorted_named_dedup_filtered.gff3
RESDIR=/lisc/scratch/zoology/pycnogonum/genome/submission
cd $RESDIR || exit
......@@ -19,4 +19,6 @@ EMBLmyGFF3 $GFF $GENOME \
--locus_tag VPG \
--project_id PRJEB80537 \
-vvv \
-o result.embl
\ No newline at end of file
-o result.embl
gzip result.embl
\ No newline at end of file
#!/usr/bin/env bash
# Validate or submit to ENA with Webin-CLI (genome context), using the
# manifest file genome.manifest in the current directory.
#
# Usage: gff-05-submit_to_ENA.sh (-validate | -submit)
#   -validate   passed to Webin-CLI as -validate
#   -submit     passed to Webin-CLI as -submit
# The leading dash may be omitted (validate / submit); it is added here.
#
# Assumes we are in the submission directory and the genome manifest file is
# present and valid.
# Exit status: 2 on a bad/missing mode, 1 if the manifest is missing,
# otherwise the exit status of the java command.

MANIFEST=genome.manifest

# Normalise and check the mode before doing anything else, so a missing or
# mistyped argument is not handed to Webin-CLI as an empty/unknown option.
MODE=${1:-}
case "$MODE" in
    -validate|-submit)
        ;;
    validate|submit)
        MODE="-$MODE"
        ;;
    *)
        echo "usage: $(basename "$0") (-validate | -submit)" >&2
        exit 2
        ;;
esac

# Fail early with a clear message when run from the wrong directory.
if [ ! -f "$MANIFEST" ]; then
    echo "error: $MANIFEST not found in $(pwd)" >&2
    exit 1
fi

module load java

java -jar ~/bin/webin-cli-8.0.0.jar \
    -context genome \
    -userName Webin-68127 \
    -passwordFile ~/webin.pwd \
    -manifest "$MANIFEST" \
    "$MODE"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment