Skip to content
Snippets Groups Projects
Commit b387ad91 authored by Niko (Nikolaos) Papadopoulos's avatar Niko (Nikolaos) Papadopoulos
Browse files

fixed paths, added validate script

parent 0bdd277c
No related branches found
No related tags found
No related merge requests found
...@@ -4,10 +4,7 @@ compose: ...@@ -4,10 +4,7 @@ compose:
bash gff-01-compose_gff.sh bash gff-01-compose_gff.sh
annotate: annotate:
module load conda bash gff-02-functional_annot.sh
# a conda environment that has pandas and can run jupyter notebooks
conda activate jupyterhub-5.1.0
jupyter nbconvert --to notebook --execute gff-02-annotate.ipynb
conform: conform:
bash gff-03-ENA_conform.sh bash gff-03-ENA_conform.sh
...@@ -16,4 +13,10 @@ embl: ...@@ -16,4 +13,10 @@ embl:
bash gff-04-convert_to_embl.sh bash gff-04-convert_to_embl.sh
validate: validate:
bash gff-05-submit_to_ENA.sh validate bash gff-05-submit_to_ENA.sh -validate
\ No newline at end of file
# Neither target produces a file of its own name, so keep make from ever
# mistaking a stray file called `submit` or `all` for an up-to-date target.
.PHONY: submit all

# Run the ENA submission script in submit mode.
submit:
	bash gff-05-submit_to_ENA.sh -submit

# Run every stage up to and including validation, in the order listed, and
# copy everything printed (stdout and stderr) to log.txt.
# The redirection and the pipe have to live in the recipe: on the prerequisite
# line make parses `2>&1` as a prerequisite that has no rule and
# `| tee log.txt` as order-only prerequisites, so `make all` aborted with
# "No rule to make target" and log.txt was never written.
# NOTE: unless the recipe shell runs with pipefail, the status of this line is
# that of `tee`, so check log.txt for failures before trusting "Done.".
all:
	$(MAKE) compose annotate conform embl validate 2>&1 | tee log.txt
	@echo Done.
\ No newline at end of file
...@@ -13,15 +13,16 @@ RESULT=/lisc/scratch/zoology/pycnogonum/genome/submission ...@@ -13,15 +13,16 @@ RESULT=/lisc/scratch/zoology/pycnogonum/genome/submission
cd $RESULT || exit cd $RESULT || exit
# define inputs and outputs # define inputs and outputs
GFF=$BASE/merged_sorted_named.gff3 GFF=./merged_sorted_named.gff3
DEDUP=$BASE/merged_sorted_named_dedup.gff3 DEDUP=./merged_sorted_named_dedup.gff3
SHORT_INTRONS=$BASE/short_introns.tsv SHORT_INTRONS=./short_introns.tsv
KILL_LIST=$BASE/kill_list.tsv KILL_LIST=./kill_list.tsv
FILTERED_GFF=$BASE/merged_sorted_named_dedup_filtered.gff3 FILTERED_GFF=./merged_sorted_named_dedup_filtered.gff3
FILTERED_mRNA=./short_introns.gff3
# the python script that will generate the kill list # the python script that will generate the kill list
KILLSCRIPT=/lisc/user/papadopoulos/repos/plit-genome/08-submission/gff-04-build_kill_list.py KILLSCRIPT=/lisc/user/papadopoulos/repos/plit-genome/08-submission/gff-03-build_kill_list.py
# first remove duplicate features # first remove duplicate features
agat_sp_fix_features_locations_duplicated.pl --gff "$GFF" -o "$DEDUP" agat_sp_fix_features_locations_duplicated.pl --gff "$GFF" -o "$DEDUP"
...@@ -31,7 +32,11 @@ agat_sp_list_short_introns.pl --gff "$DEDUP" --size 10 --out "$SHORT_INTRONS" ...@@ -31,7 +32,11 @@ agat_sp_list_short_introns.pl --gff "$DEDUP" --size 10 --out "$SHORT_INTRONS"
# this table contains the locus (chromosome), gene, start position, and length of # this table contains the locus (chromosome), gene, start position, and length of
# map the short introns to mRNAs in the GFF3 file # map the short introns to mRNAs in the GFF3 file
# this was written in Python 3.12 but Python >3 should be fine; we only use default libraries
python $KILLSCRIPT "$SHORT_INTRONS" "$DEDUP" > "$KILL_LIST" python $KILLSCRIPT "$SHORT_INTRONS" "$DEDUP" > "$KILL_LIST"
# use the kill list to filter the offending mRNAs out of the GFF3 # use the kill list to filter the offending mRNAs out of the GFF3
agat_sp_filter_feature_from_kill_list.pl --gff "$DEDUP" --kill_list "$KILL_LIST" -p mRNA -o "$FILTERED_GFF" agat_sp_filter_feature_from_kill_list.pl --gff "$DEDUP" --kill_list "$KILL_LIST" -p mRNA -o "$FILTERED_GFF"
\ No newline at end of file
# also make a supplementary GFF that keeps only the mRNAs named in the kill
# list (i.e. the ones flagged because of short introns); this is the
# complement of the filtered GFF above and is written to $FILTERED_mRNA:
agat_sp_filter_feature_from_keep_list.pl --gff "$DEDUP" --keep_list "$KILL_LIST" -p mRNA -o "$FILTERED_mRNA"
\ No newline at end of file
...@@ -3,8 +3,8 @@ ...@@ -3,8 +3,8 @@
module load conda module load conda
conda activate emblmygff3 conda activate emblmygff3
GENOME=/lisc/project/zoology/pycnogonum/paper/results/draft.fasta GENOME=/lisc/project/zoology/pycnogonum/paper/zenodo/results/draft.fasta
GFF=/lisc/project/zoology/pycnogonum/paper/results/merged_sorted_dedup.gff3 GFF=/lisc/scratch/zoology/pycnogonum/genome/submission/merged_sorted_named_dedup_filtered.gff3
RESDIR=/lisc/scratch/zoology/pycnogonum/genome/submission RESDIR=/lisc/scratch/zoology/pycnogonum/genome/submission
cd $RESDIR || exit cd $RESDIR || exit
...@@ -19,4 +19,6 @@ EMBLmyGFF3 $GFF $GENOME \ ...@@ -19,4 +19,6 @@ EMBLmyGFF3 $GFF $GENOME \
--locus_tag VPG \ --locus_tag VPG \
--project_id PRJEB80537 \ --project_id PRJEB80537 \
-vvv \ -vvv \
-o result.embl -o result.embl
\ No newline at end of file
# compress the EMBL flat file to result.embl.gz.
# -f overwrites an archive left behind by an earlier run; without it gzip
# refuses to replace the existing file and exits non-zero, which leaves a stale
# result.embl.gz in place and makes a re-run of this script fail at this step.
gzip -f result.embl
\ No newline at end of file
#!/usr/bin/env bash #!/usr/bin/env bash
# Validate or submit the genome annotation to ENA through webin-cli.
#
# Usage: bash gff-05-submit_to_ENA.sh -validate|-submit
#   The bare words `validate` and `submit` are accepted as well and are
#   mapped onto the corresponding webin-cli flag.
#
# Assumes we are in the submission directory and the genome manifest file
# (genome.manifest) is present and valid.
module load java

# Check the requested mode before starting java: a missing or misspelled
# argument would otherwise be handed to webin-cli verbatim (an empty string
# when no argument was given) and only surface as an opaque error there.
case "${1:-}" in
    -validate | validate) MODE=-validate ;;
    -submit | submit)     MODE=-submit ;;
    *)
        echo "usage: $0 -validate|-submit" >&2
        exit 2
        ;;
esac

java -jar ~/bin/webin-cli-8.0.0.jar \
    -context genome \
    -userName Webin-68127 \
    -passwordFile ~/webin.pwd \
    -manifest genome.manifest \
    "$MODE"
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment