From 1448fec74b72f6a5ff0093801c02c5302ec74c94 Mon Sep 17 00:00:00 2001
From: Niko <nikolaos.papadopoulos@univie.ac.at>
Date: Fri, 29 Nov 2024 17:37:38 +0100
Subject: [PATCH] manually replacing erroneous gene model

---
 08-submission/gff-01-compose_gff.sh | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/08-submission/gff-01-compose_gff.sh b/08-submission/gff-01-compose_gff.sh
index c6e0f95..38131a3 100644
--- a/08-submission/gff-01-compose_gff.sh
+++ b/08-submission/gff-01-compose_gff.sh
@@ -2,6 +2,9 @@
 
 # compose the GFF3 file to prepare for ENA submission
 
+module load conda
+conda activate agat-1.4.1
+
 # set base and result directory
 BASE=/lisc/scratch/zoology/pycnogonum/genome/draft/annot_merge
 RESULT=/lisc/scratch/zoology/pycnogonum/genome/submission
@@ -12,12 +15,18 @@ BRAKER_R1=$BASE/braker.gff
 BRAKER_R2=$BASE/braker2_unique_renamed_nocodon_intron.gff3
 DENOVO=$BASE/denovo_txomes/overlap_translated.gff3
 TRNASCAN=$BASE/../trnascan/trnascan.gff3
-
+# Also add the g8324 gene as predicted by BRAKER3; manual inspection has determined that it should
+# replace gene models PB.7650, at_DN0411, and at_DN0412
+g8324=$BASE/g8324.gff3
 # navigate to output directory
 cd $RESULT || exit
 
 # merge the GFF3 files
-cat $ISOSEQ $BRAKER_R1 $BRAKER_R2 $DENOVO $TRNASCAN > merged.gff3
+cat "$ISOSEQ" "$BRAKER_R1" "$BRAKER_R2" "$DENOVO" "$TRNASCAN" "$g8324" > merged_mix.gff3 || exit
+
+# drop the 3 gene models superseded by g8324; abort on failure so later edits never hit a stale merged.gff3
+printf '%s\n' "PB.7650" "at_DN0411" "at_DN0412" > kill_list
+agat_sp_filter_feature_from_kill_list.pl --gff merged_mix.gff3 --kill_list kill_list -p gene -o merged.gff3 || exit
 
 # rename pseudochromosomes 54-59 to 52-57 to reflect decontamination
 sed -i 's/pseudochrom_54/pseudochrom_52/g' merged.gff3
-- 
GitLab