From 998c1c3d53ebd6e601316c7895713d4b5dafb251 Mon Sep 17 00:00:00 2001
From: Niko <nikolaos.papadopoulos@univie.ac.at>
Date: Fri, 29 Nov 2024 17:37:23 +0100
Subject: [PATCH] manual removal of empty fields in GFF3 file

---
 08-submission/gff-03-ENA_conform.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/08-submission/gff-03-ENA_conform.sh b/08-submission/gff-03-ENA_conform.sh
index 9ddb137..1ee93f3 100644
--- a/08-submission/gff-03-ENA_conform.sh
+++ b/08-submission/gff-03-ENA_conform.sh
@@ -28,6 +28,7 @@ FLAGGED=./merged_sorted_named_dedup_flagged.gff3
 
 # first remove duplicate features
 agat_sp_fix_features_locations_duplicated.pl --gff $GFF -o $DEDUP
+sed -i 's/,""//g' "$DEDUP" # strip every literal ,"" from the deduplicated GFF3 in place (observed after the ID of PacBio genes)
 
 # flag short introns, according to https://www.biostars.org/p/374618/
 # The NCBI documentation suggests to use the pseudo=True tag for genes that are broken in some way
-- 
GitLab