From 998c1c3d53ebd6e601316c7895713d4b5dafb251 Mon Sep 17 00:00:00 2001 From: Niko <nikolaos.papadopoulos@univie.ac.at> Date: Fri, 29 Nov 2024 17:37:23 +0100 Subject: [PATCH] manual removal of empty fields in GFF3 file --- 08-submission/gff-03-ENA_conform.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/08-submission/gff-03-ENA_conform.sh b/08-submission/gff-03-ENA_conform.sh index 9ddb137..1ee93f3 100644 --- a/08-submission/gff-03-ENA_conform.sh +++ b/08-submission/gff-03-ENA_conform.sh @@ -28,6 +28,7 @@ FLAGGED=./merged_sorted_named_dedup_flagged.gff3 # first remove duplicate features agat_sp_fix_features_locations_duplicated.pl --gff $GFF -o $DEDUP +sed -i 's/,""//g' "$DEDUP" # strip the stray ,"" that appears after the ID for PacBio genes (edits $DEDUP in place) # flag short introns, according to https://www.biostars.org/p/374618/ # The NCBI documentation suggests to use the pseudo=True tag for genes that are broken in some way -- GitLab