diff --git a/08-submission/gff-03-ENA_conform.sh b/08-submission/gff-03-ENA_conform.sh
index 9ddb137036f7f7d26a345360f538b0ae38c2c832..1ee93f369f565c85162c494023141c5cb97487c9 100644
--- a/08-submission/gff-03-ENA_conform.sh
+++ b/08-submission/gff-03-ENA_conform.sh
@@ -28,6 +28,7 @@ FLAGGED=./merged_sorted_named_dedup_flagged.gff3
 
 # first remove duplicate features
 agat_sp_fix_features_locations_duplicated.pl --gff $GFF -o $DEDUP
+sed -i 's/,""//g' "$DEDUP" # strip every literal ,"" in place (GNU sed -i); it appears after the ID for PacBio genes
 
 # flag short introns, according to https://www.biostars.org/p/374618/
 # The NCBI documentation suggests to use the pseudo=True tag for genes that are broken in some way