diff --git a/08-submission/gff-02-functional_annot.ipynb b/08-submission/gff-02-functional_annot.ipynb index c26e7ebe4e841a6c2b15bcaa3f7490e012e40990..98a24f3574ce367d9ab9ab21dc4efc431768555e 100644 --- a/08-submission/gff-02-functional_annot.ipynb +++ b/08-submission/gff-02-functional_annot.ipynb @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -162,6 +162,8 @@ " # for line in tqdm(gff.readlines()):\n", " for line in gff.readlines():\n", " line = line.strip()\n", + " if line.endswith(';'): # drop a trailing ';' so attributes can be appended as ';key=value'; endswith() is safe on blank lines, unlike line[-1]\n", + " line = line[:-1]\n", - " conditions_skip = line.startswith('#') or 'tRNA' in line or 'name=' in line\n", + " conditions_skip = not line or line.startswith('#') or 'tRNA' in line or 'name=' in line\n", " if not conditions_skip:\n", " seq_id, source, feature_type, start, end, score, strand, phase, attributes = line.split('\\t')\n", @@ -170,13 +172,13 @@ " gene = attributes['ID']\n", " name = find_protein(gene, emapper)\n", " name = f'{name} (predicted)'\n", - " line = f'{line}name={name}'\n", + " line = f'{line};name={name}'\n", " if feature_type == 'mRNA':\n", " mRNA = attributes['ID']\n", " isoform = mRNA.split('.')[-1]\n", - " line = f'{line}name={name} isoform {isoform};gene_name={name}'\n", + " line = f'{line};name={name} isoform {isoform};gene_name={name}'\n", " if feature_type == 'CDS' or feature_type == 'exon':\n", - " line = f'{line}gene_name={name}'\n", + " line = f'{line};gene_name={name}'\n", " named.write(line + '\\n')" ] } diff --git a/08-submission/gff-02-functional_annot.sh b/08-submission/gff-02-functional_annot.sh new file mode 100644 index 0000000000000000000000000000000000000000..ff226d61581c3f95c4dcf2ae99478642652e3034 --- /dev/null +++ b/08-submission/gff-02-functional_annot.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +# wrapper script that runs the jupyter notebook performing the functional annotation of the GFF3 file + +module load conda + +conda activate jupyterhub-5.2.1 +# a conda environment that has pandas and can run jupyter notebooks + +jupyter execute 
gff-02-functional_annot.ipynb