From 0bdd277c2b3820c57df53c9a4ccc0991cef30036 Mon Sep 17 00:00:00 2001 From: Niko <nikolaos.papadopoulos@univie.ac.at> Date: Wed, 27 Nov 2024 21:07:50 +0100 Subject: [PATCH] wrapper for functional input notebook; fixed double/missing semicolons --- 08-submission/gff-02-functional_annot.ipynb | 10 ++++++---- 08-submission/gff-02-functional_annot.sh | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 08-submission/gff-02-functional_annot.sh diff --git a/08-submission/gff-02-functional_annot.ipynb b/08-submission/gff-02-functional_annot.ipynb index c26e7eb..98a24f3 100644 --- a/08-submission/gff-02-functional_annot.ipynb +++ b/08-submission/gff-02-functional_annot.ipynb @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -162,6 +162,8 @@ " # for line in tqdm(gff.readlines()):\n", " for line in gff.readlines():\n", " line = line.strip()\n", + " if line[-1] == ';':\n", + " line = line[:-1]\n", " conditions_skip = line.startswith('#') or 'tRNA' in line or 'name=' in line\n", " if not conditions_skip:\n", " seq_id, source, feature_type, start, end, score, strand, phase, attributes = line.split('\\t')\n", @@ -170,13 +172,13 @@ " gene = attributes['ID']\n", " name = find_protein(gene, emapper)\n", " name = f'{name} (predicted)'\n", - " line = f'{line}name={name}'\n", + " line = f'{line};name={name}'\n", " if feature_type == 'mRNA':\n", " mRNA = attributes['ID']\n", " isoform = mRNA.split('.')[-1]\n", - " line = f'{line}name={name} isoform {isoform};gene_name={name}'\n", + " line = f'{line};name={name} isoform {isoform};gene_name={name}'\n", " if feature_type == 'CDS' or feature_type == 'exon':\n", - " line = f'{line}gene_name={name}'\n", + " line = f'{line};gene_name={name}'\n", " named.write(line + '\\n')" ] } diff --git a/08-submission/gff-02-functional_annot.sh b/08-submission/gff-02-functional_annot.sh new file mode 100644 index 0000000..ff226d6 
--- /dev/null
+++ b/08-submission/gff-02-functional_annot.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/env bash
+# Wrapper script that executes the Jupyter notebook performing the functional annotation of the GFF3 file.
+
+# Run from this script's own directory so the relative notebook path below resolves from any caller location.
+cd -- "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1
+
+# Activate a conda environment that has pandas and can run jupyter notebooks; stop if that setup fails.
+module load conda || exit 1
+conda activate jupyterhub-5.2.1 || exit 1
+jupyter execute gff-02-functional_annot.ipynb
-- 
GitLab