{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "e4d3a1ad-1f33-49b4-b0df-917f13f1f820",
   "metadata": {},
   "source": [
    "# Genome Classifier\n",
    "\n",
    "Classify a genome as Archaea, Cyanobacteria, Gram-negative or Gram-positive with a pre-trained ModelSEEDpy genome classifier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "472aa6e1-c9aa-4d43-914e-4f71d3e6161c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import modelseedpy"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "11c16251-0cdc-4cd8-a6be-1be80646677d",
   "metadata": {},
   "source": [
    "## Pull the genome classifier model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "32587da5-317c-4a75-b3b1-c2467086a685",
   "metadata": {},
   "outputs": [],
   "source": [
    "from modelseedpy.helpers import get_classifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "00c68e84-def5-446b-8bfa-311909287eab",
   "metadata": {},
   "outputs": [],
   "source": [
    "classifier = get_classifier('knn_filter')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "eaef7784-140a-4e11-970a-b29cf023f5c9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "modelseedpy.core.msgenomeclassifier.MSGenomeClassifier"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type(classifier)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7e282aa9-f1b2-4a37-bf1e-25e29ef97480",
   "metadata": {},
   "source": [
    "## Get a genome and annotate it with RAST\n",
    "RAST annotation is essential because the classifier was trained on RAST-annotated functions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f4127633-a524-446b-9e8f-983ccf367035",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the E. coli protein FASTA (assembly GCF_000005845.2 / ASM584v2).\n",
    "# The path is relative, so the file must be in the notebook's working directory.\n",
    "GENOME_FASTA = 'GCF_000005845.2_ASM584v2_protein.faa'\n",
    "genome = modelseedpy.MSGenome.from_fasta(GENOME_FASTA, split=' ')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "73e8708a-ff54-4a6b-b777-adccfecc0192",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'id': '23AFF380-F4F9-11EB-BBBA-BBE5BBF382BD',\n",
       " 'parameters': ['-a',\n",
       " '-g',\n",
       " 200,\n",
       " '-m',\n",
       " 5,\n",
       " '-d',\n",
       " '/opt/patric-common/data/kmer_metadata_v2',\n",
       " '-u',\n",
       " 'http://pear.mcs.anl.gov:6100/query'],\n",
       " 'hostname': 'pear',\n",
       " 'tool_name': 'kmer_search',\n",
       " 'execution_time': 1628063644.76991},\n",
       " {'execution_time': 1628063644.90382,\n",
       " 'tool_name': 'KmerAnnotationByFigfam',\n",
       " 'hostname': 'pear',\n",
       " 'id': '23C46324-F4F9-11EB-BBBA-BBE5BBF382BD',\n",
       " 'parameters': ['annotate_hypothetical_only=1',\n",
       " 'dataset_name=Release70',\n",
       " 'kmer_size=8']},\n",
       " {'parameters': [],\n",
       " 'id': '23F64B78-F4F9-11EB-908D-F73BBDF382BD',\n",
       " 'tool_name': 'annotate_proteins_similarity',\n",
       " 'hostname': 'pear',\n",
       " 'execute_time': 1628063645.23091}]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "modelseedpy.RastClient().annotate_genome(genome)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6aa0fe52-eb01-4bee-9fd1-c5125c1e5f5e",
   "metadata": {},
   "source": [
    "## Run the classifier\n",
    "`classify` returns a one-letter class code:\n",
    "- A: Archaea\n",
    "- C: Cyanobacteria\n",
    "- N: Gram Negative\n",
    "- P: Gram Positive"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ab9ae991-c4d7-494d-8cd5-f6923c534c8a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'N'"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "classifier.classify(genome)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}