{ "cells": [ { "cell_type": "markdown", "id": "7f53fcb3-4b49-4d57-a2e5-afa555378ce0", "metadata": {}, "source": [ "# Reading ModelSEED Biochemistry from the ModelSEED Database\n", "\n", "This notebook provides examples of reading data from the ModelSEED biochemistry database." ] }, { "cell_type": "markdown", "id": "74246002-2a2e-405c-8aa0-2ed382c752cc", "metadata": {}, "source": [ "The biochemistry database is represented by the `modelseedpy.biochem.modelseed_biochem.ModelSEEDDatabase` class.\n", "\n", "The database can be instantiated either from a local clone of the database repository or directly from GitHub via URL.\n", "\n", "* `from_local` - from_local(*path to repository*)\n", "  * Example:\n", "    1. First, obtain a copy of the repository: `git clone https://github.com/ModelSEED/ModelSEEDDatabase.git`\n", "    2. Then load the repository with `from_local`: `from_local('/home/user/ModelSEEDDatabase')`\n", "* `from_github` - from_github(*branch or commit version*)\n", "  * Example:\n", "    * Load the dev branch from GitHub (default: https://github.com/ModelSEED/ModelSEEDDatabase): `from_github('dev')`\n", "    * Load a specific commit version from GitHub: `from_github('194ac8afe48f8a606c0dd07ba3c7af10c02ba2fd')`\n", "    * Load from another fork/repository: `from_github('master', 'https://raw.githubusercontent.com/Fxe/ModelSEEDDatabase')`" ] }, { "cell_type": "code", "execution_count": 1, "id": "ac3992a9-d5b3-48d7-b744-0dd08a32df27", "metadata": {}, "outputs": [], "source": [ "import logging\n", "import pandas as pd\n", "from modelseedpy.biochem.modelseed_biochem import get_structures_from_df, get_aliases_from_df, get_names_from_df, process_aliases, load_metabolites_from_df\n", "from modelseedpy.biochem.modelseed_biochem import ALIAS_RXN_IDENTIFIERS_ORG, ALIAS_MODELS\n", "from modelseedpy.biochem.modelseed_biochem import from_local2, from_local, from_github\n", "from modelseedpy.biochem.modelseed_reaction import ModelSEEDReaction, ModelSEEDReaction2\n", "logger = logging.getLogger(__name__)" 
] }, { "cell_type": "code", "execution_count": 2, "id": "65ecaae2-d062-4221-b567-dddd4eec15bd", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 16.8 s, sys: 668 ms, total: 17.5 s\n", "Wall time: 17.5 s\n" ] } ], "source": [ "%%time\n", "modelseed_local2 = from_local2('/home/fliu/workspace/python/ModelSEEDDatabase/')" ] }, { "cell_type": "code", "execution_count": 3, "id": "cc04b352-3890-4bad-8a16-c9a05e401be2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 42.2 s, sys: 503 ms, total: 42.7 s\n", "Wall time: 42.7 s\n" ] } ], "source": [ "%%time\n", "modelseed_local1 = from_local('/home/fliu/workspace/python/ModelSEEDDatabase/')" ] }, { "cell_type": "code", "execution_count": 4, "id": "43934d69-bbd0-4cab-ab7f-a6e4296a7b56", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 18.8 s, sys: 1.12 s, total: 19.9 s\n", "Wall time: 24.3 s\n" ] } ], "source": [ "%%time\n", "modelseed_git = from_github('dev')" ] }, { "cell_type": "code", "execution_count": 5, "id": "7e4ce949-7c34-408e-976b-77d7eaf0993f", "metadata": {}, "outputs": [], "source": [ "# Nested index built from each compound's InChIKey split on '-':\n", "# first part -> second part -> set of (compound id, third part).\n", "inchi_key_lookup = {}\n", "for cpd in modelseed_git.compounds:\n", "    # Normalise missing values (NaN or empty string) to None on the compound itself.\n", "    inchi_key = cpd.inchi_key\n", "    if pd.isna(inchi_key) or len(inchi_key) == 0:\n", "        inchi_key = None\n", "    cpd.inchi_key = inchi_key\n", "    if not cpd.inchi_key:\n", "        continue\n", "    parts = cpd.inchi_key.split('-')\n", "    if len(parts) != 3:\n", "        # Unpacking straight into three names raised ValueError here and aborted\n", "        # the whole index build on the first malformed key; report and move on.\n", "        logger.warning('Skipping %s: malformed InChIKey %r', cpd.id, cpd.inchi_key)\n", "        continue\n", "    first, second, third = parts\n", "    inchi_key_lookup.setdefault(first, {}).setdefault(second, set()).add((cpd.id, third))" ] }, { "cell_type": "code", "execution_count": 6, "id": "27d973b2-67af-43a0-b418-737843aeb038", "metadata": {}, "outputs": [], "source": [ "# Map each metabolite seed_id to the ids of the non-obsolete reactions that contain it.\n", "metabolite_reactions = {}\n", "for rxn in modelseed_git.reactions:\n", "    if rxn.is_obsolete:\n", "        continue\n", "    for met in rxn.metabolites:\n", "        metabolite_reactions.setdefault(met.seed_id, set()).add(rxn.id)" ] },
{ "cell_type": "code", "execution_count": 14, "id": "28c90f2a-3f21-4616-8c69-2c746be512ec", "metadata": {}, "outputs": [], "source": [ "import cobrakbase\n", "kbase = cobrakbase.KBaseAPI()\n", "genome = kbase.get_from_ws('87388/3/1')" ] }, { "cell_type": "code", "execution_count": 38, "id": "87e117e3-762c-4840-a40b-66714ef675f5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'aliases': [['protein_id', 'WP_066378293.1'], ['locus_tag', 'A3776_RS08395']],\n", " 'cdss': ['A3776_RS08395_CDS_1'],\n", " 'dna_sequence': 'ATGATTGTACCCTTGCTATATCTGGCTTTAGCCGGAGCCTATCTATTAGTTGTCCCGGTGGCTTTAATGCTTTACCTCAACCTACGCTGGTATACGGCTGGCTCGATTGAGCGCACCGTCATGTATTTTTTTGTATTTTTGTTCTTTCCCGGACTGTTGGTTTTGTCGCCGTTTGTGAATCTGCGACCCAAACCCCGCAAAATTGAAGTTTAA',\n", " 'dna_sequence_length': 213,\n", " 'functions': ['NADH dehydrogenase subunit', 'NdhL'],\n", " 'id': 'A3776_RS08395',\n", " 'location': [['NZ_LUHI01000036.1', 21434, '-', 213]],\n", " 'md5': '1ae8b7b3d705b63184c99a9711785d23',\n", " 'protein_md5': '1ae8b7b3d705b63184c99a9711785d23',\n", " 'protein_translation': 'MIVPLLYLALAGAYLLVVPVALMLYLNLRWYTAGSIERTVMYFFVFLFFPGLLVLSPFVNLRPKPRKIEV',\n", " 'protein_translation_length': 70,\n", " 'quality': {'hit_count': 5, 'weighted_hit_count': 5.5205}}" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "genome.features.get_by_id('A3776_RS08395').data" ] }, { "cell_type": "code", "execution_count": 221, "id": "d950807e-d2b1-423e-be71-7f54a5a35930", "metadata": {}, "outputs": [], "source": [ "import json\n", "# Decode explicitly as UTF-8 instead of relying on the platform's default encoding.\n", "with open('/home/fliu/workspace/data/sbml/iAnC892/iAnC892_single.json', 'r', encoding='utf-8') as fh:\n", "    data = json.load(fh)" ] }, { "cell_type": "code", "execution_count": 222, "id": "80409e3f-e672-47bc-b275-715f79276bfb", "metadata": {}, "outputs": [], "source": [ "%run /home/fliu/workspace/data/sbml/iAnC892/tools.py\n", "integrate_to_seed(data)" ] 
}, { "cell_type": "code", "execution_count": 161, "id": "d644b65e-3955-4da4-b346-3a3e4d0cd9ae", "metadata": {}, "outputs": [], "source": [ "# Serialise `data` as JSON (4-space indent, sorted keys) straight into the output file.\n", "with open('/home/fliu/workspace/data/sbml/iAnC892/iAnC892_single_mapped.json', 'w') as fh:\n", "    json.dump(data, fh, indent=4, sort_keys=True)" ] }, { "cell_type": "code", "execution_count": 162, "id": "df7e8efe-df5c-48a5-b240-48b7f51996ec", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 163, "id": "b9906460-1807-45ce-9b8b-65f4a910fce2", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 223, "id": "e64a4938-9fa5-496d-9658-633b88c0ae56", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 224, "id": "93c0dcbd-fdb0-41f7-8e70-f02668fe4108", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "
| Metabolite identifier | cpd00011_p | \n", "
| Name | CO2 | \n", "
| Memory address | \n", "0x07fcba66f68e0 | \n", "
| Formula | CO2 | \n", "
| Compartment | p | \n", "
| In 2 reaction(s) | \n", " cpd00011exccc_p, cpd00011excpp_e | \n", "
1.0 obj_ATP = 0.8035714285714285
| Metabolite | \n", "Reaction | \n", "Flux | \n", "C-Number | \n", "C-Flux | \n", "
|---|---|---|---|---|
| cpd11632_e | \n", "EX_cpd11632_e | \n", "0.5 | \n", "0 | \n", "0.00% | \n", "
| hvphoton1_e | \n", "EX_hvphoton1_e | \n", "0.5 | \n", "0 | \n", "0.00% | \n", "
| Metabolite | \n", "Reaction | \n", "Flux | \n", "C-Number | \n", "C-Flux | \n", "
|---|
1.0 obj_ATP = 0.0
| Metabolite | \n", "Reaction | \n", "Flux | \n", "C-Number | \n", "C-Flux | \n", "
|---|
| Metabolite | \n", "Reaction | \n", "Flux | \n", "C-Number | \n", "C-Flux | \n", "
|---|
| Reaction identifier | CBFCu_k | \n", "
| Name | cytochrome b6/f complex | \n", "
| Memory address | \n", "0x07fcbbf9f9f70 | \n", "
| Stoichiometry | \n", "\n",
" 2.0 cpd00067_c + 2.0 cpd12265_k + cpd16503_k --> 4.0 cpd00067_k + cpd07274_k + 2.0 cpd12239_k \n", "2.0 H+ + 2.0 Oxidized plastocyanin + Plastoquinol-9 --> 4.0 H+ + Plastoquinone A + 2.0 Reduced plastocyanin \n", " | \n",
"
| GPR | A3776_RS13955 or A3776_RS22750 | \n", "
| Lower bound | 0.0 | \n", "
| Upper bound | 1000.0 | \n", "
| Reaction identifier | rxn46370_k | \n", "
| Name | nan | \n", "
| Memory address | \n", "0x07fcbbc01f790 | \n", "
| Stoichiometry | \n", "\n",
" cpd00007_k + 4.0 cpd00067_k + 2.0 cpd16503_k <=> 2.0 cpd00001_k + 4.0 cpd00067_c + 2.0 cpd07274_k + cpd11632_k \n", "O2 + 4.0 H+ + 2.0 Plastoquinol-9 <=> 2.0 H2O + 4.0 H+ + 2.0 Plastoquinone A + hn \n", " | \n",
"
| GPR | \n", " |
| Lower bound | -1000 | \n", "
| Upper bound | 1000 | \n", "
| Reaction identifier | rxn05937 | \n", "
| Name | Ferredoxin:NADP+ oxidoreductase | \n", "
| Memory address | \n", "0x07fcbbd112c70 | \n", "
| Stoichiometry | \n", "\n",
" cpd00006_0 + cpd00067_0 + cpd11620_0 <=> cpd00005_0 + cpd11621_0 \n", "NADP + H+ + Reducedferredoxin <=> NADPH + Oxidizedferredoxin \n", " | \n",
"
| GPR | \n", " |
| Lower bound | -1000 | \n", "
| Upper bound | 1000 | \n", "
| Reaction identifier | ATP_syn_l__hc | \n", "
| Name | ATP synthetase(u) | \n", "
| Memory address | \n", "0x07fcb9a4b07f0 | \n", "
| Stoichiometry | \n", "\n",
" 3.0 cpd00008[hc] + 3.0 cpd00009[hc] + 14.0 cpd00067[hl] --> 3.0 cpd00001[hc] + 3.0 cpd00002[hc] + 11.0 cpd00067[hc] \n", "3.0 ADP + 3.0 Phosphate + 14.0 H+ --> 3.0 H2O + 3.0 ATP + 11.0 H+ \n", " | \n",
"
| GPR | ( A3776_RS10860 or A3776_RS13440 ) and ( A3776_RS10820 or A3776_RS10980 ) and ( A3776_RS10975 or... | \n", "
| Lower bound | 0.0 | \n", "
| Upper bound | 1000.0 | \n", "
| Name | \n", "M_iAnC892 | \n", "
| Memory address | \n", "0x07fcb9861eee0 | \n", "
| Number of metabolites | \n", "1811 | \n", "
| Number of reactions | \n", "1848 | \n", "
| Number of groups | \n", "0 | \n", "
| Objective expression | \n", "1.0*biomass_eq_33047_c0 - 1.0*biomass_eq_33047_c0_reverse_4b9c5 | \n", "
| Compartments | \n", "Vegetative cytoplasm, Vegetative lumen, Vegetative periplasm, Extracellular space, Vegetative carboxysome, Heterocyst cytoplasm, Heterocyst lumen, Heterocyst periplasm, Pseudo compartment | \n", "
| Reaction identifier | cpd00011excpp_c1 | \n", "
| Name | cpd00011excpp | \n", "
| Memory address | \n", "0x07fcb94191fd0 | \n", "
| Stoichiometry | \n", "\n",
" cpd00011_p1 <=> cpd00011_e0 \n", "CO2 <=> CO2 \n", " | \n",
"
| GPR | \n", " |
| Lower bound | -1000.0 | \n", "
| Upper bound | 1000.0 | \n", "
1.0 biomass_eq_33047__vc = 19.82460353465784
| Metabolite | \n", "Reaction | \n", "Flux | \n", "C-Number | \n", "C-Flux | \n", "
|---|---|---|---|---|
| hvphoton1[e] | \n", "EX_PHO1 | \n", "100 | \n", "0 | \n", "0.00% | \n", "
| cpd00007[e] | \n", "EX_cpd00007[e] | \n", "325 | \n", "0 | \n", "0.00% | \n", "
| cpd00009[e] | \n", "EX_cpd00009[e] | \n", "7.601 | \n", "0 | \n", "0.00% | \n", "
| cpd00013[e] | \n", "EX_cpd00013[e] | \n", "74.52 | \n", "0 | \n", "0.00% | \n", "
| cpd00027[e] | \n", "EX_cpd00027[e] | \n", "164.6 | \n", "6 | \n", "81.61% | \n", "
| cpd00034[e] | \n", "EX_cpd00034[e] | \n", "0.0587 | \n", "0 | \n", "0.00% | \n", "
| cpd00048[e] | \n", "EX_cpd00048[e] | \n", "1.315 | \n", "0 | \n", "0.00% | \n", "
| cpd00053[e] | \n", "EX_cpd00053[e] | \n", "16.06 | \n", "5 | \n", "6.63% | \n", "
| cpd00058[e] | \n", "EX_cpd00058[e] | \n", "0.0587 | \n", "0 | \n", "0.00% | \n", "
| cpd00060[e] | \n", "EX_cpd00060[e] | \n", "5.523 | \n", "5 | \n", "2.28% | \n", "
| cpd00063[e] | \n", "EX_cpd00063[e] | \n", "0.08795 | \n", "0 | \n", "0.00% | \n", "
| cpd00067[e] | \n", "EX_cpd00067[e] | \n", "1000 | \n", "0 | \n", "0.00% | \n", "
| cpd00107[e] | \n", "EX_cpd00107[e] | \n", "3.375 | \n", "6 | \n", "1.67% | \n", "
| cpd00118[e] | \n", "EX_cpd00118[e] | \n", "0.62 | \n", "4 | \n", "0.20% | \n", "
| cpd00129[e] | \n", "EX_cpd00129[e] | \n", "5.886 | \n", "5 | \n", "2.43% | \n", "
| cpd00149[e] | \n", "EX_cpd00149[e] | \n", "0.06303 | \n", "0 | \n", "0.00% | \n", "
| cpd00156[e] | \n", "EX_cpd00156[e] | \n", "3.8 | \n", "5 | \n", "1.57% | \n", "
| cpd00205[e] | \n", "EX_cpd00205[e] | \n", "3.301 | \n", "0 | \n", "0.00% | \n", "
| cpd00254[e] | \n", "EX_cpd00254[e] | \n", "0.5573 | \n", "0 | \n", "0.00% | \n", "
| cpd00264[e] | \n", "EX_cpd00264[e] | \n", "0.1261 | \n", "7 | \n", "0.07% | \n", "
| cpd00322[e] | \n", "EX_cpd00322[e] | \n", "7.114 | \n", "6 | \n", "3.53% | \n", "
| cpd00971[e] | \n", "EX_cpd00971[e] | \n", "0.07342 | \n", "0 | \n", "0.00% | \n", "
| cpd10515[e] | \n", "EX_cpd10515[e] | \n", "0.2772 | \n", "0 | \n", "0.00% | \n", "
| cpd11574[e] | \n", "EX_cpd11574[e] | \n", "0.0587 | \n", "0 | \n", "0.00% | \n", "
| cpd20863[e] | \n", "EX_cpd20863[e] | \n", "0.0587 | \n", "0 | \n", "0.00% | \n", "
| Metabolite | \n", "Reaction | \n", "Flux | \n", "C-Number | \n", "C-Flux | \n", "
|---|---|---|---|---|
| cpd15380[hc] | \n", "DM_cpd15380__hc | \n", "-0.0007828 | \n", "5 | \n", "0.00% | \n", "
| cpd15380[vc] | \n", "DM_cpd15380__vc | \n", "-0.004324 | \n", "5 | \n", "0.01% | \n", "
| dialurate[hc] | \n", "DM_dialurate__hc | \n", "-3.979E-06 | \n", "4 | \n", "0.00% | \n", "
| dialurate[vc] | \n", "DM_dialurate__vc | \n", "-0.004324 | \n", "4 | \n", "0.00% | \n", "
| cpd00001[e] | \n", "EX_cpd00001[e] | \n", "-687 | \n", "0 | \n", "0.00% | \n", "
| cpd00011[e] | \n", "EX_cpd00011[e] | \n", "-372 | \n", "1 | \n", "99.99% | \n", "
1.0 biomass_eq_33047__vc = 0.18240098144907063
| Metabolite | \n", "Reaction | \n", "Flux | \n", "C-Number | \n", "C-Flux | \n", "
|---|---|---|---|---|
| hvphoton1[e] | \n", "EX_PHO1 | \n", "61.35 | \n", "0 | \n", "0.00% | \n", "
| hvphoton2[e] | \n", "EX_PHO2 | \n", "51.03 | \n", "0 | \n", "0.00% | \n", "
| cpd00009[e] | \n", "EX_cpd00009[e] | \n", "0.07813 | \n", "0 | \n", "0.00% | \n", "
| cpd00034[e] | \n", "EX_cpd00034[e] | \n", "0.0005935 | \n", "0 | \n", "0.00% | \n", "
| cpd00048[e] | \n", "EX_cpd00048[e] | \n", "0.07041 | \n", "0 | \n", "0.00% | \n", "
| cpd00058[e] | \n", "EX_cpd00058[e] | \n", "0.0005935 | \n", "0 | \n", "0.00% | \n", "
| cpd00063[e] | \n", "EX_cpd00063[e] | \n", "0.0008893 | \n", "0 | \n", "0.00% | \n", "
| cpd00067[e] | \n", "EX_cpd00067[e] | \n", "19.48 | \n", "0 | \n", "0.00% | \n", "
| cpd00149[e] | \n", "EX_cpd00149[e] | \n", "0.0006373 | \n", "0 | \n", "0.00% | \n", "
| cpd00205[e] | \n", "EX_cpd00205[e] | \n", "0.03338 | \n", "0 | \n", "0.00% | \n", "
| cpd00242[e] | \n", "EX_cpd00242[e] | \n", "8.486 | \n", "1 | \n", "100.00% | \n", "
| cpd00254[e] | \n", "EX_cpd00254[e] | \n", "0.005635 | \n", "0 | \n", "0.00% | \n", "
| cpd00528[e] | \n", "EX_cpd00528[e] | \n", "0.6772 | \n", "0 | \n", "0.00% | \n", "
| cpd00971[e] | \n", "EX_cpd00971[e] | \n", "0.0007424 | \n", "0 | \n", "0.00% | \n", "
| cpd10515[e] | \n", "EX_cpd10515[e] | \n", "0.002803 | \n", "0 | \n", "0.00% | \n", "
| cpd11574[e] | \n", "EX_cpd11574[e] | \n", "0.0005935 | \n", "0 | \n", "0.00% | \n", "
| cpd20863[e] | \n", "EX_cpd20863[e] | \n", "0.0005935 | \n", "0 | \n", "0.00% | \n", "
| Metabolite | \n", "Reaction | \n", "Flux | \n", "C-Number | \n", "C-Flux | \n", "
|---|---|---|---|---|
| cpd02791[hc] | \n", "DM_cpd02791__hc | \n", "-0.0001159 | \n", "6 | \n", "8.65% | \n", "
| cpd02791[vc] | \n", "DM_cpd02791__vc | \n", "-0.001159 | \n", "6 | \n", "86.46% | \n", "
| cpd15380[hc] | \n", "DM_cpd15380__hc | \n", "-3.979E-06 | \n", "5 | \n", "0.25% | \n", "
| cpd15380[vc] | \n", "DM_cpd15380__vc | \n", "-3.979E-05 | \n", "5 | \n", "2.47% | \n", "
| dialurate[hc] | \n", "DM_dialurate__hc | \n", "-3.979E-06 | \n", "4 | \n", "0.20% | \n", "
| dialurate[vc] | \n", "DM_dialurate__vc | \n", "-3.979E-05 | \n", "4 | \n", "1.98% | \n", "
| cpd00001[e] | \n", "EX_cpd00001[e] | \n", "-2.027 | \n", "0 | \n", "0.00% | \n", "
| cpd00007[e] | \n", "EX_cpd00007[e] | \n", "-10.28 | \n", "0 | \n", "0.00% | \n", "