pax_global_header00006660000000000000000000000064133464520020014511gustar00rootroot0000000000000052 comment=b24479bc7fda84a4c421fa8a21a839c4267a30c8 pybel-0.12.1/000077500000000000000000000000001334645200200127055ustar00rootroot00000000000000pybel-0.12.1/.appveyor.yml000066400000000000000000000025321334645200200153550ustar00rootroot00000000000000environment: # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the # /E:ON and /V:ON options are not enabled in the batch script intepreter # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd" TOX_ENV: "pywin" matrix: - PYTHON: "C:\\Python27" PYTHON_VERSION: "2.7.8" PYTHON_ARCH: "32" - PYTHON: "C:\\Python27-x64" PYTHON_VERSION: "2.7.8" PYTHON_ARCH: "64" install: # Download setup scripts and unzip - ps: "wget https://github.com/cloudify-cosmo/appveyor-utils/archive/master.zip -OutFile ./master.zip" - "7z e master.zip */appveyor/* -oappveyor" # Install Python (from the official .msi of http://python.org) and pip when # not already installed. - "powershell ./appveyor/install.ps1" # Prepend newly installed Python to the PATH of this build (this cannot be # done from inside the powershell script as it would require to restart # the parent CMD process). - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" # Check that we have the expected version and architecture for Python - "python --version" - "python -c \"import struct; print(struct.calcsize('P') * 8)\"" build: false # Not a C# project, build stuff at the test step instead. before_test: - "%CMD_IN_ENV% pip install tox" test_script: - "%CMD_IN_ENV% tox" pybel-0.12.1/.bumpversion.cfg000066400000000000000000000015601334645200200160170ustar00rootroot00000000000000[bumpversion] current_version = 0.12.1 commit = True tag = False parse = (?P\d+)\.(?P\d+)\.(?P\d+)(?:-(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))? 
serialize = {major}.{minor}.{patch}-{release}+{build} {major}.{minor}.{patch}+{build} {major}.{minor}.{patch}-{release} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = production first_value = dev values = dev production [bumpverion:part:build] values = [0-9A-Za-z-]+ [bumpversion:file:src/pybel/__init__.py] search = __version__ = '{current_version}' replace = __version__ = '{new_version}' [bumpversion:file:docs/source/conf.py] search = release = '{current_version}' replace = release = '{new_version}' [bumpversion:file:src/pybel/constants.py] search = VERSION = '{current_version}' replace = VERSION = '{new_version}' pybel-0.12.1/.codeclimate.yml000066400000000000000000000003371334645200200157620ustar00rootroot00000000000000engines: csslint: enabled: true duplication: enabled: true config: languages: - python fixme: enabled: true radon: enabled: true ratings: paths: - "**.py" exclude_paths: - tests/ pybel-0.12.1/.codecov.yml000066400000000000000000000002521334645200200151270ustar00rootroot00000000000000codecov: branch: master coverage: precision: 2 round: down range: "60...100" ignore: - "tests/*" - "src/pybel/cli.py" - "src/pybel/__main__.py" pybel-0.12.1/.coveragerc000066400000000000000000000006601334645200200150300ustar00rootroot00000000000000[run] branch = True source = pybel omit = src/pybel/__main__.py src/pybel/cli.py src/pybel/io/indra.py src/pybel/io/web.py src/pybel/resources/arty.py src/pybel/resources/deploy.py src/pybel/resources/defaults.py tests/* docs/* scripts/* [paths] source = src/pybel .tox/*/lib/python*/site-packages/pybel [report] show_missing = True exclude_lines = def __str__ def __repr__ pybel-0.12.1/.flake8000066400000000000000000000010661334645200200140630ustar00rootroot00000000000000######################### # Flake8 Configuration # # (.flake8) # # (formerly in tox.ini) # ######################### [flake8] ignore = E501,F401,F403,D105 exclude = .tox, .git, __pycache__, docs/source/conf.py, src/pybel/cli.py build, dist, 
tests/fixtures/*, *.pyc, *.egg-info, .cache, .eggs max-complexity = 10 import-order-style = pycharm application-import-names = pybel format = ${cyan}%(path)s${reset}:${yellow_bold}%(row)d${reset}:${green_bold}%(col)d${reset}: ${red_bold}%(code)s${reset} %(text)s pybel-0.12.1/.gitignore000066400000000000000000000033351334645200200147010ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject # PyCharm project settings .idea/* *.pickle *.gpickle scratch scratch/* .pytest_cache ### OSX ### # General .DS_Store .AppleDouble .LSOverride # Icon must end with two \r Icon # Thumbnails ._* # Files that might appear in the root of a volume .DocumentRevisions-V100 .fseventsd .Spotlight-V100 .TemporaryItems .Trashes .VolumeIcon.icns .com.apple.timemachine.donotpresent # Directories potentially created on remote AFP share .AppleDB .AppleDesktop Network Trash Folder Temporary Items .apdisk ### Vim ### # Swap [._]*.s[a-v][a-z] [._]*.sw[a-p] [._]s[a-rt-v][a-z] [._]ss[a-gi-z] [._]sw[a-p] # Session 
Session.vim # Temporary .netrwhist *~ # Auto-generated tag files tags # Persistent undo [._]*.un~ pybel-0.12.1/.readthedocs.yml000066400000000000000000000000771334645200200157770ustar00rootroot00000000000000requirements_file: requirements-rtfd.txt python: version: 3 pybel-0.12.1/.travis.yml000066400000000000000000000053721334645200200150250ustar00rootroot00000000000000sudo: false cache: pip services: - mysql - postgresql language: python python: - 3.6 - 2.7 stages: - lint - docs - test env: - TOXENV=py DB=mysql PYBEL_TEST_CONNECTOR=mysqlclient PYBEL_TEST_CONNECTION=mysql+mysqldb://travis@localhost/test?charset=utf8 - TOXENV=py DB=postgresql PYBEL_TEST_CONNECTOR=psycopg2 PYBEL_TEST_CONNECTION=postgresql+psycopg2://travis@localhost/tests - TOXENV=py DB=sqlite jobs: include: # lint stage - stage: lint env: TOXENV=manifest - env: TOXENV=flake8 - env: TOXENV=pylint - env: TOXENV=xenon - env: TOXENV=vulture - env: TOXENV=pyroma # docs stage - stage: docs env: TOXENV=doc8 - env: TOXENV=readme - env: TOXENV=docs matrix: allow_failures: - env: TOXENV=flake8 - env: TOXENV=pylint - env: TOXENV=xenon - env: TOXENV=vulture - env: TOXENV=pyroma install: - sh -c 'if [ "$TOXENV" = "py" ]; then pip install tox codecov; else pip install tox; fi' before_script: - sh -c "if [ '$DB' = 'postgresql' ]; then psql -c 'DROP DATABASE IF EXISTS tests;' -U postgres; fi" - sh -c "if [ '$DB' = 'postgresql' ]; then psql -c 'DROP DATABASE IF EXISTS tests_tmp;' -U postgres; fi" - sh -c "if [ '$DB' = 'postgresql' ]; then psql -c 'CREATE DATABASE tests;' -U postgres; fi" - sh -c "if [ '$DB' = 'postgresql' ]; then psql -c 'CREATE DATABASE tests_tmp;' -U postgres; fi" - sh -c "if [ '$DB' = 'mysql' ]; then mysql -e 'DROP DATABASE IF EXISTS test;'; fi" - sh -c "if [ '$DB' = 'mysql' ]; then mysql -e 'CREATE DATABASE test CHARACTER SET utf8 COLLATE utf8_general_ci;'; fi" - sh -c "if [ '$DB' = 'mysql' ]; then mysql -e \"GRANT ALL PRIVILEGES ON test.* to 'travis'@'%' WITH GRANT OPTION;\"; fi" script: - tox 
after_success: - sh -c 'if [ "$TOXENV" = "py" ]; then tox -e coverage-report; codecov; fi' notifications: slack: pybel:n2KbWKBum3musnBg3L76gGwq deploy: - provider: pypi user: cthoyt password: secure: WK8SA0Vtdn9a4RMdpn8L0CFbHs/3a+mOJrwYLaL4kzzZNWjL3yt0ygnhY5138pQoy5ygQ/bLT/COJIE9cN0qP0uUyBMgerUPWozkXryoz79zsQxe2HXHkG/XMhxuNjvPwRKjw9+tmz/wWoCeMFQ0JczNvOg2DYFcT4ruqLY5muHBuMErVB5pwjGSpYmDAEpI30x6KEJTXz5VIDxRpLhI6uehpItBW195Yj5GRAWllnGTqn9zRTeZR7SME1/p2Af1RVOlam4Ur/3hSXYDMUqErNj/vrxvxgpO9lDakfpSUV0U7pETgwQ+g8mp0fgm8I643jfu4Zoj8lITf9ewl9CteiaC80gV4Nj9JA9mdGiiaZwmpkEPUiMq6YLaqef6QDJEL2kx1q40+cOOsnTWe3PjnmRx9oAovQR9koqz1kztr3F++D3H65s6Y+LF46mmAlQs5mQ2sjGGGzd8agBcmoBzAG/Y/oTpkLbZ1Xq1qMSTQML7b0AHgbkWsy0QYUm56pX35uMrcl+r5BezJ5dAG+p3FDfEurD9iR9I0pUoWcE3zZldA1EFNsX6tnrLUwXGrAX7YGT7ul9DgHnD/h8hsS2qcrktTgzWIBIEZSpXiWUoM7Wy0TMbBq+iU+83lhCfQqnOb8fiMaH09v2neMmmsaazIIdF9JIMS9Caq5x7Q/iujeI= distributions: sdist on: tags: true repo: pybel/pybel branch: master pybel-0.12.1/AUTHORS.rst000066400000000000000000000007701334645200200145700ustar00rootroot00000000000000Authors ======= The following have contributed to the development, maintenance, and testing of PyBEL. Maintainer ---------- `Charles Tapley Hoyt `_ Code Contributors ----------------- - `Andrej Konotopez `_ - `Scott Colby `_ - `Daniel Domingo-Fernández `_ - `Ben Gyori `_ Other Contributors ------------------ - `Christian Ebeling `_ pybel-0.12.1/CHANGELOG.rst000066400000000000000000001050561334645200200147350ustar00rootroot00000000000000Change Log ========== All notable changes to this project will be documented in this file. 
The format is based on `Keep a Changelog `_ and this project adheres to `Semantic Versioning `_ Unreleased ---------- `0.12.1 `_ - 2018-09-13 -------------------------------------------------------------------------------- Fixed ~~~~~ - Wrong names in CLI - Add missing star import for pybel.dsl.ListAbundance Changed ~~~~~~~ - Update iteration over BEL files to read in one pass Added ~~~~~ - More summary functions in pybel.struct `0.12.0 `_ - 2018-09-06 --------------------------------------------------------------------------------- Changed ~~~~~~~ - Update edge hashing algorithm (this invalidates old hashes) - Edge hashes are now used as keys instead of being put inside edge data dictionaries - Improved graph operations with new location of edge hashes - Update Node/Link JSON schema - Improve __contains__ and has_node functions to handle DSL objects - Require usage of DSL when creating BELGraph instances - Use DSL completely in ORM - Add SHA512 to authors to avoid issues with MySQL's collation Removed ~~~~~~~ - Remove ``pybel.tokens.node_to_tuple`` function and ``pybel.tokens.node_to_bel`` functions - All tuple-related functions in the DSL (AKA the tupleectomy) `0.11.11 `_ - 2018-07-31 ----------------------------------------------------------------------------------- Added ~~~~~ - Automatic generation of CLI documentation with ``sphinx-click`` - Several edge creation convenience functions to the ``BELGraph`` - Graph summary functions Changed ~~~~~~~ - Improve Drop networks (Thanks @scolby33) (https://github.com/pybel/pybel/pull/319) - Huge improvements to documentation and code style recommended by flake8 Fixed ~~~~~ - Fixed handling of tuples (64d0685) Removed ~~~~~~~ - Remove function ``BELGraph.iter_data`` `0.11.10 `_ - 2018-07-23 ---------------------------------------------------------------------------------- Added ~~~~~ - Several subgraph functions (https://github.com/pybel/pybel/pull/315) Changed ~~~~~~~ - Better SQL implementation of get_recent_networks
(https://github.com/pybel/pybel/pull/312) `0.11.9 `_ - 2018-07-?? -------------------------------------------------------------------------------- Removed ~~~~~~~ - Removed CX and NDEx IO in favor of https://github.com/pybel/pybel-cx Changed ~~~~~~~ - Better (less annoying) logging for deprecated transformations - Turn off SQL echoing by default - Update getting annotation entries - Better options for using TQDM while parsing Added ~~~~~ - Flag to INDRA machine to run locally - Add require annotations option to parser (https://github.com/pybel/pybel/issues/255) - Data missing key node predicate builder `0.11.8 `_ - 2018-06-27 -------------------------------------------------------------------------------- Added ~~~~~ - Deprecation system for pipeline functions (for when they're renamed) Changed ~~~~~~~ - Rely on edge predicates more heavily in selection/induction/expansion transformations - Rename several functions related to the "central dogma" for more clarity `0.11.7 `_ - 2018-06-26 -------------------------------------------------------------------------------- Fixed ~~~~~ - Bug where data did not get copied to sub-graphs on induction (https://github.com/pybel/pybel/issues/#307) `0.11.6 `_ - 2018-06-25 -------------------------------------------------------------------------------- Added ~~~~~ - Added get_annotation_values function to pybel.struct.summary Removed ~~~~~~~ - Removed Manager.ensure function Fixed ~~~~~ - Fixed a bug in Manager.from_connection (https://github.com/pybel/pybel/issues/#306) `0.11.5 `_ - 2018-06-22 -------------------------------------------------------------------------------- Changed ~~~~~~~ - Changed arguments in pybel.struct.mutations.get_subgraphs_by_annotation - Moved utility functions in pybel.struct.mutations `0.11.4 `_ - 2018-06-22 -------------------------------------------------------------------------------- Changed ~~~~~~~ - Use BELGraph.fresh_copy instead of importing the class in mutator functions Added ~~~~~ - Add 
pipeline (https://github.com/pybel/pybel/issues/301) - Testing of neighborhood functions - Added several transformation and grouping functions for BELGraph - INDRA Machine in CLI Fixed ~~~~~ - Add missing field from BaseAbundance (https://github.com/pybel/pybel/issues/302) `0.11.3 `_ - 2018-06-04 -------------------------------------------------------------------------------- Added ~~~~~ - Made testing code and date install as part of main package(https://github.com/pybel/pybel/pull/298) Removed ~~~~~~~ - Remove extension hook and extension loader (https://github.com/pybel/pybel/pull/300) `0.11.2 `_ - 2018-05-10 -------------------------------------------------------------------------------- Added ~~~~~ - Calculation of SHA512 hash to DSL abundances - Documented the deployment extra for setup.py - Added to and from JSON path IO functions - PMI Contact for CBN import and more default namespaces - Added common query builders to SQLAlchemy models Fixed ~~~~~ - Fixed name/version lookup in the database - Safer creation of directories (https://github.com/pybel/pybel/issues/#284) - Make export to GraphML more boring and permissive - Implement to_tuple for CentralDogma (https://github.com/pybel/pybel/issues/#281) - Unicode compatibility error. Thanks @bgyori! (https://github.com/pybel/pybel/pull/289) Changed ~~~~~~~ - Made parsing of fragments permissive to quoting (https://github.com/pybel/pybel/issues/#282) - Update citation handling - Update namespace methods in CLI - Added ``as_bel`` method to DSL - Update authentication with BEL Commons (https://github.com/pybel/pybel/commit/4f6b8b0ecab411e1d2b110e00c8bac77ace88308) - Unpin SQLAlchemy version. Most up-to-date should remain safe. 
Removed ~~~~~~~ - Removed static function ``pybel.BELGraph.hash_node`` since it just wraps ``pybel.utils.node_to_tuple`` - Removed unnecessary configuration editing from CLI - Removed OWL Parser (https://github.com/pybel/pybel/issues/290) - Removed support for BELEQ files (https://github.com/pybel/pybel/issues/294) - Remove artifactory code and migrated to https://github.com/pybel/pybel-artifactory. (https://github.com/pybel/pybel/issues/292) `0.11.1 `_ - 2018-02-19 -------------------------------------------------------------------------------- Added ~~~~~ - Added additional DSL shortcuts for building edges with the BELGraph - Added example graphs (statins, BRAF, orthology examples) - Added knowledge transfer function - Added progress bar for parser `0.11.0 `_ - 2018-02-07 -------------------------------------------------------------------------------- Changed ~~~~~~~ - Updated SQL schema and made new minimum unpickle version 0.11.0. - Parser now uses a compact representation of annotations instead of exploding to multiple edges (https://github.com/pybel/pybel/issues/261) - Update annotation filtering functions to reflect new data format (https://github.com/pybel/pybel/issues/262) - Update GraphML Output (https://github.com/pybel/pybel/issues/260) - Better error message when missing namespace resource (https://github.com/pybel/pybel/issues/265) Fixed ~~~~~ - Fixed more problems with edge store and testing (https://github.com/pybel/pybel/issues/225, https://github.com/pybel/pybel/issues/256, https://github.com/pybel/pybel/issues/257) - Fixed windows testing (https://github.com/pybel/pybel/issues/243) - Fixed broken network cascade, but is still slow (https://github.com/pybel/pybel/issues/256, https://github.com/pybel/pybel/issues/257, https://github.com/pybel/pybel/issues/259) - Fixed JGIF import (https://github.com/pybel/pybel/issues/266) and added scripts directory (3dc6b1f) - Fix extras in setup.py and requirements.txt Added ~~~~~ - Additional regex format for 
date parsing from PubMed (https://github.com/pybel/pybel/issues/259) - Add labels to nodes in GraphML output (https://github.com/pybel/pybel/issues/260) - Add edge predicate builders (https://github.com/pybel/pybel/issues/262) - Testing on multiple databases (SQLite, MySQL, PostgreSQL) (https://github.com/pybel/pybel/issues/238) - Added ``pybel.struct.mutations`` module - Added graph-based equivalency checking - Add more documentation to BELGraph (https://github.com/pybel/pybel/issues/271) `0.10.1 `_ - 2017-12-28 -------------------------------------------------------------------------------- Fixed ~~~~~ - Fixed truncation description parsing to handle double quotes Changed ~~~~~~~ - Made DSL functions into classes to allow inheritance and isinstance checking as well as preliminary to_tuple functionality Added ~~~~~ - Added more edge predicates (has_activity, has_degree, has_translocation, has_annotation) `0.10.0 `_ - 2017-12-22 ------------------------------------------------------------------------------- Changed ~~~~~~~ - Updated SQL schema and made new minimum unpickle version 0.10.0. 
- Moved `pybel.parser.language` to `pybel.language` - Moved `pybel.parser.canoncalize` to `pybel.tokens` - Overhaul of `pybel.struct.filters` - included many more functions, tests, and updated nomenclature - Update canonicalize functions to be generally reusable (take node data dictionaries) - Make NDEx2, Neo4j, OWL parsing, and INDRA setup.py install extras Fixed ~~~~~ - Names defined by regular expressions can now be included in the database cache (https://github.com/pybel/pybel/issues/250, https://github.com/pybel/pybel/issues/251) - Fixed ``Manager.has_name_version`` (https://github.com/pybel/pybel/issues/246) - Fixed CX output and upgraded to NDEx2 client - When joining graphs, keep their metadata (https://github.com/pybel/pybel/commit/affaecc73d2b4affa8aeecb3834ed7c6f5697cac) Added ~~~~~ - Included partOf relationship in BEL language (https://github.com/pybel/pybel/issues/244) - Added additional date formats to parse from PubMed (https://github.com/pybel/pybel/issues/239) - Filled out many more DSL functions and added testing - Added ability to set relationship parsing policy in BEL Parser (https://github.com/pybel/pybel/commit/09614465d80d2931e901fd54d067a5151e327283) - Implemented from PyBEL Web Function - Implemented to INDRA function `0.9.7 `_ - 2017-11-20 ----------------------------------------------------------------------------- Changed ~~~~~~~ - Use ``HASH`` as dictionary key instead of ``ID`` - Allow DSL to create nodes without names but with identifiers - Rename instance variables in parsers for consistency - Greater usage of DSL in parser `0.9.6 `_ - 2017-11-12 ----------------------------------------------------------------------------- Added ~~~~~ - Additional keyword arguments for JSON output functions Changed ~~~~~~~ - Updated parser intermediate data structure. Should have no effect on end users.
- Smarter serialization of PyBEL data dictionaries to BEL Fixed ~~~~~ - Better handling of citations that have authors pre-parsed into lists (https://github.com/pybel/pybel/issues/247) `0.9.5 `_ - 2017-11-07 ----------------------------------------------------------------------------- Added ~~~~~ - Updates to DSL - More node filters and predicates - Added "partOf" relationship (https://github.com/pybel/pybel/issues/244) - Added more regular expressions for date parsing (https://github.com/pybel/pybel/issues/239) Fixed ~~~~~ - Fixed incorrect checking of network storage (https://github.com/pybel/pybel/issues/246) Changed ~~~~~~~ - Reorganized resources module to reduce dependencies on PyBEL Tools, which has lots of other big requirements - Moved ``pybel.summary`` module to ``pybel.struct.summary`` `0.9.4 `_ - 2017-11-03 ----------------------------------------------------------------------------- Fixed ~~~~~ - Problem with uploading products, reactants, and members to NDEx (#230) - Checking for adding uncachable nodes when populating edge store Added ~~~~~ - Database seeding functions - Citation management - Added PubMed Central as type in citation Removed ~~~~~~~ - Don't keep blobs in node or edge cache anymore `0.9.3 `_ - 2017-10-19 ----------------------------------------------------------------------------- Added ~~~~~ - Convenience functions for adding qualified and unqualified edges to BELGraph class - Sialic Acid Example BEL Graph - EGF Example BEL Graph - Added PyBEL Web export and stub for import - BioPAX Import - Dedicated BEL Syntax error Changed ~~~~~~~ - Update the BEL Script canonicalization rules to group citations then evidences better - Removed requirement of annotation entry in edge data dictionaries - Confident enough to make using the edge store True by default Fixed ~~~~~ - Fixed unset list parsing so it doesn't need quotes (#234) Removed ~~~~~~~ - In-memory caching of authors `0.9.2 `_ - 2017-09-27 
----------------------------------------------------------------------------- Fixed ~~~~~ - JSON Serialization bug for authors in Citation Model `0.9.1 `_ - 2017-09-26 ----------------------------------------------------------------------------- Added ~~~~~ - INDRA Import - Usage of built-in operators on BEL Graphs Changed ~~~~~~~ - Update list recent networks function to work better with SQL 99 compliant (basically everything except the old version of MySQL and SQLite) RDBMS - Better tests for queries to edge store - Better testing when extensions not installed (c1ac850) - Update documentation to new OpenBEL website links Fixed ~~~~~ - Fix crash when uploading network to edge store that has annotation pattern definitions (still needs some work though) - Added foreign keys for first and last authors in Citation model (requires database rebuild) - Froze NetworkX version at 1.11 since 2.0 breaks everything Removed ~~~~~~~ - Don't cache SQLAlchemy models locally (3d7d238) `0.9.0 `_ - 2017-09-19 ----------------------------------------------------------------------------- Added ~~~~~ - Option for setting scopefunc in Manager - Include extra citation information on inserting graph to database that might have come from citation enrichment - Node model to tuple and json functions are now complete Changed ~~~~~~~ - Added members lists to the node data dictionaries for complex and composite nodes - Added reactants and products lists to the node data dictionaries for reaction nodes Fixed ~~~~~~~ - GOCC and other location caching problem - Node tuples for reactions are now using standard node tuples for reactants and products. This was a huge issue but it had never come up before. DANGER - this means all old code will still work, but any node-tuple reliant code will have unexpected results. This also means that the node hashes in the database for all reactions will now be outdated, so the minimum version is being bumped. 
`0.8.1 `_ - 2017-09-08 ------------------------------------------------------------------------------ Changed ~~~~~~~ - Change CacheManager class name to Manager - Change references from build_manager to Manager.ensure - Automatically update default database to minimum import version - Constants for extra citation fields and update to_json for Citation model Fixed ~~~~~ - Bug in author insertion for non-unique authors `0.8.0 `_ - 2017-09-08 ------------------------------------------------------------------------------ Changed ~~~~~~~ - Made new minimum unpickle version 0.8.0. From now on, all unpickle changes (before a 1.0.0 release) will be accompanied by a minor version bump. - Overall better handling of citation insertion - Updated data models. Added to Citation model and renamed namespaceEntry in Node model. - Better init function for BELGraph - Force name and version to not be null in the database - Update pickle references to use six module - Update base cache manager - better connection handling and more exposed arguments Added ~~~~~ - Get graph functions to cache manager - Added more useful functions to cache manager - Kwargs for setting name, version, and description in BELGraph init - Getters and setters for version and description in BELGraph - Node data to tuple functions (https://github.com/pybel/pybel/issues/145) `0.7.3 `_ - 2017-09-05 ------------------------------------------------------------------------------ Changed ~~~~~~~ - Update logging for parsing of bad version strings - Change where kwargs go in parse_lines function - Make non-standard parsing modes part of kwargs Fixed ~~~~~ - On-purpose singletons now properly identified (https://github.com/pybel/pybel/issues/218) Added ~~~~~ - CLI command for set connection (https://github.com/pybel/pybel/issues/220) - GEF and GAP activities added for INDRA `0.7.2 `_ - 2017-08-10 ------------------------------------------------------------------------------ Changed ~~~~~~~ - Externalized more parsing 
constants - Updated version management - Keep track of all singleton lines in parsing - Update CLI - Update JGIF export from CBN Fixed ~~~~~ - Change node hashing to only use type and reference Added ~~~~~ - Node intersection merge - Get most recent network by name in manager `0.7.1 `_ - 2017-07-25 ------------------------------------------------------------------------------ Changed ~~~~~~~ - Externalized some PyParsing elements Fixed ~~~~~ - Version string tokenization `0.7.0 `_ - 2017-07-21 ------------------------------------------------------------------------------ Added ~~~~~ - Added Project key to document metadata parser (https://github.com/pybel/pybel/issues/215) - Reusable protocols for hashing nodes and edges Fixed ~~~~~ - Edge store working (https://github.com/pybel/pybel/issues/212) Changed ~~~~~~~ - Update resource urls (https://github.com/pybel/pybel/issues/211) - General improvements to exception handling - Made new minimum unpickle version 0.7.0 `0.6.2 `_ - 2017-06-28 ------------------------------------------------------------------------------ Added ~~~~~ - Environment variable for data locations - Add get network by ids merger `0.6.1 `_ - 2017-06-25 ------------------------------------------------------------------------------ Added ~~~~~ - Node and edge filter framework (https://github.com/pybel/pybel/issues/206) - Network joining (https://github.com/pybel/pybel/issues/205 and https://github.com/pybel/pybel/issues/204) - More thorough tests of IO Fixed ~~~~~ - Bug when getting multiple networks by identifier (https://github.com/pybel/pybel/issues/208) - Arguments to exceptions mixed up Changed ~~~~~~~ - Use context in command line interface to streamline code - Remove old, unused code `0.6.0 `_ - 2017-06-11 ------------------------------------------------------------------------------- Changed ~~~~~~~ - Merge OWL and BEL namespaces (https://github.com/pybel/pybel/issues/118) - Remove lots of unused/redundant code - Lots of functions renamed
and moved... Sorry people. Added ~~~~~ - Multiple options for graph joining - Filter functions (https://github.com/pybel/pybel/issues/206) `0.5.11 `_ - 2017-06-07 --------------------------------------------------------------------------------- Changed ~~~~~~~ - Added line numbers to parsing exceptions - Update minimum pickle parsing from 0.5.10 to 0.5.11 to reflect changes in parsing exceptions `0.5.10 `_ - 2017-06-06 -------------------------------------------------------------------------------- Added ~~~~~ - Network outer join (https://github.com/pybel/pybel/issues/205) - Network full join with hash (https://github.com/pybel/pybel/issues/204 and https://github.com/pybel/pybel/issues/204) - Option to suppress singleton warnings (https://github.com/pybel/pybel/issues/200) Changed ~~~~~~~ - Moved :mod:`pybel.graph` to :mod:`pybel.struct.graph` - Parse exceptions are renamed - Update minimum pickle parsing from 0.5.4 to 0.5.10 to reflect changes in parsing exceptions and project structure Fixed ~~~~~ - Rewrote the CSV Exporter (https://github.com/pybel/pybel/issues/201) `0.5.9 `_ - 2017-05-28 ------------------------------------------------------------------------------ Added ~~~~~ - JGIF interchange (https://github.com/pybel/pybel/issues/193) and (https://github.com/pybel/pybel/issues/194) - Configuration file parsing (https://github.com/pybel/pybel/issues/197) `0.5.8 `_ - 2017-05-25 ------------------------------------------------------------------------------ Changed ~~~~~~~ - CX is now unstreamified on load, making compatibility with other CX sources (like NDEx) possible - Testing now enables ``PYBEL_TEST_CONNECTION`` environment variable to set a persistent database - Testing data cut down to reduce memory consumption Added ~~~~~ - NDEx upload and download `0.5.7 `_ - 2017-05-20 ------------------------------------------------------------------------------ Changed ~~~~~~~ - Public IO changed for to/from_json and to/from_cx
(https://github.com/pybel/pybel/issues/192) - Better error output for metadata failure (https://github.com/pybel/pybel/issues/191) Added ~~~~~ - Add BEL script line to edges (https://github.com/pybel/pybel/issues/155) - Export to GSEA gene list (https://github.com/pybel/pybel/issues/189) - Non-caching of namespaces support (https://github.com/pybel/pybel/issues/190) Note: I made a mistake with the release on 0.5.6, so I just bumped the patch one more. `0.5.5 `_ - 2017-05-08 ------------------------------------------------------------------------------ Changed ~~~~~~~ - Updated CX output to have full provenance and list definitions (https://github.com/pybel/pybel/issues/180) Added ~~~~~ - DOI and URL are now acceptable citation types (https://github.com/pybel/pybel/issues/188) - Citation can now be given as a double of type and reference (https://github.com/pybel/pybel/issues/187) `0.5.4 `_ - 2017-04-28 ------------------------------------------------------------------------------ Fixed ~~~~~ - MySQL truncations of large BLOBs - Session management problems Changed ~~~~~~~ - If a namespace/annotation was redefined, will now throw an exception instead of just logging a warning - Update minimum pickle parsing from 0.5.3 to 0.5.4 to reflect changes in parse exceptions Added ~~~~~ - Ability to drop graph that isn't in graph store from CLI `0.5.3 `_ - 2017-04-19 ------------------------------------------------------------------------------ Added ~~~~~ - Lenient parsing mode for unqualified translocations (https://github.com/pybel/pybel/issues/178) Changed ~~~~~~~ - Check for dead URLs at BEL framework (https://github.com/pybel/pybel/issues/177) - Don't throw warnings for versions that are in YYYYMMDD format (https://github.com/pybel/pybel/issues/175) - Include character positions in some exceptions (https://github.com/pybel/pybel/issues/176) - Update minimum pickle parsing from 0.4.2 to 0.5.3 to reflect the new parse exceptions' names and arguments `0.5.2 `_ -
2017-04-16 ------------------------------------------------------------------------------ Fixed ~~~~~ - Ensure existence of namespaces/annotations during graph upload (https://github.com/pybel/pybel/issues/165) `0.5.1 `_ - 2017-04-10 ------------------------------------------------------------------------------ Added ~~~~~ - Parsing of labels (https://github.com/pybel/pybel/issues/173) Fixed ~~~~~ - Parsing of hasComponents lists (https://github.com/pybel/pybel/issues/172) `0.5.0 `_ - 2017-04-07 ------------------------------------------------------------------------------ Added ~~~~~ - Debugging on lines starting with #: comments (https://github.com/pybel/pybel/issues/162) - Added missing relations in pybel constants (https://github.com/pybel/pybel/issues/161) Changed ~~~~~~~ - Merge definition and graph cache (https://github.com/pybel/pybel/issues/164) - Warn when not using semantic versioning (https://github.com/pybel/pybel/issues/160) `0.4.4 `_ - 2017-04-03 ------------------------------------------------------------------------------ Added ~~~~~ - File paths in definition parsing (https://github.com/pybel/pybel/issues/158) - Quotes around variant string (https://github.com/pybel/pybel/issues/156) Changed ~~~~~~~ - Reorganized package to split line parsing from core data structure (https://github.com/pybel/pybel/issues/154) `0.4.3 `_ - 2017-03-21 ------------------------------------------------------------------------------ Added ~~~~~ - Documentation for constants (https://github.com/pybel/pybel/issues/146) - Date validation on parse-time (https://github.com/pybel/pybel/issues/147) Changed ~~~~~~~ - Externalized strings from modifier parsers - Move ``pybel.cx.hash_tuple`` to ``pybel.utils.hash_tuple`` (https://github.com/pybel/pybel/issues/144) Fixed ~~~~~ - Output to CX on CLI crashing (https://github.com/pybel/pybel/issues/152) - Assignment of graph metadata on reload (https://github.com/pybel/pybel/issues/153) `0.4.2 `_ - 2017-03-16 
------------------------------------------------------------------------------ Added ~~~~~ - Node property data model and I/O - Edge property data model and I/O Changed ~~~~~~~ - Update version checking to be more lenient. v0.4.2 is now the minimum for reloading a graph Removed ~~~~~~~ - Origin completion option on BEL parsing. See `PyBEL Tools `_ `0.4.1 `_ - 2017-03-11 ------------------------------------------------------------------------------ Added ~~~~~ - More output options for BEL - Explicit parsing of hasVariant, hasReactant, and hasProduct Fixed ~~~~~ - Allow parsing of non-standard ordering of annotations - Superfluous output of single nodes when writing BEL scripts `0.4.0 `_ - 2017-03-07 ------------------------------------------------------------------------------- Added ~~~~~ - Stable CX import and export - Edge Store data models and loading - Alternative control parsing technique without citation clearing - Node name calculator `0.3.11 `_ - 2017-03-05 --------------------------------------------------------------------------------- Fixed ~~~~~ - Fixed has_members not adding annotations tag - Reliance on node identifiers in canonicalization of complexes and composites - Fixed graph iterator filter `0.3.10 `_ - 2017-03-01 -------------------------------------------------------------------------------- Added ~~~~~ - Shortcut for adding unqualified edges Fixed ~~~~~ - All edges have annotations dictionary now - JSON Export doesn't crash if there aren't list annotations - All exceptions have __str__ function for stringification by JSON export if desired `0.3.9 `_ - 2017-02-21 ------------------------------------------------------------------------------ Added ~~~~~ - Experimental CX export for use with NDEx Changed ~~~~~~~ - Better testing with thorough BEL Fixed ~~~~~ - ParseResult objects no longer propagate through graph - Fixed outputting to JSON Removed ~~~~~~~ - Support for importing GraphML is no longer continued because there's too much
information loss `0.3.8 `_ - 2017-02-12 ------------------------------------------------------------------------------ Added ~~~~~ - Annotation pattern definitions - Alternative json output to in-memory dictionary Changed ~~~~~~~ - Removed url rewriting for OpenBEL Framework - Group all annotations in edge data (see Data Model in docs) `0.3.7 `_ - 2017-02-06 ------------------------------------------------------------------------------ Added ~~~~~ - Added equivalentTo relation - Added OWL annotation support - Version integrity checking - Dump cache functionality Changed ~~~~~~~ - Merged GENE, GENE_VARIANT, and GENE_FUSION `0.3.6 `_ - 2017-02-03 ------------------------------------------------------------------------------ Changed ~~~~~~~ - Switch ontospy dependency to onto2nx for Windows support `0.3.5 `_ - 2017-01-30 ------------------------------------------------------------------------------ Added ~~~~~ - Add thorough testing of BEL document Changed ~~~~~~~ - Improved string externalization - Update to data model for fusions - Improved parser performance `0.3.4 `_ - 2017-01-22 ------------------------------------------------------------------------------ Added ~~~~~ - Codec support for opening files by path Changed ~~~~~~~ - Protein modifications, gene modifications, and variants are now stored as dictionaries in the latent data structure - Many constants have been externalized - BEL default names, like kinaseActivity are automatically assigned a sentinel value as a namespace `0.3.3 `_ - 2017-01-18 ------------------------------------------------------------------------------ Added ~~~~~ - Make HGVS parsing less complicated by storing as strings - add warning tracking `0.3.2 `_ - 2017-01-13 ------------------------------------------------------------------------------ Added ~~~~~ - Gene modification support - Namespace equivalence mapping data models and manager - Extension loading Changed ~~~~~~~ - Better testing (local files only with mocks) - Better names 
for exceptions and warnings `0.3.1 `_ - 2017-01-03 ------------------------------------------------------------------------------ Added ~~~~~ - Bytes IO of BEL Graphs - Graph caching and Graph Cache Manager Fixed ~~~~~ - Annotations weren't getting cached because *somebody* forgot to add the urls. Fixed. - Removed typos in default namespace list Changed ~~~~~~~ - More explicit tests and overall test case refactoring - Better handling of BEL script metadata `0.3.0 `_ - 2016-12-29 ------------------------------------------------------------------------------ Added ~~~~~ - OWL namespace support and caching - Full support for BEL canonicalization and output Fixed ~~~~~ - Rewrote namespace cache and SQLAlchemy models Removed ~~~~~~~ - Removed unnecessary pandas and matplotlib dependencies `0.2.6 `_ - 2016-11-19 ------------------------------------------------------------------------------ Added ~~~~~ - Canonical BEL terms added to nodes on parsing - Fragment parsing - Support for alternative names for evidence (SupportingText) - More explicit support of unqualified edges - Created top-level constants file Fixed ~~~~~ - Fix incorrect HGVS protein truncation parsing - Fix missing location option in abundance tag parsing - Fix json input/output Removed ~~~~~~~ - Deleted junk code from mapper and namespace cache manager `0.2.5 `_ - 2016-11-13 ------------------------------------------------------------------------------ Added ~~~~~ - Nested statement parsing support - Fusion parsing support Fixed ~~~~~ - Fixed graphml input/output - Changed encodings of python files to utf-8 - Fixed typos in language.py `0.2.4 `_ - 2016-11-13 ------------------------------------------------------------------------------ Added ~~~~~ - Neo4J CLI output - Edge and node filtering - Assertions of document metadata key - Added BEL 2.0 protein modification default mapping support Changed ~~~~~~~ - Rewrite HGVS parsing - Updated canonicalization Fixed ~~~~~ - Typo in amino acid dictionary - 
Assertion of citation `0.2.3 `_ - 2016-11-09 ------------------------------------------------------------------------------ Changed ~~~~~~~ - Made logging lazy and updated logging codes - Update rewriting of old statements - Explicitly streamlined MatchFirst statements; huge speed improvements `0.2.2 `_ - 2016-10-25 ------------------------------------------------------------------------------ Removed ~~~~~~~ - Documentation is no longer stored in version control - Fixed file type in CLI `0.2.1 `_ - 2016-10-25 [YANKED] --------------------------------------------------------------------------------------- Added ~~~~~ - Added CLI for data manager 0.2.0 - 2016-10-22 ------------------ Added ~~~~~ - Added definition cache manager pybel-0.12.1/CONTRIBUTING.rst000066400000000000000000000071141334645200200153510ustar00rootroot00000000000000Contributing ============ Contributions, whether big or small, are appreciated! You can get involved by submitting an issue, making a suggestion, or adding code to the project. PyBEL is young and wants to address the problems the BEL community is currently facing, and has a lot of excited people working on it! Want to Chat? ------------- We're really interested in what the community thinks about our software. Chat with us on Gitter at https://gitter.im/pybel/Lobby. Having a Problem? Submit an Issue. ---------------------------------- 1. Check that you have the latest version of :code:`PyBEL` 2. Check that StackOverflow hasn't already solved your problem 3. Go here: https://github.com/pybel/pybel/issues 4. Check that this issue hasn't been solved 5. Click "new issue" 6. Add a short, but descriptive title 7. Add a full description of the problem, including the code that caused it and any support files related to this code so others can reproduce your problem 8. Copy the output and error message you're getting Have a Question or Suggestion? ------------------------------ Same drill!
Submit an issue and we'll have a nice conversation in the thread. Want to Contribute? ------------------- 1. Get the code. Fork the repository from GitHub using the big green button in the top-right corner of https://github.com/pybel/pybel 2. Clone your directory with $ git clone https://github.com//pybel 3. Install with :code:`pip`. The flag, :code:`-e`, makes your installation editable, so your changes will be reflected automatically in your installation. $ cd pybel $ python3 -m pip install -e . 4. Make a branch off of develop, then make contributions! This line makes a new branch and checks it out $ git checkout -b feature/ 5. This project should be well tested, so write unit tests in the :code:`tests/` directory 6. Check that all tests are passing and code coverage is good with :code:`tox` before committing. $ tox Pull Requests ~~~~~~~~~~~~~ Once you've got your feature or bugfix finished (or if it's in a partially complete state but you want to publish it for comment), push it to your fork of the repository and open a pull request against the develop branch on GitHub. Make a descriptive comment about your pull request, perhaps referencing the issue it is meant to fix (something along the lines of "fixes issue #10" will cause GitHub to automatically link to that issue). The maintainers will review your pull request and perhaps make comments about it, request changes, or may pull it in to the develop branch! If you need to make changes to your pull request, simply push more commits to the feature branch in your fork to GitHub and they will automatically be added to the pull. You do not need to close and reissue your pull request to make changes! If you spend a while working on your changes, further commits may be made to the main :code:`PyBEL` repository (called "upstream") before you can make your pull request.
Keep your fork up to date with upstream by pulling the changes--if your fork has diverged too much, it becomes difficult to properly merge pull requests without conflicts. To pull in upstream changes:: $ git remote add upstream https://github.com/pybel/pybel $ git fetch upstream develop Check the log to make sure the upstream changes don't affect your work too much:: $ git log upstream/develop Then merge in the new changes:: $ git merge upstream/develop More information about this whole fork-pull-merge process can be found `here on Github's website `_. pybel-0.12.1/LICENSE000066400000000000000000000261321334645200200137160ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2016-2018 Charles Tapley Hoyt Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. pybel-0.12.1/MANIFEST.in000066400000000000000000000007001334645200200144400ustar00rootroot00000000000000graft src graft tests prune scripts recursive-include docs/source *.py recursive-include docs/source *.rst recursive-include src/pybel/testing/bel *.bel recursive-include src/pybel/testing/belanno *.belanno recursive-include src/pybel/testing/belns *.belns include docs/Makefile global-exclude *.py[cod] __pycache__ *.so *.dylib .DS_Store *.gpickle exclude .bumpversion.cfg include *.rst *.txt *.yml LICENSE tox.ini .flake8 doc8.ini .coveragerc pybel-0.12.1/README.rst000066400000000000000000000155251334645200200144040ustar00rootroot00000000000000PyBEL |zenodo| ============== `PyBEL `_ is a pure Python package for parsing and handling biological networks encoded in the `Biological Expression Language `_ (BEL). It also facilitates data interchange between common formats and databases such as `NetworkX `_, JSON, CSV, SIF, `Cytoscape `_, `CX `_, `NDEx `_, SQL, and `Neo4J `_. Its companion package, `PyBEL Tools `_, contains a suite of functions and pipelines for analyzing the resulting biological networks. We realize we have a name conflict with the python wrapper for the cheminformatics package, OpenBabel. If you're looking for their python wrapper, see `here `_.
=========== =============== ======================= ================== ======================= Stable |stable_build| |stable_windows_build| |stable_coverage| |stable_documentation| Development |develop_build| |develop_windows_build| |develop_coverage| |develop_documentation| =========== =============== ======================= ================== ======================= Citation -------- If you use PyBEL in your work, please cite: .. [1] Hoyt, C. T., *et al.* (2017). `PyBEL: a Computational Framework for Biological Expression Language `_. Bioinformatics, 34(December), 1–2. Getting Started --------------- This example illustrates how the `Selventa Small Corpus `_ can be loaded and visualized in a Jupyter Notebook. Note that this requires an extension, the ``pybel-jupyter`` repository. This is not included by default because installing the Jupyter and iPython stack has a large footprint. .. code-block:: python >>> import pybel, pybel_jupyter >>> graph = pybel.from_url('http://resources.openbel.org/belframework/20150611/knowledge/small_corpus.bel') >>> graph.number_of_nodes() # Will be smaller than expected because we have the most strict settings enabled 1207 >>> pybel_jupyter.to_jupyter(graph) # Need to pip install pybel-jupyter first More examples can be found in the `documentation `_ and in the `PyBEL Notebooks `_ repository. PyBEL also installs a command line interface with the command :code:`pybel` for simple utilities such as data conversion. In this example, a BEL Script is exported to GraphML for viewing in Cytoscape. .. code-block:: sh $ pybel convert --path ~/Desktop/example.bel --graphml ~/Desktop/example.graphml In Cytoscape, open with :code:`Import > Network > From File`. Installation |pypi_version| |python_versions| |pypi_license| ------------------------------------------------------------ PyBEL can be installed easily from `PyPI `_ with the following code in your favorite terminal: .. 
code-block:: sh $ python3 -m pip install pybel or from the latest code on `GitHub `_ with: .. code-block:: sh $ python3 -m pip install git+https://github.com/pybel/pybel.git@develop See the `installation documentation `_ for more advanced instructions. Also, check the change log at :code:`CHANGELOG.rst`. Contributing ------------ Contributions, whether filing an issue, making a pull request, or forking, are appreciated. See :code:`CONTRIBUTING.rst` for more information on getting involved. Please add your name to :code:`AUTHORS.rst`! Acknowledgements ---------------- - This package was originally developed as part of the master's work of `Charles Tapley Hoyt `_ at `Fraunhofer SCAI `_. - This software is proudly built with Paul McGuire's `PyParsing `_ package. - `Scott Colby `_ designed our `logo `_ and provided sage advice - `Christian Ebeling `_ for supervision and consultation Links ----- - Specified by `BEL 1.0 `_ and `BEL 2.0 `_ - Documented on `Read the Docs `_ - Versioned on `GitHub `_ - Tested on `Travis CI `_ - Distributed by `PyPI `_ - Chat on `Gitter `_ .. |stable_build| image:: https://travis-ci.org/pybel/pybel.svg?branch=master :target: https://travis-ci.org/pybel/pybel :alt: Stable Build Status .. |stable_windows_build| image:: https://ci.appveyor.com/api/projects/status/v22l3ymg3bdq525d/branch/master?svg=true :target: https://ci.appveyor.com/project/cthoyt/pybel :alt: Stable Windows Build Status .. |stable_coverage| image:: https://codecov.io/gh/pybel/pybel/coverage.svg?branch=master :target: https://codecov.io/gh/pybel/pybel/branch/master :alt: Stable Coverage Status .. |stable_documentation| image:: https://readthedocs.org/projects/pybel/badge/?version=stable :target: http://pybel.readthedocs.io/en/stable/ :alt: Stable Documentation Status .. |develop_build| image:: https://travis-ci.org/pybel/pybel.svg?branch=develop :target: https://travis-ci.org/pybel/pybel :alt: Development Build Status .. 
|develop_windows_build| image:: https://ci.appveyor.com/api/projects/status/v22l3ymg3bdq525d/branch/develop?svg=true :target: https://ci.appveyor.com/project/cthoyt/pybel :alt: Development Windows Build Status .. |develop_coverage| image:: https://codecov.io/gh/pybel/pybel/coverage.svg?branch=develop :target: https://codecov.io/gh/pybel/pybel/branch/develop :alt: Development Coverage Status .. |develop_documentation| image:: https://readthedocs.org/projects/pybel/badge/?version=latest :target: http://pybel.readthedocs.io/en/latest/ :alt: Development Documentation Status .. |climate| image:: https://codeclimate.com/github/pybel/pybel/badges/gpa.svg :target: https://codeclimate.com/github/pybel/pybel :alt: Code Climate .. |python_versions| image:: https://img.shields.io/pypi/pyversions/PyBEL.svg :alt: Stable Supported Python Versions .. |pypi_version| image:: https://img.shields.io/pypi/v/PyBEL.svg :alt: Current version on PyPI .. |pypi_license| image:: https://img.shields.io/pypi/l/PyBEL.svg :alt: Apache 2.0 License .. |zenodo| image:: https://zenodo.org/badge/68376693.svg :target: https://zenodo.org/badge/latestdoi/68376693 pybel-0.12.1/doc8.ini000066400000000000000000000000351334645200200142410ustar00rootroot00000000000000[doc8] max-line-length = 120 pybel-0.12.1/docs/000077500000000000000000000000001334645200200136355ustar00rootroot00000000000000pybel-0.12.1/docs/Makefile000066400000000000000000000166731334645200200153120ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # Internal variables. 
PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " applehelp to make an Apple Help Book" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " epub3 to make an epub3" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" @echo " coverage to run coverage check of the documentation (if enabled)" @echo " dummy to check syntax errors of document sources" .PHONY: clean clean: rm -rf $(BUILDDIR)/* .PHONY: html html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
.PHONY: dirhtml dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." .PHONY: singlehtml singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." .PHONY: pickle pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." .PHONY: json json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." .PHONY: htmlhelp htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." .PHONY: qthelp qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/PyBEL.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/PyBEL.qhc" .PHONY: applehelp applehelp: $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @echo @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." @echo "N.B. You won't be able to view it unless you put it in" \ "~/Library/Documentation/Help or install it in your application" \ "bundle." .PHONY: devhelp devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/PyBEL" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/PyBEL" @echo "# devhelp" .PHONY: epub epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 
# Run the Sphinx "epub3" builder.
.PHONY: epub3
epub3:
	$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
	@echo
	@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."

# Generate LaTeX sources only; see `latexpdf` to also compile them.
.PHONY: latex
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

# Generate LaTeX sources, then run the generated Makefile's `all-pdf` target.
.PHONY: latexpdf
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# Generate LaTeX sources, then run the generated Makefile's `all-pdf-ja` target.
.PHONY: latexpdfja
latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

# Run the Sphinx "text" builder.
.PHONY: text
text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

# Run the Sphinx "man" builder.
.PHONY: man
man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

# Generate Texinfo sources only; see `info` to also run makeinfo.
.PHONY: texinfo
texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

# Generate Texinfo sources, then run the generated Makefile's `info` target.
# Uses $(MAKE) rather than a literal `make`, matching latexpdf/latexpdfja, so
# the sub-make is the same make program and inherits flags such as -j and -n.
.PHONY: info
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

# Extract translatable messages; uses I18NSPHINXOPTS instead of ALLSPHINXOPTS.
.PHONY: gettext
gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
.PHONY: changes changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." .PHONY: linkcheck linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." .PHONY: doctest doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." .PHONY: coverage coverage: $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." .PHONY: xml xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." .PHONY: pseudoxml pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." .PHONY: dummy dummy: $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy @echo @echo "Build finished. Dummy builder generates no files." pybel-0.12.1/docs/source/000077500000000000000000000000001334645200200151355ustar00rootroot00000000000000pybel-0.12.1/docs/source/cli.rst000066400000000000000000000011721334645200200164370ustar00rootroot00000000000000Command Line Interface ====================== .. note:: The command line wrapper might not work on Windows. Use :code:`python3 -m pybel` if it has issues. PyBEL automatically installs the command :code:`pybel`. This command can be used to easily compile BEL documents and convert to other formats. See :code:`pybel --help` for usage details. This command makes logs of all conversions and warnings to the directory :code:`~/.pybel/`. .. 
click:: pybel.cli:main :prog: pybel :show-nested: Plugins ------- PyBEL's command line interface uses `click-plugins `_ to load extensions. pybel-0.12.1/docs/source/conf.py000066400000000000000000000243271334645200200164440ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import re import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath('../../src')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', 'sphinx_click.ext', ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The encoding of source files. # # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = 'PyBEL' copyright = '2016-2018, Charles Tapley Hoyt' author = 'Charles Tapley Hoyt' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version, including alpha/beta/rc tags. release = '0.12.1' # The short X.Y version. 
# Split the full release string into its semantic-version components.  The
# named groups (major, minor, patch, release, build) are required by the
# ``expand`` template and the ``group('release')`` lookup below; raw strings
# keep the backslash escapes intact.
parsed_version = re.match(
    r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'
    r'(?:-(?P<release>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?'
    r'(?:\+(?P<build>[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?',
    release
)
version = parsed_version.expand(r'\g<major>.\g<minor>.\g<patch>')

# A "-<release>" suffix (e.g. "-dev") marks a pre-release build.
if parsed_version.group('release'):
    tags.add('prerelease')

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# There are two options for replacing |today|: either, you set today to some
# non-false value, then it is used:
#
# today = ''
#
# Else, today_fmt is used as the format for a strftime call.
#
# today_fmt = '%B %d, %Y'

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = []

# The reST default role (used for this markup: `text`) to use for all
# documents.
#
# default_role = None

# If true, '()' will be appended to :func: etc. cross-reference text.
#
# add_function_parentheses = True

# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
#
# add_module_names = True

# If true, sectionauthor and moduleauthor directives will be shown in the
# output. They are ignored by default.
#
# show_authors = False

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'

# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []

# If true, keep warnings as "system message" paragraphs in the built documents.
# keep_warnings = False

todo_include_todos = True

# -- Options for HTML output ----------------------------------------------

# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
# html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. # "<project> v<release> documentation" by default. # # html_title = u'PyBEL vX.Y.Z' # A shorter title for the navigation bar. Default is the same as html_title. # # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # # html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ['_static'] html_static_path = [] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. # # html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. # The empty string is equivalent to '%b %d, %Y'. # # html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # # html_additional_pages = {} # If false, no module index is generated. 
# # html_domain_indices = True # If false, no index is generated. # # html_use_index = True # If true, the index is split into individual pages for each letter. # # html_split_index = False # If true, links to the reST sources are added to the pages. # # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a <link> tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' # # html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. # # html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. # # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. htmlhelp_basename = 'PyBELdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. 
List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'PyBEL.tex', u'PyBEL Documentation', u'Charles Tapley Hoyt', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # # latex_use_parts = False # If true, show page references after internal links. # # latex_show_pagerefs = False # If true, show URL addresses after external links. # # latex_show_urls = False # Documents to append as an appendix to all manuals. # # latex_appendices = [] # If false, will not define \strong, \code, \titleref, \crossref ... but only # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added # packages. # # latex_keep_old_macro_names = True # If false, no module index is generated. # # latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'pybel', u'PyBEL Documentation', [author], 1) ] # If true, show URL addresses after external links. # # man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'PyBEL', u'PyBEL Documentation', author, 'PyBEL', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. # # texinfo_appendices = [] # If false, no module index is generated. # # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. 
# # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # # texinfo_no_detailmenu = False # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), 'networkx': ('https://networkx.github.io/', None), 'py2neo': ('http://py2neo.org/v3/', None), 'sqlalchemy': ('https://docs.sqlalchemy.org/en/latest', None), 'indra': ('https://indra.readthedocs.io/en/latest/', None), 'requests': ('http://docs.python-requests.org/en/master/', None), 'setuptools': ('https://setuptools.readthedocs.io/en/latest/', None), } autodoc_member_order = 'bysource' autoclass_content = 'both' if os.environ.get('READTHEDOCS', None): tags.add('readthedocs') pybel-0.12.1/docs/source/constants.rst000066400000000000000000000001611334645200200177010ustar00rootroot00000000000000Constants ========= .. automodule:: pybel.constants :members: .. automodule:: pybel.language :members: pybel-0.12.1/docs/source/cookbook.rst000066400000000000000000000015461334645200200175030ustar00rootroot00000000000000Cookbook ======== An extensive set of examples can be found on the `PyBEL Notebooks `_ repository on GitHub. These notebooks contain basic usage and also make numerous references to the analytical package `PyBEL Tools `_ Configuration ------------- The default connection string can be set as an environment variable in your ``~/.bashrc``. If you're using MySQL or MariaDB, it could look like this: .. code:: $ export PYBEL_CONNECTION="mysql+pymysql://user:password@server_name/database_name?charset=utf8" Prepare a Cytoscape Network ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Load, compile, and export to Cytoscape format: .. code-block:: sh $ pybel convert --path ~/Desktop/example.bel --graphml ~/Desktop/example.graphml In Cytoscape, open with :code:`Import > Network > From File`. 
pybel-0.12.1/docs/source/datamodel.rst000066400000000000000000000404041334645200200176230ustar00rootroot00000000000000Data Model ========== Molecular biology is a directed graph; not a table. BEL expresses how biological entities interact within many different contexts, with descriptive annotations. PyBEL represents data as a directional multigraph using an extension of :class:`networkx.MultiDiGraph`. Each node and edge has an associated data dictionary for storing relevant/contextual information. This allows for much easier programmatic access to answer more complicated questions, which can be written with python code. Because the data structure is the same in Neo4J, the data can be directly exported with :func:`pybel.to_neo4j`. Neo4J supports the Cypher querying language so that the same queries can be written in an elegant and simple way. Constants --------- These documents refer to many aspects of the data model using constants, which can be found in the top-level module :mod:`pybel.constants`. In these examples, all constants are imported with the following code: .. code-block:: python >>> from pybel.constants import * Terms describing abundances, annotations, and other internal data are designated in :mod:`pybel.constants` with full-caps, such as :data:`pybel.constants.FUNCTION` and :data:`pybel.constants.PROTEIN`. For normal usage, we suggest referring to values in dictionaries by these constants, in case the hard-coded strings behind these constants change. Function Nomenclature ~~~~~~~~~~~~~~~~~~~~~ The following table shows PyBEL's internal mapping from BEL functions to its own constants. 
This can be accessed programmatically via :data:`pybel.parser.language.abundance_labels` +-------------------------------------------+----------------------------------------+ | BEL Function | PyBEL Constant | +===========================================+========================================+ | ``a()``, ``abundance()`` | :data:`pybel.constants.ABUNDANCE` | +-------------------------------------------+----------------------------------------+ | ``g()``, ``geneAbundance()`` | :data:`pybel.constants.GENE` | +-------------------------------------------+----------------------------------------+ | ``r()``, ``rnaAbundance()`` | :data:`pybel.constants.RNA` | +-------------------------------------------+----------------------------------------+ | ``m()``, ``microRNAAbundance()`` | :data:`pybel.constants.MIRNA` | +-------------------------------------------+----------------------------------------+ | ``p()``, ``proteinAbundance()`` | :data:`pybel.constants.PROTEIN` | +-------------------------------------------+----------------------------------------+ | ``bp()``, ``biologicalProcess()`` | :data:`pybel.constants.BIOPROCESS` | +-------------------------------------------+----------------------------------------+ | ``path()``, ``pathology()`` | :data:`pybel.constants.PATHOLOGY` | +-------------------------------------------+----------------------------------------+ | ``complex()``, ``complexAbundance()`` | :data:`pybel.constants.COMPLEX` | +-------------------------------------------+----------------------------------------+ | ``composite()``, ``compositeAbundance()`` | :data:`pybel.constants.COMPOSITE` | +-------------------------------------------+----------------------------------------+ | ``rxn()``, ``reaction()`` | :data:`pybel.constants.REACTION` | +-------------------------------------------+----------------------------------------+ Graph ----- .. automodule:: pybel.struct .. autoclass:: pybel.BELGraph :exclude-members: nodes_iter, edges_iter, add_warning :members: .. 
automethod:: __add__ .. automethod:: __iadd__ .. automethod:: __and__ .. automethod:: __iand__ .. autofunction:: pybel.struct.left_full_join .. autofunction:: pybel.struct.left_outer_join .. autofunction:: pybel.struct.union Nodes ----- Nodes are used to represent physical entities' abundances. The relevant data about a node is stored in its associated data dictionary in :mod:`networkx` that can be accessed with ``my_bel_graph.node[node]``. After parsing, :code:`p(HGNC:GSK3B)` becomes: .. code:: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'GSK3B' } This section describes the structure of the data dictionaries created for each type of node available in BEL. Programmatically, these dictionaries can be converted to tuples, which are used as the keys for the network with the :func:`pybel.parser.canonicalize.node_to_tuple` function. Variants ~~~~~~~~ The addition of a variant tag results in an entry called 'variants' in the data dictionary associated with a given node. This entry is a list with dictionaries describing each of the variants. All variants have the entry 'kind' to identify whether it is a post-translational modification (PTM), gene modification, fragment, or HGVS variant. .. warning:: The canonical ordering for the elements of the ``VARIANTS`` list corresponds to the sorted order of their corresponding node tuples using :func:`pybel.parser.canonicalize.sort_dict_list`. Rather than directly modifying the BELGraph's structure, use :meth:`pybel.BELGraph.add_node_from_data`, which takes care of automatically canonicalizing this dictionary. .. automodule:: pybel.parser.modifiers.variant .. automodule:: pybel.parser.modifiers.gene_substitution .. automodule:: pybel.parser.modifiers.gene_modification .. automodule:: pybel.parser.modifiers.protein_substitution .. automodule:: pybel.parser.modifiers.protein_modification .. automodule:: pybel.parser.modifiers.truncation .. automodule:: pybel.parser.modifiers.fragment Fusions ~~~~~~~ .. 
automodule:: pybel.parser.modifiers.fusion Unqualified Edges ----------------- Unqualified edges are automatically inferred by PyBEL and do not contain citations or supporting evidence. Variant and Modifications' Parent Relations ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ All variants, modifications, fragments, and truncations are connected to their parent entity with an edge having the relationship :code:`hasParent`. For :code:`p(HGNC:GSK3B, var(p.Gly123Arg))`, the following edge is inferred: .. code:: p(HGNC:GSK3B, var(p.Gly123Arg)) hasParent p(HGNC:GSK3B) All variants have this relationship to their reference node. BEL does not specify relationships between variants, such as the case when a given phosphorylation is necessary to make another one. This knowledge could be encoded directly like BEL, since PyBEL does not restrict users from manually asserting unqualified edges. List Abundances ~~~~~~~~~~~~~~~ Complexes and composites are defined by lists. As of version 0.9.0, they contain a list of the data dictionaries that describe their members. For example :code:`complex(p(HGNC:FOS), p(HGNC:JUN))` becomes: .. code:: { FUNCTION: COMPLEX, MEMBERS: [ { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'FOS' }, { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'JUN' } ] } The following edges are also inferred: .. code:: complex(p(HGNC:FOS), p(HGNC:JUN)) hasMember p(HGNC:FOS) complex(p(HGNC:FOS), p(HGNC:JUN)) hasMember p(HGNC:JUN) .. seealso:: BEL 2.0 specification on `complex abundances `_ Similarly, :code:`composite(a(CHEBI:malonate), p(HGNC:JUN))` becomes: .. code:: { FUNCTION: COMPOSITE, MEMBERS: [ { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'malonate' }, { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'JUN' } ] } The following edges are inferred: .. code:: composite(a(CHEBI:malonate), p(HGNC:JUN)) hasComponent a(CHEBI:malonate) composite(a(CHEBI:malonate), p(HGNC:JUN)) hasComponent p(HGNC:JUN) .. 
warning:: The canonical ordering for the elements of the ``MEMBERS`` list corresponds to the sorted order of their corresponding node tuples using :func:`pybel.parser.canonicalize.sort_dict_list`. Rather than directly modifying the BELGraph's structure, use :meth:`BELGraph.add_node_from_data`, which takes care of automatically canonicalizing this dictionary. .. seealso:: BEL 2.0 specification on `composite abundances `_ Reactions ~~~~~~~~~ The usage of a reaction causes many nodes and edges to be created. The following example will illustrate what is added to the network for .. code:: rxn(reactants(a(CHEBI:"(3S)-3-hydroxy-3-methylglutaryl-CoA"), a(CHEBI:"NADPH"), \ a(CHEBI:"hydron")), products(a(CHEBI:"mevalonate"), a(CHEBI:"NADP(+)"))) As of version 0.9.0, the reactants' and products' data dictionaries are included as sub-lists keyed ``REACTANTS`` and ``PRODUCTS``. It becomes: .. code:: { FUNCTION: REACTION, REACTANTS: [ { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: '(3S)-3-hydroxy-3-methylglutaryl-CoA' }, { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'NADPH' }, { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'hydron' } ], PRODUCTS: [ { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'mevalonate' }, { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'NADP(+)' } ] } .. warning:: The canonical ordering for the elements of the ``REACTANTS`` and ``PRODUCTS`` lists corresponds to the sorted order of their corresponding node tuples using :func:`pybel.parser.canonicalize.sort_dict_list`. Rather than directly modifying the BELGraph's structure, use :meth:`BELGraph.add_node_from_data`, which takes care of automatically canonicalizing this dictionary. The following edges are inferred, where :code:`X` represents the previous reaction, for brevity: .. code:: X hasReactant a(CHEBI:"(3S)-3-hydroxy-3-methylglutaryl-CoA") X hasReactant a(CHEBI:"NADPH") X hasReactant a(CHEBI:"hydron") X hasProduct a(CHEBI:"mevalonate") X hasProduct a(CHEBI:"NADP(+)") .. 
seealso:: BEL 2.0 specification on `reactions `_ Edges ----- Design Choices ~~~~~~~~~~~~~~ In the OpenBEL Framework, modifiers such as activities (kinaseActivity, etc.) and transformations (translocations, degradations, etc.) were represented as their own nodes. In PyBEL, these modifiers are represented as a property of the edge. In reality, an edge like :code:`sec(p(HGNC:A)) -> activity(p(HGNC:B), ma(kinaseActivity))` represents a connection between :code:`HGNC:A` and :code:`HGNC:B`. Each of these modifiers explains the context of the relationship between these physical entities. Further, querying a network where these modifiers are part of a relationship is much more straightforward. For example, finding all proteins that are upregulated by the kinase activity of another protein now can be directly queried by filtering all edges for those with a subject modifier whose modification is molecular activity, and whose effect is kinase activity. Having fewer nodes also allows for a much easier display and visual interpretation of a network. The information about the modifier on the subject and activity can be displayed as a color coded source and terminus of the connecting edge. The compiler in OpenBEL framework created nodes for molecular activities like :code:`kin(p(HGNC:YFG))` and induced an edge like :code:`p(HGNC:YFG) actsIn kin(p(HGNC:YFG))`. For transformations, a statement like :code:`tloc(p(HGNC:YFG), GOCC:intracellular, GOCC:"cell membrane")` also induced :code:`tloc(p(HGNC:YFG), GOCC:intracellular, GOCC:"cell membrane") translocates p(HGNC:YFG)`. In PyBEL, we recognize that these modifications are actually annotations to the type of relationship between the subject's entity and the object's entity. 
``p(HGNC:ABC) -> tloc(p(HGNC:YFG), GOCC:intracellular, GOCC:"cell membrane")`` is about the relationship between :code:`p(HGNC:ABC)` and :code:`p(HGNC:YFG)`, while the information about the translocation qualifies that the object is undergoing an event, and not just the abundance. This is a confusion with the use of :code:`proteinAbundance` as a keyword, and perhaps is why many people prefer to use just the keyword :code:`p` Example Edge Data Structure ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Because this data is associated with an edge, the node data for the subject and object are not included explicitly. However, information about the activities, modifiers, and transformations on the subject and object are included. Below is the "skeleton" for the edge data model in PyBEL: .. code:: { SUBJECT: { # ... modifications to the subject node. Only present if non-empty. }, RELATION: POSITIVE_CORRELATION, OBJECT: { # ... modifications to the object node. Only present if non-empty. }, EVIDENCE: '...', CITATION : { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: '...', CITATION_DATE: 'YYYY-MM-DD', CITATION_AUTHORS: 'Jon Snow|John Doe', }, ANNOTATIONS: { 'Disease': { 'Colorectal Cancer': True, } # ... additional annotations as tuple[str,dict[str,bool]] pairs } } Each edge must contain the ``RELATION``, ``EVIDENCE``, and ``CITATION`` entries. The ``CITATION`` must minimally contain ``CITATION_TYPE`` and ``CITATION_REFERENCE`` since these can be used to look up additional metadata. .. note:: Since version 0.10.2, annotations now always appear as dictionaries, even if only one value is present. Activities ~~~~~~~~~~ Modifiers are added to this structure as well. Under this schema, :code:`p(HGNC:GSK3B, pmod(P, S, 9)) pos act(p(HGNC:GSK3B), ma(kin))` becomes: .. code:: { RELATION: POSITIVE_CORRELATION, OBJECT: { MODIFIER: ACTIVITY, EFFECT: { NAME: 'kin', NAMESPACE: BEL_DEFAULT_NAMESPACE } }, CITATION: { ... }, EVIDENCE: '...', ANNOTATIONS: { ... 
} } Activities without molecular activity annotations do not contain an :data:`pybel.constants.EFFECT` entry. Under this schema, :code:`p(HGNC:GSK3B, pmod(P, S, 9)) pos act(p(HGNC:GSK3B))` becomes: .. code:: { RELATION: POSITIVE_CORRELATION, OBJECT: { MODIFIER: ACTIVITY }, CITATION: { ... }, EVIDENCE: '...', ANNOTATIONS: { ... } } Locations ~~~~~~~~~ .. automodule:: pybel.parser.modifiers.location Translocations ~~~~~~~~~~~~~~ Translocations have their own unique syntax. :code:`p(HGNC:YFG1) -> sec(p(HGNC:YFG2))` becomes: .. code:: { RELATION: INCREASES, OBJECT: { MODIFIER: TRANSLOCATION, EFFECT: { FROM_LOC: { NAMESPACE: 'GOCC', NAME: 'intracellular' }, TO_LOC: { NAMESPACE: 'GOCC', NAME: 'extracellular space' } } }, CITATION: { ... }, EVIDENCE: '...', ANNOTATIONS: { ... } } .. seealso:: BEL 2.0 specification on `translocations `_ Degradations ~~~~~~~~~~~~ Degradations are simpler, because there's no :data:`pybel.constants.EFFECT` entry. :code:`p(HGNC:YFG1) -> deg(p(HGNC:YFG2))` becomes: .. code:: { RELATION: INCREASES, OBJECT: { MODIFIER: DEGRADATION }, CITATION: { ... }, EVIDENCE: '...', ANNOTATIONS: { ... } } .. seealso:: BEL 2.0 specification on `degradations `_ pybel-0.12.1/docs/source/dsl.rst000066400000000000000000000001021334645200200164420ustar00rootroot00000000000000Internal DSL ============ .. automodule:: pybel.dsl :members: pybel-0.12.1/docs/source/examples.rst000066400000000000000000000003671334645200200175130ustar00rootroot00000000000000Example Networks ================ .. automodule:: pybel.examples .. automodule:: pybel.examples.egf_example .. py:data:: pybel.examples.egf_graph .. automodule:: pybel.examples.sialic_acid_example .. py:data:: pybel.examples.sialic_acid_graph pybel-0.12.1/docs/source/filters.rst000066400000000000000000000001031334645200200173310ustar00rootroot00000000000000Filters ======= .. 
automodule:: pybel.struct.filters :members: pybel-0.12.1/docs/source/grouping.rst000066400000000000000000000001061334645200200175160ustar00rootroot00000000000000Grouping ======== .. automodule:: pybel.struct.grouping :members: pybel-0.12.1/docs/source/index.rst000066400000000000000000000050351334645200200170010ustar00rootroot00000000000000PyBEL Documentation =================== Biological Expression Language (BEL) is a domain-specific language that enables the expression of complex molecular relationships and their context in a machine-readable form. Its simple grammar and expressive power have led to its successful use in describing complex disease networks with several thousands of relationships. PyBEL is a pure Python software package that parses BEL scripts, validates their semantics, and facilitates data interchange between common formats and database systems like JSON, CSV, Excel, SQL, CX, and Neo4J. Its companion package, `PyBEL-Tools `_, contains a library of functions for analysis of biological networks. For result-oriented guides, see the `PyBEL-Notebooks `_ repository. Installation is as easy as getting the code from `PyPI `_ with :code:`python3 -m pip install pybel`. Citation -------- If you use PyBEL in your work, please cite [1]_: .. [1] Hoyt, C. T., *et al.* (2017). `PyBEL: a Computational Framework for Biological Expression Language `_. Bioinformatics, 34(December), 1–2. Links ----- - Specified by `BEL 1.0 `_ and `BEL 2.0 `_ - Documented on `Read the Docs `_ - Versioned on `GitHub `_ - Tested on `Travis CI `_ - Distributed by `PyPI `_ - Chat on `Gitter `_ .. toctree:: :maxdepth: 2 :caption: Getting Started :name: start overview installation .. toctree:: :maxdepth: 2 :caption: Data Structure :name: data datamodel examples summary filters transformations grouping pipeline .. toctree:: :maxdepth: 2 :caption: Conversion :name: conversion io .. toctree:: :caption: Database :name: database manager models .. 
toctree:: :maxdepth: 2 :caption: Topic Guide :name: topics cookbook troubleshooting cli .. toctree:: :caption: Reference :name: reference constants parser utilities dsl logging .. toctree:: :caption: Project :name: project roadmap postmortem technology Indices and Tables ------------------ * :ref:`genindex` * :ref:`modindex` * :ref:`search` pybel-0.12.1/docs/source/installation.rst000066400000000000000000000000601334645200200203640ustar00rootroot00000000000000Installation ============ .. automodule:: pybel pybel-0.12.1/docs/source/io.rst000066400000000000000000000062561334645200200163070ustar00rootroot00000000000000Input and Output ================ .. automodule:: pybel.io Import ------ Parsing Modes ~~~~~~~~~~~~~ The PyBEL parser has several modes that can be enabled and disabled. They are described below. Allow Naked Names ***************** By default, this is set to :code:`False`. The parser does not allow identifiers that are not qualified with namespaces (*naked names*), like in :code:`p(YFG)`. A proper namespace, like :code:`p(HGNC:YFG)` must be used. By setting this to :code:`True`, the parser becomes permissive to naked names. In general, this is bad practice and this feature will be removed in the future. Allow Nested ************ By default, this is set to :code:`False`. The parser does not allow nested statements. See `overview`. By setting this to :code:`True` the parser will accept nested statements one level deep. Citation Clearing ***************** By default, this is set to :code:`True`. While the BEL specification clearly states how the language should be used as a state machine, many BEL documents do not conform to the strict :code:`SET`/:code:`UNSET` rules. To guard against annotations accidentally carried from one set of statements to the next, the parser has two modes. 
By default, in citation clearing mode, when a :code:`SET CITATION` command is reached, it will clear all other annotations (except the :code:`STATEMENT_GROUP`, which has higher priority). This behavior can be disabled by setting this to :code:`False` to re-enable strict parsing. Reference ~~~~~~~~~ .. autofunction:: pybel.from_lines .. autofunction:: pybel.from_path .. autofunction:: pybel.from_url Canonicalization ---------------- .. autofunction:: pybel.to_bel_lines .. autofunction:: pybel.to_bel .. autofunction:: pybel.to_bel_path Transport --------- All transport pairs are reflective and data-preserving. Bytes ~~~~~ .. automodule:: pybel.io.gpickle .. autofunction:: pybel.from_pickle .. autofunction:: pybel.to_pickle .. autofunction:: pybel.from_bytes .. autofunction:: pybel.to_bytes Node-Link JSON ~~~~~~~~~~~~~~ .. automodule:: pybel.io.nodelink .. autofunction:: pybel.from_json .. autofunction:: pybel.to_json .. autofunction:: pybel.from_json_file .. autofunction:: pybel.to_json_file .. autofunction:: pybel.from_json_path .. autofunction:: pybel.to_json_path .. autofunction:: pybel.from_jsons .. autofunction:: pybel.to_jsons JSON Graph Interchange Format ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: pybel.io.jgif .. autofunction:: pybel.from_jgif .. autofunction:: pybel.from_cbn_jgif .. autofunction:: pybel.to_jgif Export ------ .. automodule:: pybel.io.extras .. autofunction:: pybel.to_graphml .. autofunction:: pybel.to_csv .. autofunction:: pybel.to_sif .. autofunction:: pybel.to_gsea Database -------- .. automodule:: pybel.manager.database_io .. autofunction:: pybel.from_database .. autofunction:: pybel.to_database Neo4j ~~~~~ .. automodule:: pybel.io.neo4j .. autofunction:: pybel.to_neo4j BEL Commons ----------- .. automodule:: pybel.io.web .. autofunction:: pybel.from_web .. autofunction:: pybel.to_web INDRA ----- .. automodule:: pybel.io.indra .. autofunction:: pybel.from_indra_statements .. autofunction:: pybel.to_indra_statements .. 
autofunction:: pybel.from_biopax pybel-0.12.1/docs/source/logging.rst000066400000000000000000000002611334645200200173140ustar00rootroot00000000000000Logging Messages ================ Errors ------ .. automodule:: pybel.exceptions :members: Parse Exceptions ---------------- .. automodule:: pybel.parser.exc :members: pybel-0.12.1/docs/source/manager.rst000066400000000000000000000011441334645200200173010ustar00rootroot00000000000000Manager ======= Manager API ----------- The BaseManager takes care of building and maintaining the connection to the database via SQLAlchemy. .. autoclass:: pybel.manager.BaseManager :members: The Manager collates multiple groups of functions for interacting with the database. For sake of code clarity, they are separated across multiple classes that are documented below. .. autoclass:: pybel.manager.Manager :members: :show-inheritance: Manager Components ------------------ .. autoclass:: pybel.manager.NetworkManager :members: .. autoclass:: pybel.manager.QueryManager :members: pybel-0.12.1/docs/source/models.rst000066400000000000000000000001011334645200200171420ustar00rootroot00000000000000Models ====== .. automodule:: pybel.manager.models :members: pybel-0.12.1/docs/source/overview.rst000066400000000000000000000372211334645200200175420ustar00rootroot00000000000000Overview ======== Background on Systems Biology Modelling --------------------------------------- Biological Expression Language (BEL) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Biological Expression Language (BEL) is a domain specific language that enables the expression of complex molecular relationships and their context in a machine-readable form. Its simple grammar and expressive power have led to its successful use to describe complex disease networks with several thousands of relationships. For a detailed explanation, see the BEL `1.0 `_ and `2.0 `_ specifications. 
OpenBEL Links ~~~~~~~~~~~~~ - OpenBEL on `Google Groups `_ - OpenBEL `Wiki `_ - OpenBEL on `GitHub `_ - Chat on `Gitter `_ Design Considerations --------------------- Missing Namespaces and Improper Names ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The use of openly shared controlled vocabularies (namespaces) within BEL facilitates the exchange and consistency of information. Finding the correct :code:`namespace:name` pair is often a difficult part of the curation process. Outdated Namespaces ~~~~~~~~~~~~~~~~~~~ OpenBEL provides a variety of `namespaces `_ covering each of the BEL function types. These namespaces are generated by code found at https://github.com/OpenBEL/resource-generator and distributed at http://resources.openbel.org/belframework/. This code has not been maintained to reflect the changes in the underlying resources, so this repository has been forked and updated at https://github.com/pybel/resource-generator to reflect the most recent versions of the underlying namespaces. The files are now distributed using the Fraunhofer SCAI `Artifactory server `_. Generating New Namespaces ~~~~~~~~~~~~~~~~~~~~~~~~~ In some cases, it is appropriate to design a new namespace, using the `custom namespace specification `_ provided by the OpenBEL Framework. Packages for generating namespace, annotation, and knowledge resources have been grouped in the `Bio2BEL `_ organization on GitHub. Synonym Issues ~~~~~~~~~~~~~~ Due to the huge number of terms across many namespaces, it's difficult for curators to know the domain-specific synonyms that obscure the controlled/preferred term. However, the issue of synonym resolution and semantic searching has already been generally solved by the use of ontologies. Besides just a controlled vocabulary, they also provide a hierarchical model of knowledge, synonyms with cross-references to databases and other ontologies, and other information for semantic reasoning. Ontologies in the biomedical domain can be found at `OBO `_ and `EMBL-EBI OLS `_. 
Additionally, as a tool for curators, the EMBL Ontology Lookup Service (OLS) allows for semantic searching. Simple queries for the terms 'mitochondrial dysfunction' and 'amyloid beta-peptides' immediately returned results from relevant ontologies, and ended a long debate over how to represent these objects within BEL. EMBL-EBI also provides a programmatic API to the OLS service, for searching terms (http://www.ebi.ac.uk/ols/api/search?q=folic%20acid) and suggesting resolutions (http://www.ebi.ac.uk/ols/api/suggest?q=folic+acid) Implementation -------------- PyBEL is implemented using the PyParsing module. It provides flexibility and incredible speed in parsing compared to regular expression implementation. It also allows for the addition of parsing action hooks, which allow the graph to be checked semantically at compile-time. It uses SQLite to provide a consistent and lightweight caching system for external data, such as namespaces, annotations, ontologies, and SQLAlchemy to provide a cross-platform interface. The same data management system is used to store graphs for high-performance querying. Extensions to BEL ----------------- The PyBEL compiler is fully compliant with both BEL v1.0 and v2.0 and automatically upgrades legacy statements. Additionally, PyBEL includes several additions to the BEL specification to enable expression of important concepts in molecular biology that were previously missing and to facilitate integrating new data types. A short example is the inclusion of protein oxidation in the default BEL namespace for protein modifications. Other, more elaborate additions are outlined below. Syntax for Epigenetics ~~~~~~~~~~~~~~~~~~~~~~ PyBEL introduces the gene modification function, gmod(), as a syntax for encoding epigenetic modifications. Its usage mirrors the pmod() function for proteins and includes arguments for methylation. 
For example, the methylation of NDUFB6 was found to be negatively correlated with its expression in a study of insulin resistance and Type II diabetes. This can now be expressed in BEL such as in the following statement: ``g(HGNC:NDUFB6, gmod(Me)) negativeCorrelation r(HGNC:NDUFB6)`` References: - https://www.ncbi.nlm.nih.gov/pubmed/17948130 - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4655260/ Definition of Namespaces as Regular Expressions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEL imposes the constraint that each identifier must be qualified with an enumerated namespace to enable semantic interoperability and data integration. However, enumerating a namespace with potentially billions of names, such as dbSNP, poses a computational issue. PyBEL introduces syntax for defining namespaces with a consistent pattern using a regular expression to overcome this issue. For these namespaces, semantic validation can be performed in post-processing against the underlying database. The dbSNP namespace can be defined with a syntax familiar to BEL annotation definitions with regular expressions as follows: ``DEFINE NAMESPACE dbSNP AS PATTERN "rs[0-9]+"`` Definition of Resources using OWL ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Previous versions of PyBEL until 0.11.2 had an alternative namespace definition. Now it is recommended to either generate namespace files with reproducible build scripts following the Bio2BEL framework, or to directly add them to the database with the Bio2BEL NamespaceManagerMixin extension. Explicit Node Labels ~~~~~~~~~~~~~~~~~~~~ While the BEL 2.0 specification made it possible to represent new terms, such as the APOE gene with two variants resulting in the E2 allele, it came at the price of encoding terms in a technical and less readable way. 
An explicit statement for labeling nodes has been added, such that the resulting data structure will have a label for the node: ``g(HGNC:APOE, var(c.388T>C), var(c.526C>T)) labeled "APOE E2"`` When InChI is used, these strings are very hard to visualize. Using a label is helpful for later visualization: .. code-block:: none a(INCHI:"InChI=1S/C20H28N2O5/c1-3-27-20(26)16(12-11-15-8-5-4-6-9-15)21-14(2)18(23)22-13-7-10-17(22)19(24)25/h4-6, 8-9,14,16-17,21H,3,7,10-13H2,1-2H3,(H,24,25)/t14-,16-,17-/m0/s1") labeled "Enalapril" Below is the same molecule again, but represented with an InChIKey: ``a(INCHIKEY:"GBXSMTUPTTWBMN-XIRDDKMYSA-N") labeled "Enalapril"`` It's also easy to use the universe of RESTful API services from UniChem, ChEMBL, or WikiData to download and annotate these automatically. Further information on Enalapril can be found on `WikiData `_, `UniChem `_, and `ChEMBL `_. Things to Consider ------------------ Do All Statements Need Supporting Text? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Yes! All statements must be minimally qualified with a citation and evidence (now called SupportingText in BEL 2.0) to maintain provenance. Statements without evidence can't be traced to their source or evaluated independently from the curator, so they are excluded. Multiple Annotations ~~~~~~~~~~~~~~~~~~~~ When an annotation has a list, it means that the following BEL relations are true for each of the listed values. The lines below show a BEL relation that corresponds to two edges, each with the same citation but different values for :code:`ExampleAnnotation`. This should be considered carefully for analyses dealing with the number of edges between two entities. .. code:: SET Citation = {"PubMed","Example Article","12345"} SET ExampleAnnotation = {"Example Value 1", "Example Value 2"} p(HGNC:YFG1) -> p(HGNC:YFG2) Furthermore, if there are multiple annotations with lists, the following BEL relations are true for all of the different combinations of them. 
The following statements will produce four edges, as the Cartesian product of the values used for both :code:`ExampleAnnotation1` and :code:`ExampleAnnotation2`. This might not be the knowledge that the annotator wants to express, and is prone to mistakes, so use of annotation lists is not recommended. .. code:: SET Citation = {"PubMed","Example Article","12345"} SET ExampleAnnotation1 = {"Example Value 11", "Example Value 12"} SET ExampleAnnotation2 = {"Example Value 21", "Example Value 22"} p(HGNC:YFG1) -> p(HGNC:YFG2) Namespace and Annotation Name Choices ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :code:`*.belns` and :code:`*.belanno` configuration files include an entry called "Keyword" in their respective [Namespace] and [AnnotationDefinition] sections. To maintain understandability between BEL documents, PyBEL warns when the names given in :code:`*.bel` documents do not match their respective resources. For now, capitalization is not considered, but in the future, PyBEL will also warn when capitalization is not properly stylized, like forgetting the lowercase 'h' in "ChEMBL". Why Not Nested Statements? ~~~~~~~~~~~~~~~~~~~~~~~~~~ BEL has different relationships for modeling direct and indirect causal relations. Direct ****** - :code:`A => B` means that `A` directly increases `B` through a physical process. - :code:`A =| B` means that `A` directly decreases `B` through a physical process. Indirect ******** The relationship between two entities can be coded in BEL, even if the process is not well understood. - :code:`A -> B` means that `A` indirectly increases `B`. There are hidden elements in `X` that mediate this interaction through a pathway of direct interactions :code:`A (=> or =|) X_1 (=> or =|) ... X_n (=> or =|) B`, or through a set of multiple pathways that constitute a network. - :code:`A -| B` means that `A` indirectly decreases `B`. Like for :code:`A -> B`, this process involves hidden components with varying activities. 
Increasing Nested Relationships ******************************* BEL also allows the object of a relationship to be another statement. - :code:`A => (B => C)` means that `A` increases the process by which `B` increases `C`. The example in the BEL Spec :code:`p(HGNC:GATA1) => (act(p(HGNC:ZBTB16)) => r(HGNC:MPL))` represents GATA1 directly increasing the process by which ZBTB16 directly increases MPL. Before, directly increasing was used to specify physical contact, so it's reasonable to conclude that :code:`p(HGNC:GATA1) => act(p(HGNC:ZBTB16))`. The specification cites examples when `B` is an activity that only is affected in the context of `A` and `C`. This is complicated enough that it is both impractical to standardize during curation, and impractical to represent in a network. - :code:`A -> (B => C)` can be interpreted by assuming that `A` indirectly increases `B`, and because of monotonicity, conclude that :code:`A -> C` as well. - :code:`A => (B -> C)` is more difficult to interpret, because it does not describe which part of process :code:`B -> C` is affected by `A` or how. Is it that :code:`A => B`, and :code:`B => C`, so we conclude :code:`A -> C`, or does it mean something else? Perhaps `A` impacts a different portion of the hidden process in :code:`B -> C`. These statements are ambiguous enough that they should be written as just :code:`A => B`, and :code:`B -> C`. If there is no literature evidence for the statement :code:`A -> C`, then it is not the job of the curator to make this inference. Identifying statements of this kind might be the goal of a bioinformatics analysis of the BEL network after compilation. - :code:`A -> (B -> C)` introduces even more ambiguity, and it should not be used. - :code:`A => (B =| C)` states `A` increases the process by which `B` decreases `C`. One interpretation of this statement might be that :code:`A => B` and :code:`B =| C`. An analysis could infer :code:`A -| C`. 
Statements in the form of :code:`A -> (B =| C)` can also be resolved this way, but with added ambiguity. Decreasing Nested Relationships ******************************* While we could agree on usage for the previous examples, the decrease of a nested statement introduces an unreasonable amount of ambiguity. - :code:`A =| (B => C)` could mean `A` decreases `B`, and `B` also increases `C`. Does this mean A decreases C, or does it mean that C is still increased, but just not as much? Which of these statements takes precedence? Or do their effects cancel? The same can be said about :code:`A -| (B => C)`, and with added ambiguity for indirect increases :code:`A -| (B -> C)`. - :code:`A =| (B =| C)` could mean that `A` decreases `B` and `B` decreases `C`. We could conclude that `A` increases `C`, or could we again run into the problem of not knowing the precedence? The same is true for the indirect versions. Recommendations for Use in PyBEL ******************************** Because the ambiguity of nested statements is considered a great risk to clarity, PyBEL disables the usage of nested statements by default. See the Input and Output section for different parser settings. At Fraunhofer SCAI, curators resolved these statements to single statements to improve the precision and readability of our BEL documents. While most statements in the form :code:`A rel1 (B rel2 C)` can be reasonably expanded to :code:`A rel1 B` and :code:`B rel2 C`, the few that cannot are the difficult-to-interpret cases that we need to be careful about in our curation and later analyses. Why Not RDF? ~~~~~~~~~~~~ Current bel2rdf serialization tools build URLs with the OpenBEL Framework domain as a namespace, rather than respecting the original namespaces of original entities. This does not follow the best practices of the semantic web, where URLs representing an object point to a real page with additional information. For example, UniProt Knowledge Base does an exemplary job of this. 
Ultimately, using non-standard URLs makes harmonizing and data integration difficult. Additionally, the RDF format does not easily allow for the annotation of edges. A simple statement in BEL that one protein up-regulates another can be easily represented in a triple in RDF, but when the annotations and citation from the BEL document need to be included, this forces RDF serialization to use approaches like representing the statement itself as a node. RDF was not intended to represent this type of information, but more properly for locating resources (hence its name). Furthermore, many blank nodes are introduced throughout the process. This makes RDF incredibly difficult to understand or work with. Later, writing queries in SPARQL becomes very difficult because the data format is complicated and the language is limited. For example, it would be incredibly complicated to write a query in SPARQL to get the objects of statements from publications by a certain author. pybel-0.12.1/docs/source/parser.rst000066400000000000000000000022371334645200200171670ustar00rootroot00000000000000Parsers ======= This page is for users who want to squeeze the most bizarre possibilities out of PyBEL. Most users will not need this reference. PyBEL makes extensive use of the PyParsing module. The code is organized into different modules to reflect the different facets of the BEL language. These parsers support BEL 2.0 and have some backwards compatibility for rewriting BEL v1.0 statements as BEL v2.0. The biologist and bioinformatician using this software will likely never need to read this page, but a developer seeking to extend the language will be interested to see the inner workings of these parsers. See: https://github.com/OpenBEL/language/blob/master/version_2.0/MIGRATE_BEL1_BEL2.md Metadata Parser --------------- .. autoclass:: pybel.parser.parse_metadata.MetadataParser :members: Control Parser -------------- .. 
autoclass:: pybel.parser.parse_control.ControlParser :members: Identifier Parser ----------------- .. autoclass:: pybel.parser.parse_identifier.IdentifierParser :members: BEL Parser ---------- .. autoclass:: pybel.parser.parse_bel.BELParser :members: Sub-Parsers ----------- .. automodule:: pybel.parser.modifiers :members: pybel-0.12.1/docs/source/pipeline.rst000066400000000000000000000004011334645200200174670ustar00rootroot00000000000000Pipeline ======== .. autoclass:: pybel.Pipeline :members: Transformation Decorators ------------------------- .. automodule:: pybel.struct.pipeline.decorators :members: Exceptions ---------- .. automodule:: pybel.struct.pipeline.exc :members: pybel-0.12.1/docs/source/postmortem.rst000066400000000000000000000022271334645200200201030ustar00rootroot00000000000000Current Issues ============== Speed ----- Speed is still an issue, because documents above 100K lines still take a couple of minutes to run. This issue is exacerbated by (optionally) logging output to the console, which can make it more than 3x or 4x as slow. Namespaces ---------- The default namespaces from OpenBEL do not follow a standard file format. They are similar to INI config files, but do not use consistent delimiters. Also, many of the namespaces don't respect that the delimiter should not be used in the namespace names. There are also lots of names with strange characters, which may have been caused by copying from a data source that had specific escape characters without proper care. Testing ------- Testing was very difficult because the example documents on the OpenBEL website had many semantic errors, such as using names and annotation values that were not defined within their respective namespace and annotation definition files. They also contained syntax errors like naked names, which are not only syntactically incorrect, but lead to bad science; and improper usage of activities, like illegally nesting an activity within a composite statement. 
pybel-0.12.1/docs/source/roadmap.rst000066400000000000000000000027761334645200200173260ustar00rootroot00000000000000Roadmap ======= This project road map documents not only the PyBEL repository, but the PyBEL Tools and BEL Commons repositories as well as the Bio2BEL project. PyBEL ----- - Performance improvements - Parallelization of parsing - On-the-fly validation with OLS or MIRIAM Bio2BEL ------- - Generation of new namespaces, equivalencies, and hierarchical knowledge (isA and partOf relations) - FlyBase - InterPro (Done) - UniProt (Done) - Human Phenotype Ontology - Uber Anatomy Ontology - HGNC Gene Families (Done) - Enzyme Classification (Done) - Integration of knowledge sources - ChEMBL - Comparative Toxicogenomics Database (Done) - BRENDA - MetaCyc - Protein complex definitions Data2BEL -------- Integration of analytical pipelines to convert data to BEL - LD Block Analysis - Gene Co-expression Analysis - Differential Gene Expression Analysis PyBEL Tools ----------- - Biological Grammar - Network motif identification - Stability analysis - Prior knowledge comparison - Molecular activity annotation - SNP Impact - Implementation of standard BEL Algorithms - RCR - NPA - SST - Development of new algorithms - Heat diffusion algorithms - Cart Before the Horse - Metapath analysis - Reasoning and inference rules - Subgraph Expansion application in NeuroMMSigDB - Chemical Enrichment in NeuroMMSigDB BEL Commons ----------- - Integration with BELIEF - Integration with NeuroMMSigDB (Done) - Import and export from NDEx pybel-0.12.1/docs/source/summary.rst000066400000000000000000000001031334645200200173560ustar00rootroot00000000000000Summary ======= .. automodule:: pybel.struct.summary :members: pybel-0.12.1/docs/source/technology.rst000066400000000000000000000135451334645200200200520ustar00rootroot00000000000000Technology ========== This page is meant to describe the development stack for PyBEL, and should be a useful introduction for contributors. 
Versioning ---------- PyBEL is versioned on GitHub so changes in its code can be tracked over time and to make use of the variety of software development plugins. Code is produced following the `Git Flow `_ philosophy, which means that new features are coded in branches off of the development branch and merged after they are triaged. Finally, develop is merged into master for releases. If there are bugs in releases that need to be fixed quickly, "hot fix" branches from master can be made, then merged back to master and develop after fixing the problem. Testing in PyBEL ---------------- PyBEL is written with extensive unit testing and integration testing. Whenever possible, test-driven development is practiced. This means that new ideas for functions and features are encoded as blank classes/functions and directly writing tests for the desired output. After tests have been written that define how the code should work, the implementation can be written. Test-driven development requires us to think about design before making quick and dirty implementations. This results in better code. Additionally, thorough testing suites make it possible to catch when changes break existing functionality. Tests are written with the standard :mod:`unittest` library. Some functionality, such as the :mod:`mock` module, is only available by default in Python 3, so backports must be used for testing in Python 2. Unit Testing ~~~~~~~~~~~~ Unit tests check that the functionality of the different parts of PyBEL work independently. An example unit test can be found in :code:`tests.test_parse_bel.TestAbundance.test_short_abundance`. It ensures that the parser is able to handle a given string describing the abundance of a chemical/other entity in BEL. It tests that the parser produces the correct output, that the BEL statement is converted to the correct internal representation. In this example, this is a tuple describing the abundance of oxygen atoms. 
Finally, it tests that this representation is added as a node in the underlying BEL graph with the appropriate attributes added. Integration Testing ~~~~~~~~~~~~~~~~~~~ Integration tests are more high level, and ensure that the software accomplishes more complicated goals by using many components. An example integration test is found in tests.test_import.TestImport.test_from_fileURL. This test ensures that a BEL script can be read and results in a NetworkX object that contains all of the information described in the script. Tox ~~~ While IDEs like PyCharm provide excellent testing tools, they are not programmatic. `Tox `_ is a Python package that provides a CLI to run automated testing procedures (as well as other build functions, that aren't important to explain here). In PyBEL, it is used to run the unit tests in the :code:`tests` folder with the :mod:`pytest` harness. It also runs :code:`check-manifest`, builds the documentation with :mod:`sphinx`, and computes the code coverage of the tests. The entire procedure is defined in :code:`tox.ini`. Tox also allows tests to be run on many different versions of Python. Continuous Integration ~~~~~~~~~~~~~~~~~~~~~~ Continuous integration is a philosophy of automatically testing code as it changes. PyBEL makes use of the Travis CI server to perform testing because of its tight integration with GitHub. Travis automatically installs git hooks inside GitHub so it knows when a new commit is made. Upon each commit, Travis downloads the newest commit from GitHub and runs the tests configured in the :code:`.travis.yml` file in the top level of the PyBEL repository. This file effectively instructs the Travis CI server to run Tox. It also allows for the modification of the environment variables. This is used in PyBEL to test many different versions of python. Code Coverage ~~~~~~~~~~~~~ After building, Travis sends code coverage results to `codecov.io `_. 
This site helps visualize untested code and track the improvement of testing coverage over time. It also integrates with GitHub to show which feature branches are inadequately tested. In development of PyBEL, inadequately tested code is not allowed to be merged into develop. Versioning ~~~~~~~~~~ PyBEL uses semantic versioning. In general, the project's version string will have a suffix :code:`-dev` like in :code:`0.3.4-dev` throughout the development cycle. After code is merged from feature branches to develop and it is time to deploy, this suffix is removed and develop branch is merged into master. The version string appears in multiple places throughout the project, so BumpVersion is used to automate the updating of these version strings. See .bumpversion.cfg for more information. Deployment ---------- PyBEL is also distributed through PyPI (pronounced Py-Pee-Eye). Travis CI has a wonderful integration with PyPI, so any time a tag is made on the master branch (and also assuming the tests pass), a new distribution is packed and sent to PyPI. Refer to the "deploy" section at the bottom of the :code:`.travis.yml` file for more information, or the Travis CI `PyPI deployment documentation `_. As a side note, Travis CI has an encryption tool so the password for the PyPI account can be displayed publicly on GitHub. Travis decrypts it before performing the upload to PyPI. Steps ~~~~~ 1. :code:`bumpversion release` on development branch 2. Push to git 3. After tests pass, merge develop into master 4. After tests pass, create a tag on GitHub with the same name as the version number (on master) 5. Travis will automatically deploy to PyPI after tests pass. After checking deployment has been successful, switch to develop and :code:`bumpversion patch` pybel-0.12.1/docs/source/transformations.rst000066400000000000000000000001241334645200200211150ustar00rootroot00000000000000Transformations =============== .. 
automodule:: pybel.struct.mutation :members: pybel-0.12.1/docs/source/troubleshooting.rst000066400000000000000000000076021334645200200211230ustar00rootroot00000000000000Troubleshooting =============== Common problems and questions will be posted here. Encoding Issues ~~~~~~~~~~~~~~~ Sometimes, Windows computers stick a weird unicode object :code:`\u2013` at the beginning of files. This makes the function :py:func:`pybel.parser.utils.sanitize_file_lines` have a problem. The solution, when loading a BEL script via :py:func:`pybel.from_path` is to use the :code:`encoding` keyword argument to specify the right encoding. The default is :code:`utf-8` because this is the most common, but when this error happens, set it explicitly to :code:`utf_8_sig`. More specific documentation is available in the Input and Output page. Scenario ******** .. code-block:: python >>> import pybel >>> graph = pybel.from_path('~/Desktop/small_corpus.bel') UnicodeDecodeError Traceback (most recent call last) in () 7 ad = pybel.from_pickle(path_2_AD_pickle) 8 else: ----> 9 ad = pybel.from_path(path_2_AD_bel) 10 pybel.to_pickle(ad, path_2_AD_pickle) C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\graph.py in from_path(path, **kwargs) 61 log.info('Loading from path: %s', path) 62 with open(os.path.expanduser(path)) as f: ---> 63 return BELGraph(lines=f, **kwargs) 64 65 C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\graph.py in __init__(self, lines, context, lenient, definition_cache_manager, log_stream, *attrs, **kwargs) 102 103 if lines is not None: --> 104 self.parse_lines(lines, context, lenient, --> definition_cache_manager, log_stream) 105 106 def parse_lines(self, lines, context=None, lenient=False, definition_cache_manager=None, log_stream=None): C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\graph.py in parse_lines(self, lines, context, lenient, definition_cache_manager, log_stream) 125 
self.context = context 126 --> 127 docs, defs, states = --> split_file_to_annotations_and_definitions(lines) 128 129 if isinstance(definition_cache_manager, DefinitionCacheManager): C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\parser\utils.py in split_file_to_annotations_and_definitions(file) 49 def split_file_to_annotations_and_definitions(file): 50 """Enumerates a line iterable and splits into 3 parts""" ---> 51 content = list(sanitize_file_lines(file)) 52 53 end_document_section = 1 + max(j for j, (i, l) in enumerate(content) if l.startswith('SET DOCUMENT')) C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\parser\utils.py in sanitize_file_lines(f) 16 it = iter(it) 17 ---> 18 for line_number, line in it: 19 if line.endswith('\\'): 20 log.log(4, 'Multiline quote starting on line: %d', line_number) C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\site-packages\pybel\parser\utils.py in (.0) 12 def sanitize_file_lines(f): 13 """Enumerates a line iterator and returns the pairs of (line number, line) that are cleaned""" ---> 14 it = (line.strip() for line in f) 15 it = filter(lambda i_l: i_l[1] and not i_l[1].startswith('#'), enumerate(it, start=1)) 16 it = iter(it) C:\Users\s8310253\AppData\Local\Continuum\Anaconda3420\lib\encodings\cp1252.py in decode(self, input, final) 21 class IncrementalDecoder(codecs.IncrementalDecoder): 22 def decode(self, input, final=False): ---> 23 return ---> codecs.charmap_decode(input,self.errors,decoding_table)[0] 24 25 class StreamWriter(Codec,codecs.StreamWriter): UnicodeDecodeError: 'charmap' codec can't decode byte 0x9d in position 4668: character maps to Solution ******** .. code-block:: python >>> import pybel >>> graph = pybel.from_path('~/Desktop/small_corpus.bel', encoding='utf_8_sig') >>> # Success! 
pybel-0.12.1/docs/source/utilities.rst000066400000000000000000000006571334645200200177120ustar00rootroot00000000000000Utilities ========= Some utilities that are used throughout the software are explained here: General Utilities ~~~~~~~~~~~~~~~~~ .. automodule:: pybel.utils :members: IO Utilities ~~~~~~~~~~~~ .. autofunction:: pybel.io.line_utils.parse_lines Parser Utilities ~~~~~~~~~~~~~~~~ .. automodule:: pybel.parser.utils :members: Canonicalization Utilities ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. automodule:: pybel.tokens :members: pybel-0.12.1/requirements-rtfd.txt000066400000000000000000000004001334645200200171200ustar00rootroot00000000000000# This requirements file does not contain the extras networkx>=2.1 sqlalchemy click click-plugins requests requests_file pyparsing six tqdm # for automatically generating CLI documentation. See: https://github.com/click-contrib/sphinx-click sphinx-click pybel-0.12.1/requirements.txt000066400000000000000000000001731334645200200161720ustar00rootroot00000000000000networkx>=2.1 sqlalchemy click click-plugins requests requests_file pyparsing six tqdm # Extras indra ndex2 py2neo==3.1.2 pybel-0.12.1/scripts/000077500000000000000000000000001334645200200143745ustar00rootroot00000000000000pybel-0.12.1/scripts/run_drop_test.py000066400000000000000000000037721334645200200176460ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Test loading and dropping a network.""" import os import time from contextlib import contextmanager import click import pybel DEFAULT_CONNECTION = 'mysql+mysqldb://root@localhost/pbt?charset=utf8' PICKLE = 'small_corpus.bel.gpickle' SMALL_CORPUS_URL = 'https://arty.scai.fraunhofer.de/artifactory/bel/knowledge/selventa-small-corpus/selventa-small-corpus-20150611.bel' @contextmanager def time_me(start_string): """Wrap statements with time logging.""" print(start_string) parse_start_time = time.time() yield print(f'ran in {time.time() - parse_start_time:.2f} seconds') def get_numbers(graph: pybel.BELGraph, manager: 
pybel.Manager)->float: """Insert and drop a graph to count how long it takes. :param graph: :param manager: :return: The time it took to drop """ print('inserting') parse_start_time = time.time() network = manager.insert_graph(graph) print(f'inserted in {time.time() - parse_start_time:.2f} seconds') print('dropping') drop_start_time = time.time() manager.drop_network(network) drop_time = time.time() - drop_start_time print(f'dropped in {drop_time:.2f} seconds') return drop_time @click.command() @click.option('--connection', default=DEFAULT_CONNECTION, help='SQLAlchemy connection.') def main(connection): """Parse a network, load it to the database, then test how fast it drops.""" manager = pybel.Manager(connection) if os.path.exists(PICKLE): print(f'opening from {PICKLE}') graph = pybel.from_pickle(PICKLE) else: with time_me(f'opening from {SMALL_CORPUS_URL}'): graph = pybel.from_url(SMALL_CORPUS_URL, manager=manager, use_tqdm=True, citation_clearing=False) pybel.to_pickle(graph, PICKLE) n = 1 # FIXME this fails if you do it with the same manager times = [ get_numbers(graph, manager) for _ in range(n) ] print(times) print(sum(times) / n) if __name__ == '__main__': main() pybel-0.12.1/scripts/run_jgif.py000066400000000000000000000027131334645200200165540ustar00rootroot00000000000000# -*- coding: utf-8 -*- import json import logging import os import time import pybel from pybel.manager import Manager from pybel.manager.citation_utils import enrich_pubmed_citations from pybel.struct.mutation import strip_annotations log = logging.getLogger('test') def upload_cbn_dir(dir_path, manager): """Uploads CBN data to edge store :param str dir_path: Directory full of CBN JGIF files :param pybel.Manager manager: """ t = time.time() for jfg_path in os.listdir(dir_path): if not jfg_path.endswith('.jgf'): continue path = os.path.join(dir_path, jfg_path) log.info('opening %s', path) with open(path) as f: cbn_jgif_dict = json.load(f) graph = pybel.from_cbn_jgif(cbn_jgif_dict) out_path 
= os.path.join(dir_path, jfg_path.replace('.jgf', '.bel')) with open(out_path, 'w') as o: pybel.to_bel(graph, o) strip_annotations(graph) enrich_pubmed_citations(manager=manager, graph=graph) pybel.to_database(graph, manager=manager) log.info('') log.info('done in %.2f', time.time() - t) if __name__ == '__main__': logging.basicConfig(level=logging.INFO) log.setLevel(logging.INFO) logging.getLogger('pybel.parser.baseparser').setLevel(logging.WARNING) bms_base = os.environ['BMS_BASE'] cbn_base = os.path.join(bms_base, 'cbn', 'Human-2.0') m = Manager() upload_cbn_dir(cbn_base, m) pybel-0.12.1/setup.cfg000066400000000000000000000000771334645200200145320ustar00rootroot00000000000000[bdist_wheel] universal = 1 [metadata] license_file = LICENSE pybel-0.12.1/setup.py000066400000000000000000000064231334645200200144240ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Setup.py for PyBEL.""" import codecs # To use a consistent encoding import os import re import sys import setuptools ################################################################# PACKAGES = setuptools.find_packages(where='src') META_PATH = os.path.join('src', 'pybel', '__init__.py') KEYWORDS = ['Biological Expression Language', 'BEL', 'Systems Biology', 'Networks Biology'] CLASSIFIERS = [ 'Development Status :: 4 - Beta', 'Environment :: Console', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', 'License :: OSI Approved :: Apache Software License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Topic :: Scientific/Engineering :: Bio-Informatics' ] INSTALL_REQUIRES = [ 'networkx>=2.1', 'sqlalchemy', 'click', 'click-plugins', 'requests', 'requests_file', 'pyparsing', 'six', 'tqdm', ] if sys.version_info < (3, ): INSTALL_REQUIRES.append('configparser') INSTALL_REQUIRES.append('enum34') # Only 
necessary for NDEx? INSTALL_REQUIRES.append('functools32') INSTALL_REQUIRES.append('funcsigs') EXTRAS_REQUIRE = { 'indra': ['indra'], 'neo4j': ['py2neo==3.1.2'], } TESTS_REQUIRE = [ 'mock', 'pathlib', ] ENTRY_POINTS = { 'console_scripts': [ 'pybel = pybel.cli:main', ] } DEPENDENCY_LINKS = [ ] ################################################################# HERE = os.path.abspath(os.path.dirname(__file__)) def read(*parts): """Build an absolute path from *parts* and return the contents of the resulting file. Assume UTF-8 encoding.""" with codecs.open(os.path.join(HERE, *parts), 'rb', 'utf-8') as f: return f.read() META_FILE = read(META_PATH) def find_meta(meta): """Extract __*meta*__ from META_FILE.""" meta_match = re.search( r'^__{meta}__ = ["\']([^"\']*)["\']'.format(meta=meta), META_FILE, re.M ) if meta_match: return meta_match.group(1) raise RuntimeError('Unable to find __{meta}__ string'.format(meta=meta)) def get_long_description(): """Get the long_description from the README.rst file. 
Assume UTF-8 encoding.""" with codecs.open(os.path.join(HERE, 'README.rst'), encoding='utf-8') as f: long_description = f.read() return long_description if __name__ == '__main__': setuptools.setup( name=find_meta('title'), version=find_meta('version'), description=find_meta('description'), long_description=get_long_description(), url=find_meta('url'), author=find_meta('author'), author_email=find_meta('email'), maintainer=find_meta('author'), maintainer_email=find_meta('email'), license=find_meta('license'), classifiers=CLASSIFIERS, keywords=KEYWORDS, packages=PACKAGES, package_dir={'': 'src'}, include_package_data=True, install_requires=INSTALL_REQUIRES, extras_require=EXTRAS_REQUIRE, tests_require=TESTS_REQUIRE, entry_points=ENTRY_POINTS, dependency_links=DEPENDENCY_LINKS, ) pybel-0.12.1/src/000077500000000000000000000000001334645200200134745ustar00rootroot00000000000000pybel-0.12.1/src/pybel/000077500000000000000000000000001334645200200146075ustar00rootroot00000000000000pybel-0.12.1/src/pybel/__init__.py000066400000000000000000000077571334645200200167400ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Parsing, validation, compilation, and data exchange of Biological Expression Language (BEL). PyBEL is tested on both Python3 and legacy Python2 installations on Mac OS and Linux using `Travis CI `_ as well as on Windows using `AppVeyor `_. Installation ------------ Easiest ~~~~~~~ Download the latest stable code from `PyPI `_ with: .. code-block:: sh $ python3 -m pip install pybel Get the Latest ~~~~~~~~~~~~~~~ Download the most recent code from `GitHub `_ with: .. code-block:: sh $ python3 -m pip install git+https://github.com/pybel/pybel.git@develop For Developers ~~~~~~~~~~~~~~ Clone the repository from `GitHub `_ and install in editable mode with: .. code-block:: sh $ git clone https://github.com/pybel/pybel.git@develop $ cd pybel $ python3 -m pip install -e . 
Extras ------ The ``setup.py`` makes use of the ``extras_require`` argument of :func:`setuptools.setup` in order to make some heavy packages that support special features of PyBEL optional to install, in order to make the installation more lean by default. A single extra can be installed from PyPI like :code:`python3 -m pip install -e pybel[neo4j]` or multiple can be installed using a list like :code:`python3 -m pip install -e pybel[neo4j,inra]`. Likewise, for developer installation, extras can be installed in editable mode with :code:`python3 -m pip install -e .[neo4j]` or multiple can be installed using a list like :code:`python3 -m pip install -e .[neo4j,indra]`. The available extras are: neo4j ~~~~~ This extension installs the :mod:`py2neo` package to support upload and download to Neo4j databases. .. seealso:: - :func:`pybel.to_neo4j` indra ~~~~~ This extra installs support for :mod:`indra`, the integrated network dynamical reasoner and assembler. Because it also represents biology in BEL-like statements, many statements from PyBEL can be converted to INDRA, and visa-versa. This package also enables the import of BioPAX, SBML, and SBGN into BEL. .. seealso:: - :func:`pybel.from_biopax` - :func:`pybel.from_indra_statements` - :func:`pybel.from_indra_pickle` - :func:`pybel.to_indra` Caveats ------- - PyBEL extends the :code:`networkx` for its core data structure. Many of the graphical aspects of :code:`networkx` depend on :code:`matplotlib`, which is an optional dependency. - If :code:`HTMLlib5` is installed, the test that's supposed to fail on a web page being missing actually tries to parse it as RDFa, and doesn't fail. Disregard this. Upgrading --------- During the current development cycle, programmatic access to the definition and graph caches might become unstable. If you have any problems working with the database, try removing it with one of the following commands: 1. Running :code:`pybel manage drop` (unix) 2. 
Running :code:`python3 -m pybel manage drop` (windows) 3. Removing the folder :code:`~/.pybel` PyBEL will build a new database and populate it on the next run. """ from . import canonicalize, constants, examples, examples, io, struct from .canonicalize import * from .examples import * from .io import * from .manager import cache_manager, database_io from .manager.cache_manager import * from .manager.database_io import * from .struct import * from .utils import get_version __all__ = ( struct.__all__ + io.__all__ + canonicalize.__all__ + database_io.__all__ + cache_manager.__all__ + examples.__all__ + [ 'get_version', ] ) __version__ = '0.12.1' __title__ = 'PyBEL' __description__ = 'Parsing, validation, compilation, and data exchange of Biological Expression Language (BEL)' __url__ = 'https://github.com/pybel/pybel' __author__ = 'Charles Tapley Hoyt' __email__ = 'charles.hoyt@scai.fraunhofer.de' __license__ = 'Apache 2.0 License' __copyright__ = 'Copyright (c) 2016-2018 Charles Tapley Hoyt' pybel-0.12.1/src/pybel/__main__.py000066400000000000000000000005741334645200200167070ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Entrypoint module, in case you use `python -m pybel` Why does this file exist, and why __main__? 
For more info, read: - https://www.python.org/dev/peps/pep-0338/ - https://docs.python.org/2/using/cmdline.html#cmdoption-m - https://docs.python.org/3/using/cmdline.html#cmdoption-m """ from .cli import main if __name__ == '__main__': main() pybel-0.12.1/src/pybel/canonicalize.py000066400000000000000000000262661334645200200176340ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains output functions to BEL scripts.""" from __future__ import print_function import logging import itertools as itt from .constants import ( ACTIVITY, ANNOTATIONS, BEL_DEFAULT_NAMESPACE, CITATION, CITATION_REFERENCE, CITATION_TYPE, COMPLEX, COMPOSITE, DEGRADATION, EFFECT, EVIDENCE, FROM_LOC, FUNCTION, FUSION, GOCC_KEYWORD, GOCC_LATEST, IDENTIFIER, LOCATION, MODIFIER, NAME, NAMESPACE, OBJECT, PYBEL_AUTOEVIDENCE, REACTION, RELATION, SUBJECT, TO_LOC, TRANSLOCATION, UNQUALIFIED_EDGES, VARIANTS, ) from .dsl import BaseEntity from .resources.document import make_knowledge_header from .utils import ensure_quotes __all__ = [ 'to_bel_lines', 'to_bel', 'to_bel_path', 'edge_to_bel', ] log = logging.getLogger(__name__) def postpend_location(bel_string, location_model): """Rip off the closing parentheses and adds canonicalized modification. 
I did this because writing a whole new parsing model for the data would be sad and difficult :param str bel_string: BEL string representing node :param dict location_model: A dictionary containing keys :code:`pybel.constants.TO_LOC` and :code:`pybel.constants.FROM_LOC` :return: A part of a BEL string representing the location :rtype: str """ if not all(k in location_model for k in {NAMESPACE, NAME}): raise ValueError('Location model missing namespace and/or name keys: {}'.format(location_model)) return "{}, loc({}:{}))".format( bel_string[:-1], location_model[NAMESPACE], ensure_quotes(location_model[NAME]) ) def _decanonicalize_edge_node(node, edge_data, node_position): """Canonicalize a node with its modifiers stored in the given edge to a BEL string. :param BaseEntity node: A PyBEL node data dictionary :param dict edge_data: A PyBEL edge data dictionary :param node_position: Either :data:`pybel.constants.SUBJECT` or :data:`pybel.constants.OBJECT` :rtype: str """ node_str = node.as_bel() if node_position not in edge_data: return node_str node_edge_data = edge_data[node_position] if LOCATION in node_edge_data: node_str = postpend_location(node_str, node_edge_data[LOCATION]) modifier = node_edge_data.get(MODIFIER) if modifier is None: return node_str if DEGRADATION == modifier: return "deg({})".format(node_str) effect = node_edge_data.get(EFFECT) if ACTIVITY == modifier: if effect is None: return "act({})".format(node_str) if effect[NAMESPACE] == BEL_DEFAULT_NAMESPACE: return "act({}, ma({}))".format(node_str, effect[NAME]) return "act({}, ma({}:{}))".format(node_str, effect[NAMESPACE], ensure_quotes(effect[NAME])) if TRANSLOCATION == modifier: if effect is None: return 'tloc({})'.format(node_str) to_loc_data = effect[TO_LOC] from_loc_data = effect[FROM_LOC] from_loc = "fromLoc({}:{})".format( from_loc_data[NAMESPACE], ensure_quotes(from_loc_data[NAME]) ) to_loc = "toLoc({}:{})".format( to_loc_data[NAMESPACE], ensure_quotes(to_loc_data[NAME]) ) return "tloc({}, {}, 
{})".format(node_str, from_loc, to_loc) raise ValueError('invalid modifier: {}'.format(modifier)) def edge_to_bel(u, v, data, sep=None): """Take two nodes and gives back a BEL string representing the statement. :param BaseEntity u: The edge's source's PyBEL node data dictionary :param BaseEntity v: The edge's target's PyBEL node data dictionary :param dict data: The edge's data dictionary :param str sep: The separator between the source, relation, and target. Defaults to ' ' :return: The canonical BEL for this edge :rtype: str """ sep = sep or ' ' u_str = _decanonicalize_edge_node(u, data, node_position=SUBJECT) v_str = _decanonicalize_edge_node(v, data, node_position=OBJECT) return sep.join((u_str, data[RELATION], v_str)) def _sort_qualified_edges_helper(edge_tuple): u, v, k, d = edge_tuple return ( d[CITATION][CITATION_TYPE], d[CITATION][CITATION_REFERENCE], d[EVIDENCE], ) def sort_qualified_edges(graph): """Return the qualified edges, sorted first by citation, then by evidence, then by annotations. :param BELGraph graph: A BEL graph :rtype: tuple[tuple,tuple,int,dict] """ qualified_edges = ( (u, v, k, d) for u, v, k, d in graph.edges(keys=True, data=True) if graph.has_edge_citation(u, v, k) and graph.has_edge_evidence(u, v, k) ) return sorted(qualified_edges, key=_sort_qualified_edges_helper) def _citation_sort_key(t): """Make a confusing 4 tuple sortable by citation. :param tuple t: A 4-tuple of source node, target node, key, and data :rtype: tuple[str,str] """ return '"{}", "{}"'.format(t[3][CITATION][CITATION_TYPE], t[3][CITATION][CITATION_REFERENCE]) def _evidence_sort_key(t): """Make a confusing 4 tuple sortable by citation. :param tuple t: A 4-tuple of source node, target node, key, and data :rtype: str """ return t[3][EVIDENCE] def _set_annotation_to_str(annotation_data, key): """Return a set annotation string. 
:param dict[str,dict[str,bool] annotation_data: :param key: :return: """ value = annotation_data[key] if len(value) == 1: return 'SET {} = "{}"'.format(key, list(value)[0]) x = ('"{}"'.format(v) for v in sorted(value)) return 'SET {} = {{{}}}'.format(key, ', '.join(x)) def _unset_annotation_to_str(keys): """Return an unset annotation string. :rtype: str """ if len(keys) == 1: return 'UNSET {}'.format(list(keys)[0]) return 'UNSET {{{}}}'.format(', '.join('{}'.format(key) for key in keys)) def _to_bel_lines_header(graph): """Iterate the lines of a BEL graph's corresponding BEL script's header. :param pybel.BELGraph graph: A BEL graph :rtype: iter[str] """ if GOCC_KEYWORD not in graph.namespace_url: graph.namespace_url[GOCC_KEYWORD] = GOCC_LATEST return make_knowledge_header( namespace_url=graph.namespace_url, namespace_patterns=graph.namespace_pattern, annotation_url=graph.annotation_url, annotation_patterns=graph.annotation_pattern, annotation_list=graph.annotation_list, **graph.document ) def group_citation_edges(edges): """Return an iterator over pairs of citation values and their corresponding edge iterators. :param iter[tuple,tuple,int,dict] edges: An iterator over the 4-tuples of edges :rtype: tuple[str,tuple[tuple,tuple,int,dict]] """ return itt.groupby(edges, key=_citation_sort_key) def group_evidence_edges(edges): """Return an iterator over pairs of evidence values and their corresponding edge iterators. :param iter[tuple,tuple,int,dict] edges: An iterator over the 4-tuples of edges :rtype: tuple[str,tuple[tuple,tuple,int,dict]] """ return itt.groupby(edges, key=_evidence_sort_key) def _to_bel_lines_body(graph): """Iterate the lines of a BEL graph's corresponding BEL script's body. 
:param pybel.BELGraph graph: A BEL graph :rtype: iter[str] """ qualified_edges = sort_qualified_edges(graph) for citation, citation_edges in group_citation_edges(qualified_edges): yield '#' * 80 yield 'SET Citation = {{{}}}'.format(citation) for evidence, evidence_edges in group_evidence_edges(citation_edges): yield 'SET SupportingText = "{}"'.format(evidence) for u, v, _, data in evidence_edges: annotations_data = data.get(ANNOTATIONS) keys = sorted(annotations_data) if annotations_data is not None else tuple() for key in keys: yield _set_annotation_to_str(annotations_data, key) yield graph.edge_to_bel(u, v, data) if keys: yield _unset_annotation_to_str(keys) yield 'UNSET SupportingText' yield 'UNSET Citation' def _to_bel_lines_footer(graph): """Iterate the lines of a BEL graph's corresponding BEL script's footer. :param pybel.BELGraph graph: A BEL graph :rtype: iter[str] """ unqualified_edges_to_serialize = [ (u, v, d) for u, v, d in graph.edges(data=True) if d[RELATION] in UNQUALIFIED_EDGES and EVIDENCE not in d ] isolated_nodes_to_serialize = [ node for node in graph if not graph.pred[node] and not graph.succ[node] ] if unqualified_edges_to_serialize or isolated_nodes_to_serialize: yield '###############################################\n' yield 'SET Citation = {"PubMed","Added by PyBEL","29048466"}' yield 'SET SupportingText = "{}"'.format(PYBEL_AUTOEVIDENCE) for u, v, data in unqualified_edges_to_serialize: yield '{} {} {}'.format(u.as_bel(), data[RELATION], v.as_bel()) for node in isolated_nodes_to_serialize: yield node.as_bel() yield 'UNSET SupportingText' yield 'UNSET Citation' def to_bel_lines(graph): """Return an iterable over the lines of the BEL graph as a canonical BEL Script (.bel). 
:param pybel.BELGraph graph: the BEL Graph to output as a BEL Script :return: An iterable over the lines of the representative BEL script :rtype: iter[str] """ return itt.chain( _to_bel_lines_header(graph), _to_bel_lines_body(graph), _to_bel_lines_footer(graph) ) def to_bel(graph, file=None): """Output the BEL graph as canonical BEL to the given file/file-like/stream. :param BELGraph graph: the BEL Graph to output as a BEL Script :param file file: A writable file-like object. If None, defaults to standard out. """ for line in to_bel_lines(graph): print(line, file=file) def to_bel_path(graph, path, mode='w', **kwargs): """Write the BEL graph as a canonical BEL Script to the given path. :param BELGraph graph: the BEL Graph to output as a BEL Script :param str path: A file path :param str mode: The file opening mode. Defaults to 'w' """ with open(path, mode=mode, **kwargs) as bel_file: to_bel(graph, bel_file) def calculate_canonical_name(data): """Calculate the canonical name for a given node. If it is a simple node, uses the already given name. Otherwise, it uses the BEL string. :type data: BaseEntity :return: Canonical node name :rtype: str """ if data[FUNCTION] == COMPLEX and NAMESPACE in data: return data[NAME] if VARIANTS in data: return data.as_bel() if FUSION in data: return data.as_bel() if data[FUNCTION] in {REACTION, COMPOSITE, COMPLEX}: return data.as_bel() if VARIANTS not in data and FUSION not in data: # this is should be a simple node if IDENTIFIER in data and NAME in data: return '{namespace}:{identifier} ({name})'.format(**data) if IDENTIFIER in data: return '{namespace}:{identifier}'.format(namespace=data[NAMESPACE], identifier=data[IDENTIFIER]) return data[NAME] raise ValueError('Unexpected node data: {}'.format(data)) pybel-0.12.1/src/pybel/cli.py000066400000000000000000000320431334645200200157320ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Command line interface for PyBEL. Why does this file exist, and why not put this in __main__? 
You might be tempted to import things from __main__ later, but that will cause problems--the code will get executed twice: - When you run ``python3 -m pybel`` python will execute ``__main__.py`` as a script. That means there won't be any ``pybel.__main__`` in ``sys.modules``. - When you import __main__ it will get executed again (as a module) because there's no ``pybel.__main__`` in ``sys.modules``. Also see (1) from http://click.pocoo.org/5/setuptools/#setuptools-integration """ import logging import os import sys import time import click from click_plugins import with_plugins from pkg_resources import iter_entry_points from .canonicalize import to_bel from .constants import get_cache_connection from .examples import braf_graph, egf_graph, homology_graph, sialic_acid_graph, statin_graph from .io import from_path, from_pickle, to_csv, to_graphml, to_gsea, to_json_file, to_neo4j, to_pickle, to_sif, to_web from .io.web import _get_host from .manager import Manager from .manager.database_io import to_database from .manager.models import Edge, Namespace from .struct import get_unused_annotations, get_unused_namespaces from .utils import get_corresponding_pickle_path log = logging.getLogger(__name__) def _page(it): click.echo_via_pager('\n'.join(map(str, it))) connection_option = click.option( '-c', '--connection', default=get_cache_connection(), show_default=True, help='Database connection string.', ) host_option = click.option('--host', help='URL of BEL Commons. Defaults to {}'.format(_get_host())) def _from_pickle_callback(ctx, param, file): path = file.name if not path.endswith('.bel'): return from_pickle(file) cache_path = get_corresponding_pickle_path(path) if not os.path.exists(cache_path): click.echo('The BEL script {path} has not yet been compiled. 
First, try running the following command:\n\n ' 'pybel compile {path}\n'.format(path=path)) sys.exit(1) return from_pickle(cache_path) graph_pickle_argument = click.argument( 'graph', metavar='path', type=click.File('rb'), callback=_from_pickle_callback, ) @with_plugins(iter_entry_points('pybel.cli_plugins')) @click.group(help="PyBEL Command Line Interface on {}".format(sys.executable)) @click.version_option() @connection_option @click.pass_context def main(ctx, connection): """PyBEL Command Line.""" ctx.obj = Manager(connection=connection) ctx.obj.bind() # add the engine to the metadata and query property to the session @main.command() @click.argument('path') @click.option('--allow-naked-names', is_flag=True, help="Enable lenient parsing for naked names") @click.option('--allow-nested', is_flag=True, help="Enable lenient parsing for nested statements") @click.option('--disallow-unqualified-translocations', is_flag=True, help="Disallow unqualified translocations") @click.option('--no-identifier-validation', is_flag=True, help='Turn off identifier validation') @click.option('--no-citation-clearing', is_flag=True, help='Turn off citation clearing') @click.option('-r', '--required-annotations', multiple=True, help='Specify multiple required annotations') @click.option('--skip-tqdm', is_flag=True) @click.option('-v', '--verbose', is_flag=True) @click.pass_obj def compile(manager, path, allow_naked_names, allow_nested, disallow_unqualified_translocations, no_identifier_validation, no_citation_clearing, required_annotations, skip_tqdm, verbose): """Compile a BEL script to a graph.""" if verbose: logging.basicConfig(level=logging.DEBUG) log.setLevel(logging.DEBUG) log.debug('using connection: %s', manager.engine.url) click.secho('Compilation', fg='red', bold=True) if skip_tqdm: click.echo('```') graph = from_path( path, manager=manager, use_tqdm=(not (skip_tqdm or verbose)), allow_nested=allow_nested, allow_naked_names=allow_naked_names, 
disallow_unqualified_translocations=disallow_unqualified_translocations, citation_clearing=(not no_citation_clearing), required_annotations=required_annotations, no_identifier_validation=no_identifier_validation, allow_definition_failures=True, ) if skip_tqdm: click.echo('```') to_pickle(graph, get_corresponding_pickle_path(path)) click.echo('') _print_summary(graph, ticks=skip_tqdm) sys.exit(0 if 0 == len(graph.warnings) else 1) @main.command() @graph_pickle_argument def summarize(graph): """Summarize a graph.""" _print_summary(graph) def _print_summary(graph, ticks=False): if not ticks: click.secho('Summary', fg='red', bold=True) graph.summarize() click.secho('\nUnused Namespaces', fg='red', bold=True) if ticks: click.echo('```') for namespace in sorted(get_unused_namespaces(graph)): click.echo(namespace) if ticks: click.echo('```') click.secho('\nUnused Annotations', fg='red', bold=True) if ticks: click.echo('```') for annotation in sorted(get_unused_annotations(graph)): click.echo(annotation) if ticks: click.echo('```') @main.command() @graph_pickle_argument def warnings(graph): """List warnings from a graph.""" echo_warnings_via_pager(graph.warnings) @main.command() @graph_pickle_argument @click.pass_obj def insert(manager, graph): """Insert a graph to the database.""" to_database(graph, manager=manager, use_tqdm=True) @main.command() @graph_pickle_argument @host_option def post(graph, host): """Upload a graph to BEL Commons.""" resp = to_web(graph, host=host) resp.raise_for_status() @main.command() @graph_pickle_argument @click.option('--csv', type=click.File('w'), help='Path to output a CSV file.') @click.option('--sif', type=click.File('w'), help='Path to output an SIF file.') @click.option('--gsea', type=click.File('w'), help='Path to output a GRP file for gene set enrichment analysis.') @click.option('--graphml', help='Path to output a GraphML file. 
Use .graphml for Cytoscape.') @click.option('--json', type=click.File('w'), help='Path to output a node-link JSON file.') @click.option('--bel', type=click.File('w'), help='Output canonical BEL.') def serialize(graph, csv, sif, gsea, graphml, json, bel): """Serialize a graph to various formats.""" if csv: log.info('Outputting CSV to %s', csv) to_csv(graph, csv) if sif: log.info('Outputting SIF to %s', sif) to_sif(graph, sif) if graphml: log.info('Outputting GraphML to %s', graphml) to_graphml(graph, graphml) if gsea: log.info('Outputting GRP to %s', gsea) to_gsea(graph, gsea) if json: log.info('Outputting JSON to %s', json) to_json_file(graph, json) if bel: log.info('Outputting BEL to %s', bel) to_bel(graph, bel) @main.command() @graph_pickle_argument @click.option('--connection', default='http://localhost:7474/db/data/', help='Connection string for neo4j upload.') @click.password_option() def neo(graph, connection, password): """Upload to neo4j.""" import py2neo neo_graph = py2neo.Graph(connection, password=password) to_neo4j(graph, neo_graph) @main.command() @click.pass_obj @click.argument('agents', nargs=-1) @click.option('--local', is_flag=True, help='Upload to local database.') @host_option def machine(manager, agents, local, host): """Get content from the INDRA machine and upload to BEL Commons.""" from indra.sources import indra_db_rest from pybel import from_indra_statements statements = indra_db_rest.get_statements(agents=agents) click.echo('got {} statements from INDRA'.format(len(statements))) graph = from_indra_statements( statements, name='INDRA Machine for {}'.format(', '.join(sorted(agents))), version=time.strftime('%Y%m%d'), ) click.echo('built BEL graph with {} nodes and {} edges'.format(graph.number_of_nodes(), graph.number_of_edges())) if 0 == len(graph): click.echo('not uploading empty graph') sys.exit(-1) if local: to_database(graph, manager=manager) else: resp = to_web(graph, host=host) resp.raise_for_status() @main.group() def manage(): 
"""Manage the database.""" @manage.command() @click.option('-y', '--yes', is_flag=True) @click.pass_obj def drop(manager, yes): """Drop the database.""" if yes or click.confirm('Drop database?'): manager.drop_all() @manage.command() @click.pass_obj def examples(manager): """Load examples to the database.""" for graph in (sialic_acid_graph, statin_graph, homology_graph, braf_graph, egf_graph): if manager.has_name_version(graph.name, graph.version): click.echo('already inserted {}'.format(graph)) continue click.echo('inserting {}'.format(graph)) manager.insert_graph(graph, use_tqdm=True) @manage.group() def namespaces(): """Manage namespaces.""" @namespaces.command() @click.argument('url') @click.pass_obj def insert(manager, url): """Add a namespace by URL.""" manager.get_or_create_namespace(url) def _ls(manager, model_cls, model_id): if model_id: n = manager.session.query(model_cls).get(model_id) _page(n.entries) else: for n in manager.session.query(model_cls).order_by(model_cls.uploaded.desc()): click.echo('\t'.join(map(str, (n.id, n.keyword, n.version, n.url)))) @namespaces.command() @click.option('--url', help='Specific resource URL to list') @click.option('-i', '--namespace-id', help='Specific resource URL to list') @click.pass_obj def ls(manager, url, namespace_id): """List cached namespaces.""" if url: n = manager.get_or_create_namespace(url) _page(n.entries) else: _ls(manager, Namespace, namespace_id) @namespaces.command() @click.argument('url') @click.pass_obj def drop(manager, url): """Drop a namespace by URL.""" manager.drop_namespace_by_url(url) @manage.group() def networks(): """Manage networks.""" @networks.command() @click.pass_obj def ls(manager): """List network names, versions, and optionally, descriptions.""" for n in manager.list_networks(): click.echo('{}\t{}\t{}'.format(n.id, n.name, n.version)) @networks.command() @click.option('-n', '--network-id', type=int, help='Identifier of network to drop') @click.option('-y', '--yes', is_flag=True, 
help='Drop all networks without confirmation if no identifier is given') @click.pass_obj def drop(manager, network_id, yes): """Drop a network by its identifier or drop all networks.""" if network_id: manager.drop_network_by_id(network_id) elif yes or click.confirm('Drop all networks?'): manager.drop_networks() @manage.group() def edges(): """Manage edges.""" @edges.command() @click.option('--offset', type=int) @click.option('--limit', type=int, default=10) @click.pass_obj def ls(manager, offset, limit): """List edges.""" q = manager.session.query(Edge) if offset: q = q.offset(offset) if limit > 0: q = q.limit(limit) for e in q: click.echo(e.bel) @manage.command() @click.pass_obj def summarize(manager): """Summarize the contents of the database.""" click.echo('Networks: {}'.format(manager.count_networks())) click.echo('Edges: {}'.format(manager.count_edges())) click.echo('Nodes: {}'.format(manager.count_nodes())) click.echo('Namespaces: {}'.format(manager.count_namespaces())) click.echo('Namespaces entries: {}'.format(manager.count_namespace_entries())) click.echo('Annotations: {}'.format(manager.count_annotations())) click.echo('Annotation entries: {}'.format(manager.count_annotation_entries())) def echo_warnings_via_pager(warnings, sep='\t'): """Output the warnings from a BEL graph with Click and the system's pager. :param warnings: A list of 4-tuples reprenting the warnings :param str sep: The separator. Defaults to tab. """ # Exit if no warnings if not warnings: click.echo('Congratulations! 
No warnings.') sys.exit(0) max_line_width = max( len(str(line_number)) for line_number, _, _, _ in warnings ) max_warning_width = max( len(exc.__class__.__name__) for _, _, exc, _ in warnings ) s1 = '{:>' + str(max_line_width) + '}' + sep s2 = '{:>' + str(max_warning_width) + '}' + sep def _make_line(line_number, line, exc): s = click.style(s1.format(line_number), fg='blue', bold=True) if exc.__class__.__name__.endswith('Error'): s += click.style(s2.format(exc.__class__.__name__), fg='red') else: s += click.style(s2.format(exc.__class__.__name__), fg='yellow') s += click.style(line, bold=True) + sep s += click.style(str(exc)) return s click.echo_via_pager('\n'.join( _make_line(line_number, line, exc) for line_number, line, exc, _ in warnings )) if __name__ == '__main__': main() pybel-0.12.1/src/pybel/constants.py000066400000000000000000000530141334645200200172000ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Constants for PyBEL. This module maintains the strings used throughout the PyBEL codebase to promote consistency. Configuration Loading --------------------- By default, PyBEL loads its configuration from ``~/.config/pybel/config.json``. This json is stored in the object :data:`pybel.constants.config`. 
""" from json import load from logging import getLogger from os import environ, makedirs, mkdir, path log = getLogger(__name__) VERSION = '0.12.1' #: The last PyBEL version where the graph data definition changed PYBEL_MINIMUM_IMPORT_VERSION = 0, 12, 0 BELFRAMEWORK_DOMAIN = 'http://resource.belframework.org' OPENBEL_DOMAIN = 'http://resources.openbel.org' SMALL_CORPUS_URL = OPENBEL_DOMAIN + '/belframework/20150611/knowledge/small_corpus.bel' LARGE_CORPUS_URL = OPENBEL_DOMAIN + '/belframework/20150611/knowledge/large_corpus.bel' FRAUNHOFER_RESOURCES = 'https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=' OPENBEL_NAMESPACE_RESOURCES = OPENBEL_DOMAIN + '/belframework/20150611/namespace/' OPENBEL_ANNOTATION_RESOURCES = OPENBEL_DOMAIN + '/belframework/20150611/annotation/' #: GOCC is the only namespace that needs to be stored because translocations use some of its values by default GOCC_LATEST = 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/go-cellular-component/go-cellular-component-20170511.belns' GOCC_KEYWORD = 'GOCC' #: The environment variable that contains the default SQL connection information for the PyBEL cache PYBEL_CONNECTION = 'PYBEL_CONNECTION' #: The default directory where PyBEL files, including logs and the default cache, are stored. Created if not exists. PYBEL_DIR = environ.get('PYBEL_RESOURCE_DIRECTORY', path.join(path.expanduser('~'), '.pybel')) if not path.exists(PYBEL_DIR): try: mkdir(PYBEL_DIR) except FileExistsError: log.debug('pybel data directory was created already: %s', PYBEL_DIR) DEFAULT_CACHE_NAME = 'pybel_{}.{}.{}_cache.db'.format(*PYBEL_MINIMUM_IMPORT_VERSION) #: The default cache location is ``~/.pybel/data/pybel_cache.db`` DEFAULT_CACHE_LOCATION = path.join(PYBEL_DIR, DEFAULT_CACHE_NAME) #: The default cache connection string uses sqlite. 
DEFAULT_CACHE_CONNECTION = 'sqlite:///' + DEFAULT_CACHE_LOCATION


def get_config_dir():
    """Return the path to the directory where configuration is stored for PyBEL.

    The environment variable ``PYBEL_CONFIG_DIRECTORY`` takes precedence. If it is not set, the directory
    ``.config/pybel`` inside the current user's home directory is used.

    :return: The path to the configuration directory. This function does not create it.
    :rtype: str
    """
    return environ.get('PYBEL_CONFIG_DIRECTORY', path.join(path.expanduser('~'), '.config', 'pybel'))


# Make sure the configuration directory exists as soon as this module is imported.
_config_dir = get_config_dir()
if not path.exists(_config_dir):
    try:
        makedirs(_config_dir)
    except OSError:
        # ``FileExistsError`` is only defined on Python 3, so naming it in the ``except`` clause raises a
        # ``NameError`` on Python 2 exactly when another process wins the race to create the directory.
        # Catching ``OSError`` (the base class on both versions) and re-raising unless the path exists
        # now keeps the Python 3 behavior and makes the race harmless on Python 2 as well.
        if not path.exists(_config_dir):
            raise
        log.debug('config folder was already created: %s', _config_dir)

#: The global configuration for PyBEL is stored here. By default, it loads from ``~/.config/pybel/config.json``
config = {
    PYBEL_CONNECTION: DEFAULT_CACHE_CONNECTION
}


def get_config_path():
    """Return the path of the configuration file.

    This is the file called ``config.json`` inside the configuration directory that was resolved by
    :func:`get_config_dir` when this module was imported.

    :rtype: str
    """
    return path.join(_config_dir, 'config.json')


# Entries from the configuration file (if there is one) override the defaults in ``config``.
_config_path = get_config_path()
if path.exists(_config_path):
    with open(_config_path) as f:
        config.update(load(f))


def get_cache_connection():
    """Get the preferred RFC-1738 database connection string.

    The following sources are checked in order, and the first one that is not ``None`` is returned:

    1. The environment variable ``PYBEL_CONNECTION``
    2. The ``PYBEL_CONNECTION`` key in :data:`config`, which is pre-populated with the default connection and
       updated from the config file ``~/.config/pybel/config.json``. This config file might be in a different
       place if the environment variable ``PYBEL_CONFIG_DIRECTORY`` has been set.
    3. The default connection string, :data:`DEFAULT_CACHE_CONNECTION`, which uses SQLite.

    :return: A database connection string
    :rtype: str
    """
    connection = environ.get(PYBEL_CONNECTION)

    if connection is not None:
        # NOTE(review): the whole connection string is logged; confirm that it can never carry credentials.
        log.info('getting environment-defined connection: %s', connection)
        return connection

    connection = config.get(PYBEL_CONNECTION)

    if connection is not None:
        log.info('getting configured connection %s', connection)
        return connection

    # Only reached if the configuration explicitly maps the key to ``None`` (e.g., JSON ``null``).
    log.debug('using default connection %s', DEFAULT_CACHE_CONNECTION)
    return DEFAULT_CACHE_CONNECTION


PYBEL_CONTEXT_TAG = 'pybel_context'
PYBEL_AUTOEVIDENCE = 'Automatically added by PyBEL'

#: The default namespace given to entities in the BEL language
BEL_DEFAULT_NAMESPACE = 'bel'

CITATION_TYPE_BOOK = 'Book'
CITATION_TYPE_PUBMED = 'PubMed'
CITATION_TYPE_PMC = 'PubMed Central'
CITATION_TYPE_JOURNAL = 'Journal'
CITATION_TYPE_ONLINE = 'Online Resource'
CITATION_TYPE_URL = 'URL'
CITATION_TYPE_DOI = 'DOI'
CITATION_TYPE_OTHER = 'Other'

#: The valid citation types
#: .. seealso:: https://wiki.openbel.org/display/BELNA/Citation
CITATION_TYPES = {
    CITATION_TYPE_BOOK,
    CITATION_TYPE_PUBMED,
    CITATION_TYPE_PMC,
    CITATION_TYPE_JOURNAL,
    CITATION_TYPE_ONLINE,
    CITATION_TYPE_URL,
    CITATION_TYPE_DOI,
    CITATION_TYPE_OTHER
}

NAMESPACE_DOMAIN_BIOPROCESS = 'BiologicalProcess'
NAMESPACE_DOMAIN_CHEMICAL = 'Chemical'
NAMESPACE_DOMAIN_GENE = 'Gene and Gene Products'
NAMESPACE_DOMAIN_OTHER = 'Other'

#: The valid namespace types
#: .. seealso:: https://wiki.openbel.org/display/BELNA/Custom+Namespaces
NAMESPACE_DOMAIN_TYPES = {
    NAMESPACE_DOMAIN_BIOPROCESS,
    NAMESPACE_DOMAIN_CHEMICAL,
    NAMESPACE_DOMAIN_GENE,
    NAMESPACE_DOMAIN_OTHER
}

#: Represents the key for the citation type in a citation dictionary
CITATION_TYPE = 'type'
#: Represents the key for the citation name in a citation dictionary
CITATION_NAME = 'name'
#: Represents the key for the citation reference in a citation dictionary
CITATION_REFERENCE = 'reference'
#: Represents the key for the citation date in a citation dictionary
CITATION_DATE = 'date'
#: Represents the key for the citation authors in a citation dictionary
CITATION_AUTHORS = 'authors'
#: Represents the key for the citation comment in a citation dictionary
CITATION_COMMENTS = 'comments'

#: Represents the key for the optional PyBEL citation title entry in a citation dictionary
CITATION_TITLE = 'title'
#: Represents the key for the optional PyBEL citation volume entry in a citation dictionary
CITATION_VOLUME = 'volume'
#: Represents the key for the optional PyBEL citation issue entry in a citation dictionary
CITATION_ISSUE = 'issue'
#: Represents the key for the optional PyBEL citation pages entry in a citation dictionary
CITATION_PAGES = 'pages'
#: Represents the key for the optional PyBEL citation first author entry in a citation dictionary
CITATION_FIRST_AUTHOR = 'first'
#: Represents the key for the optional PyBEL citation last author entry in a citation dictionary
CITATION_LAST_AUTHOR = 'last'

#: Represents the ordering of the citation entries in a control statement (SET Citation = ...)
CITATION_ENTRIES = CITATION_TYPE, CITATION_NAME, CITATION_REFERENCE, CITATION_DATE, CITATION_AUTHORS, CITATION_COMMENTS

# Used during BEL parsing

MODIFIER = 'modifier'
EFFECT = 'effect'
TARGET = 'target'
FROM_LOC = 'fromLoc'
TO_LOC = 'toLoc'

LOCATION = 'location'

ACTIVITY = 'Activity'
DEGRADATION = 'Degradation'
TRANSLOCATION = 'Translocation'
CELL_SECRETION = 'CellSecretion'
CELL_SURFACE_EXPRESSION = 'CellSurfaceExpression'

# Internal node data format keys

#: The node data key specifying the node's function (e.g. :data:`GENE`, :data:`MIRNA`, :data:`BIOPROCESS`, etc.)
FUNCTION = 'function'
#: The key specifying an identifier dictionary's namespace. Used for nodes, activities, and transformations.
NAMESPACE = 'namespace'
#: The key specifying an identifier dictionary's name. Used for nodes, activities, and transformations.
NAME = 'name'
#: The key specifying an identifier dictionary
IDENTIFIER = 'identifier'
#: The key specifying an optional label for the node
LABEL = 'label'
#: The key specifying an optional description for the node
DESCRIPTION = 'description'

#: The key representing the nodes that are a member of a composite or complex
MEMBERS = 'members'
#: The key representing the nodes appearing in the reactant side of a biochemical reaction
REACTANTS = 'reactants'
#: The key representing the nodes appearing in the product side of a biochemical reaction
PRODUCTS = 'products'

#: The node data key specifying a fusion dictionary, containing :data:`PARTNER_3P`, :data:`PARTNER_5P`,
#: :data:`RANGE_3P`, and :data:`RANGE_5P`
FUSION = 'fusion'
#: The key specifying the identifier dictionary of the fusion's 3-Prime partner
PARTNER_3P = 'partner_3p'
#: The key specifying the identifier dictionary of the fusion's 5-Prime partner
PARTNER_5P = 'partner_5p'
#: The key specifying the range dictionary of the fusion's 3-Prime partner
RANGE_3P = 'range_3p'
#: The key specifying the range dictionary of the fusion's 5-Prime partner
RANGE_5P = 'range_5p'

# Keys used inside a fusion range dictionary
FUSION_REFERENCE = 'reference'
FUSION_START = 'left'
FUSION_STOP = 'right'
FUSION_MISSING = 'missing'

#: The key specifying the node has a list of associated variants
VARIANTS = 'variants'
#: The key representing what kind of variation is being represented
KIND = 'kind'
#: The value for :data:`KIND` for an HGVS variant
HGVS = 'hgvs'
#: The value for :data:`KIND` for a protein modification
PMOD = 'pmod'
#: The value for :data:`KIND` for a gene modification
GMOD = 'gmod'
#: The value for :data:`KIND` for a fragment
FRAGMENT = 'frag'

#: The allowed values for :data:`KIND`
PYBEL_VARIANT_KINDS = {
    HGVS,
    PMOD,
    GMOD,
    FRAGMENT
}

#: The group of all BEL-provided keys for node data dictionaries, used for hashing.
PYBEL_NODE_DATA_KEYS = {
    FUNCTION,
    NAMESPACE,
    NAME,
    IDENTIFIER,
    VARIANTS,
    FUSION,
    MEMBERS,
    REACTANTS,
    PRODUCTS,
}

#: Used as a namespace when none is given when lenient parsing mode is turned on. Not recommended!
DIRTY = 'dirty'

#: Represents the BEL abundance, abundance()
ABUNDANCE = 'Abundance'

#: Represents the BEL abundance, geneAbundance()
#: .. seealso:: http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xabundancea
GENE = 'Gene'

#: Represents the BEL abundance, rnaAbundance()
RNA = 'RNA'

#: Represents the BEL abundance, microRNAAbundance()
MIRNA = 'miRNA'

#: Represents the BEL abundance, proteinAbundance()
PROTEIN = 'Protein'

#: Represents the BEL function, biologicalProcess()
BIOPROCESS = 'BiologicalProcess'

#: Represents the BEL function, pathology()
PATHOLOGY = 'Pathology'

#: Represents the BEL abundance, compositeAbundance()
COMPOSITE = 'Composite'

#: Represents the BEL abundance, complexAbundance()
COMPLEX = 'Complex'

#: Represents the BEL transformation, reaction()
REACTION = 'Reaction'

#: A set of all of the valid PyBEL node functions
PYBEL_NODE_FUNCTIONS = {
    ABUNDANCE,
    GENE,
    RNA,
    MIRNA,
    PROTEIN,
    BIOPROCESS,
    PATHOLOGY,
    COMPOSITE,
    COMPLEX,
    REACTION
}

#: The mapping from PyBEL node functions to BEL strings
# NOTE(review): :data:`REACTION` is in :data:`PYBEL_NODE_FUNCTIONS` but has no entry here, so looking it up in
# this mapping raises a ``KeyError`` - confirm that this is intended.
rev_abundance_labels = {
    ABUNDANCE: 'a',
    GENE: 'g',
    MIRNA: 'm',
    PROTEIN: 'p',
    RNA: 'r',
    BIOPROCESS: 'bp',
    PATHOLOGY: 'path',
    COMPLEX: 'complex',
    COMPOSITE: 'composite'
}

# Internal edge data keys

#: The key for an internal edge data dictionary for the relation string
RELATION = 'relation'
#: The key for an internal edge data dictionary for the citation dictionary
CITATION = 'citation'
#: The key for an internal edge data dictionary for the evidence string
EVIDENCE = 'evidence'
#: The key for an internal edge data dictionary for the annotations dictionary
ANNOTATIONS = 'annotations'
#: The key for an internal edge data dictionary for the subject modifier dictionary
SUBJECT = 'subject'
#: The key for an internal edge data dictionary for the object modifier dictionary
OBJECT = 'object'
#: The key for an internal edge data dictionary for the line number
LINE = 'line'
#: The key for an internal edge data dictionary for the hash
# NOTE(review): the original comment read "the hash of the other" and was cut off; presumably this is the hash of
# the other edge data - confirm.
HASH = 'hash'

#: The group of all BEL-provided keys for edge data dictionaries, used for hashing.
PYBEL_EDGE_DATA_KEYS = {
    RELATION,
    CITATION,
    EVIDENCE,
    ANNOTATIONS,
    SUBJECT,
    OBJECT,
}

#: The group of all PyBEL-specific keys for edge data dictionaries, not used for hashing.
PYBEL_EDGE_METADATA_KEYS = {
    LINE,
    HASH,
}

#: The group of all PyBEL annotated keys for edge data dictionaries
PYBEL_EDGE_ALL_KEYS = PYBEL_EDGE_DATA_KEYS | PYBEL_EDGE_METADATA_KEYS

#: A BEL relationship
HAS_REACTANT = 'hasReactant'
#: A BEL relationship
HAS_PRODUCT = 'hasProduct'
#: A BEL relationship
HAS_COMPONENT = 'hasComponent'
#: A BEL relationship
HAS_VARIANT = 'hasVariant'
#: A BEL relationship
HAS_MEMBER = 'hasMember'
#: A BEL relationship
#: :data:`GENE` to :data:`RNA` is called transcription
TRANSCRIBED_TO = 'transcribedTo'
#: A BEL relationship
#: :data:`RNA` to :data:`PROTEIN` is called translation
TRANSLATED_TO = 'translatedTo'
#: A BEL relationship
INCREASES = 'increases'
#: A BEL relationship
DIRECTLY_INCREASES = 'directlyIncreases'
#: A BEL relationship
DECREASES = 'decreases'
#: A BEL relationship
DIRECTLY_DECREASES = 'directlyDecreases'
#: A BEL relationship
CAUSES_NO_CHANGE = 'causesNoChange'
#: A BEL relationship
REGULATES = 'regulates'
#: A BEL relationship
NEGATIVE_CORRELATION = 'negativeCorrelation'
#: A BEL relationship
POSITIVE_CORRELATION = 'positiveCorrelation'
#: A BEL relationship
ASSOCIATION = 'association'
#: A BEL relationship
ORTHOLOGOUS = 'orthologous'
#: A BEL relationship
ANALOGOUS_TO = 'analogousTo'
#: A BEL relationship
IS_A = 'isA'
#: A BEL relationship
RATE_LIMITING_STEP_OF = 'rateLimitingStepOf'
#: A BEL relationship
SUBPROCESS_OF = 'subProcessOf'
#: A BEL relationship
BIOMARKER_FOR = 'biomarkerFor'
#: A BEL relationship
# NOTE(review): the constant name is misspelled ("PROGONSTIC"), while its value is spelled correctly. The name
# is left unchanged here because other modules may import it.
PROGONSTIC_BIOMARKER_FOR = 'prognosticBiomarkerFor'

#: A BEL relationship, added by PyBEL
EQUIVALENT_TO = 'equivalentTo'
#: A BEL relationship, added by PyBEL
PART_OF = 'partOf'

#: A set of all causal relationships that have an increasing effect
CAUSAL_INCREASE_RELATIONS = {INCREASES, DIRECTLY_INCREASES}
#: A set of all causal relationships that have a decreasing effect
CAUSAL_DECREASE_RELATIONS = {DECREASES, DIRECTLY_DECREASES}
#: A set of direct causal relations
DIRECT_CAUSAL_RELATIONS = {DIRECTLY_DECREASES, DIRECTLY_INCREASES}
#: A set of indirect causal relations
INDIRECT_CAUSAL_RELATIONS = {DECREASES, INCREASES, REGULATES}
#: A set of causal relationships that are polar
CAUSAL_POLAR_RELATIONS = CAUSAL_INCREASE_RELATIONS | CAUSAL_DECREASE_RELATIONS
#: A set of all causal relationships
CAUSAL_RELATIONS = CAUSAL_INCREASE_RELATIONS | CAUSAL_DECREASE_RELATIONS | {REGULATES}

#: A set of all relationships that are inherently directionless, and are therefore added to the graph twice
TWO_WAY_RELATIONS = {
    NEGATIVE_CORRELATION,
    POSITIVE_CORRELATION,
    ASSOCIATION,
    ORTHOLOGOUS,
    ANALOGOUS_TO,
    EQUIVALENT_TO,
}

#: A set of all correlative relationships
CORRELATIVE_RELATIONS = {
    POSITIVE_CORRELATION,
    NEGATIVE_CORRELATION
}

#: A set of polar relations
POLAR_RELATIONS = CAUSAL_POLAR_RELATIONS | CORRELATIVE_RELATIONS

#: A set of relationship types that don't require annotations or evidence
UNQUALIFIED_EDGES = {
    HAS_REACTANT,
    HAS_PRODUCT,
    HAS_COMPONENT,
    HAS_VARIANT,
    TRANSCRIBED_TO,
    TRANSLATED_TO,
    HAS_MEMBER,
    IS_A,
    EQUIVALENT_TO,
    PART_OF,
    ORTHOLOGOUS,
}

# BEL Keywords

BEL_KEYWORD_SET = 'SET'
BEL_KEYWORD_DOCUMENT = 'DOCUMENT'
BEL_KEYWORD_DEFINE = 'DEFINE'
BEL_KEYWORD_NAMESPACE = 'NAMESPACE'
BEL_KEYWORD_ANNOTATION = 'ANNOTATION'
BEL_KEYWORD_AS = 'AS'
BEL_KEYWORD_URL = 'URL'
BEL_KEYWORD_LIST = 'LIST'
BEL_KEYWORD_OWL = 'OWL'
BEL_KEYWORD_PATTERN = 'PATTERN'

BEL_KEYWORD_UNSET = 'UNSET'
BEL_KEYWORD_STATEMENT_GROUP = 'STATEMENT_GROUP'
BEL_KEYWORD_CITATION = 'Citation'
BEL_KEYWORD_EVIDENCE = 'Evidence'
BEL_KEYWORD_SUPPORT = 'SupportingText'
BEL_KEYWORD_ALL = 'ALL'

# Keywords naming the document-level metadata entries; they are mapped to internal keys by DOCUMENT_KEYS
BEL_KEYWORD_METADATA_NAME = 'Name'
BEL_KEYWORD_METADATA_VERSION = 'Version'
BEL_KEYWORD_METADATA_DESCRIPTION = 'Description'
BEL_KEYWORD_METADATA_AUTHORS = 'Authors'
BEL_KEYWORD_METADATA_CONTACT = 'ContactInfo'
BEL_KEYWORD_METADATA_LICENSES = 'Licenses'
BEL_KEYWORD_METADATA_COPYRIGHT = 'Copyright'
BEL_KEYWORD_METADATA_DISCLAIMER = 'Disclaimer'
BEL_KEYWORD_METADATA_PROJECT = 'Project'

# Internal metadata representation. See BELGraph documentation, since these are shielded from the user by properties.

#: The key for the document metadata dictionary. Can be accessed by :code:`graph.graph[GRAPH_METADATA]`, or by using
#: the property built in to the :class:`pybel.BELGraph`, :func:`pybel.BELGraph.document`
GRAPH_METADATA = 'document_metadata'
GRAPH_NAMESPACE_URL = 'namespace_url'
GRAPH_NAMESPACE_PATTERN = 'namespace_pattern'
GRAPH_ANNOTATION_URL = 'annotation_url'
GRAPH_ANNOTATION_PATTERN = 'annotation_pattern'
GRAPH_ANNOTATION_LIST = 'annotation_list'
GRAPH_WARNINGS = 'warnings'
GRAPH_PYBEL_VERSION = 'pybel_version'
GRAPH_UNCACHED_NAMESPACES = 'namespaces_uncached'

#: The key for the document name. Can be accessed by :code:`graph.document[METADATA_NAME]` or by using the property
#: built into the :class:`pybel.BELGraph` class, :func:`pybel.BELGraph.name`
METADATA_NAME = 'name'
#: The key for the document version. Can be accessed by :code:`graph.document[METADATA_VERSION]`
METADATA_VERSION = 'version'
#: The key for the document description. Can be accessed by :code:`graph.document[METADATA_DESCRIPTION]`
METADATA_DESCRIPTION = 'description'
#: The key for the document authors. Can be accessed by :code:`graph.document[METADATA_AUTHORS]`
METADATA_AUTHORS = 'authors'
#: The key for the document contact email. Can be accessed by :code:`graph.document[METADATA_CONTACT]`
METADATA_CONTACT = 'contact'
#: The key for the document licenses. Can be accessed by :code:`graph.document[METADATA_LICENSES]`
METADATA_LICENSES = 'licenses'
#: The key for the document copyright information. Can be accessed by :code:`graph.document[METADATA_COPYRIGHT]`
METADATA_COPYRIGHT = 'copyright'
#: The key for the document disclaimer. Can be accessed by :code:`graph.document[METADATA_DISCLAIMER]`
METADATA_DISCLAIMER = 'disclaimer'
#: The key for the document project. Can be accessed by :code:`graph.document[METADATA_PROJECT]`
METADATA_PROJECT = 'project'

#: Provides a mapping from BEL language keywords to internal PyBEL strings
DOCUMENT_KEYS = {
    BEL_KEYWORD_METADATA_AUTHORS: METADATA_AUTHORS,
    BEL_KEYWORD_METADATA_CONTACT: METADATA_CONTACT,
    BEL_KEYWORD_METADATA_COPYRIGHT: METADATA_COPYRIGHT,
    BEL_KEYWORD_METADATA_DESCRIPTION: METADATA_DESCRIPTION,
    BEL_KEYWORD_METADATA_DISCLAIMER: METADATA_DISCLAIMER,
    BEL_KEYWORD_METADATA_LICENSES: METADATA_LICENSES,
    BEL_KEYWORD_METADATA_NAME: METADATA_NAME,
    BEL_KEYWORD_METADATA_VERSION: METADATA_VERSION,
    BEL_KEYWORD_METADATA_PROJECT: METADATA_PROJECT,
}

#: The keys to use when inserting a graph to the cache
# NOTE(review): :data:`METADATA_PROJECT` is the only ``METADATA_*`` key that is not listed here - confirm that
# leaving it out is intended.
METADATA_INSERT_KEYS = {
    METADATA_NAME,
    METADATA_VERSION,
    METADATA_DESCRIPTION,
    METADATA_AUTHORS,
    METADATA_CONTACT,
    METADATA_LICENSES,
    METADATA_COPYRIGHT,
    METADATA_DISCLAIMER,
}

#: Provides a mapping from internal PyBEL strings to BEL language keywords. Is the inverse of :data:`DOCUMENT_KEYS`
INVERSE_DOCUMENT_KEYS = {v: k for k, v in DOCUMENT_KEYS.items()}

#: A set representing the required metadata during BEL document parsing
REQUIRED_METADATA = {
    METADATA_NAME,
    METADATA_VERSION,
    METADATA_DESCRIPTION,
    METADATA_AUTHORS,
    METADATA_CONTACT
}

# Modifier parser constants

#: The key for the starting position of a fragment range
FRAGMENT_START = 'start'
#: The key for the stopping position of a fragment range
FRAGMENT_STOP = 'stop'
#: The key signifying that there is neither a start nor stop position defined
FRAGMENT_MISSING = 'missing'
#: The key for any additional descriptive data about a fragment
FRAGMENT_DESCRIPTION = 'description'

#: The order for serializing gene modification data
GMOD_ORDER = [KIND, IDENTIFIER]

#: The key for the reference nucleotide in a gene substitution.
#: Only used during parsing since this is converted to HGVS.
GSUB_REFERENCE = 'reference' #: The key for the position of a gene substitution. #: Only used during parsing since this is converted to HGVS GSUB_POSITION = 'position' #: The key for the effect of a gene substitution. #: Only used during parsing since this is converted to HGVS GSUB_VARIANT = 'variant' #: The key for the protein modification code. PMOD_CODE = 'code' #: The key for the protein modification position. PMOD_POSITION = 'pos' #: The order for serializing information about a protein modification PMOD_ORDER = [KIND, IDENTIFIER, PMOD_CODE, PMOD_POSITION] #: The key for the reference amino acid in a protein substitution. #: Only used during parsing since this is concerted to HGVS PSUB_REFERENCE = 'reference' #: The key for the position of a protein substitution. Only used during parsing since this is converted to HGVS. PSUB_POSITION = 'position' #: The key for the variant of a protein substitution.Only used during parsing since this is converted to HGVS. PSUB_VARIANT = 'variant' #: The key for the position at which a protein is truncated TRUNCATION_POSITION = 'position' #: The mapping from BEL namespace codes to PyBEL internal abundance constants #: ..seealso:: https://wiki.openbel.org/display/BELNA/Assignment+of+Encoding+%28Allowed+Functions%29+for+BEL+Namespaces belns_encodings = { 'G': {GENE}, 'R': {RNA, MIRNA}, 'P': {PROTEIN}, 'M': {MIRNA}, 'A': {ABUNDANCE, RNA, MIRNA, PROTEIN, GENE, COMPLEX}, 'B': {PATHOLOGY, BIOPROCESS}, 'O': {PATHOLOGY}, 'C': {COMPLEX} } BELNS_ENCODING_STR = ''.join(sorted(belns_encodings)) PYBEL_REMOTE_HOST = 'PYBEL_REMOTE_HOST' PYBEL_REMOTE_USER = 'PYBEL_REMOTE_USER' PYBEL_REMOTE_PASSWORD = 'PYBEL_REMOTE_PASSWORD' #: The default location of PyBEL Web DEFAULT_SERVICE_URL = 'https://bel-commons.scai.fraunhofer.de' 
pybel-0.12.1/src/pybel/dsl/000077500000000000000000000000001334645200200153715ustar00rootroot00000000000000pybel-0.12.1/src/pybel/dsl/__init__.py000066400000000000000000000006511334645200200175040ustar00rootroot00000000000000# -*- coding: utf-8 -*- """An internal domain-specific language (DSL) for BEL.""" from . import constants, edges, exc, node_classes, nodes, utils from .constants import * from .edges import * from .exc import * from .node_classes import * from .nodes import * from .utils import * __all__ = ( constants.__all__ + edges.__all__ + exc.__all__ + node_classes.__all__ + nodes.__all__ + utils.__all__ ) pybel-0.12.1/src/pybel/dsl/constants.py000066400000000000000000000015711334645200200177630ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Convenient dictionaries for mapping constants to DSL classes.""" from .node_classes import ( Abundance, BiologicalProcess, ComplexAbundance, CompositeAbundance, Gene, GeneFusion, MicroRna, NamedComplexAbundance, Pathology, Protein, ProteinFusion, Rna, RnaFusion, ) from ..constants import ABUNDANCE, BIOPROCESS, COMPLEX, COMPOSITE, GENE, MIRNA, PATHOLOGY, PROTEIN, RNA __all__ = [ 'FUNC_TO_DSL', 'FUNC_TO_FUSION_DSL', 'FUNC_TO_LIST_DSL' ] FUNC_TO_DSL = { PROTEIN: Protein, RNA: Rna, MIRNA: MicroRna, GENE: Gene, PATHOLOGY: Pathology, BIOPROCESS: BiologicalProcess, COMPLEX: NamedComplexAbundance, ABUNDANCE: Abundance, } FUNC_TO_FUSION_DSL = { GENE: GeneFusion, RNA: RnaFusion, PROTEIN: ProteinFusion, } FUNC_TO_LIST_DSL = { COMPLEX: ComplexAbundance, COMPOSITE: CompositeAbundance } pybel-0.12.1/src/pybel/dsl/edges.py000066400000000000000000000104621334645200200170350ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Internal DSL functions for edges.""" from .utils import entity from ..constants import ( ACTIVITY, BEL_DEFAULT_NAMESPACE, DEGRADATION, EFFECT, FROM_LOC, LOCATION, MODIFIER, TO_LOC, TRANSLOCATION, ) __all__ = [ 'activity', 'degradation', 'translocation', 'extracellular', 'intracellular', 'secretion', 
'cell_surface_expression', 'location', ] intracellular = entity(name='intracellular', namespace='GOCC') extracellular = entity(name='extracellular space', namespace='GOCC') surface = entity(name='cell surface', namespace='GOCC') def _activity_helper(modifier, location=None): """Make an activity dictionary. :param str modifier: :param Optional[dict] location: An entity from :func:`pybel.dsl.entity` :rtype: dict """ rv = {MODIFIER: modifier} if location: rv[LOCATION] = location return rv def activity(name=None, namespace=None, identifier=None, location=None): """Make a subject/object modifier dictionary. :param str name: The name of the activity. If no namespace given, uses BEL default namespace :param Optional[str] namespace: The namespace of the activity :param Optional[str] identifier: The identifier of the name in the database :param Optional[dict] location: An entity from :func:`pybel.dsl.entity` representing the location of the node :rtype: dict """ rv = _activity_helper(ACTIVITY, location=location) if name or (namespace and identifier): rv[EFFECT] = entity( namespace=(namespace or BEL_DEFAULT_NAMESPACE), name=name, identifier=identifier ) return rv def degradation(location=None): """Make a degradation dictionary. :param Optional[dict] location: An entity from :func:`pybel.dsl.entity` representing the location of the node :rtype: dict """ return _activity_helper(DEGRADATION, location=location) def translocation(from_loc, to_loc): """Make a translocation dictionary. :param dict from_loc: An entity dictionary from :func:`pybel.dsl.entity` :param dict to_loc: An entity dictionary from :func:`pybel.dsl.entity` :rtype: dict """ rv = _activity_helper(TRANSLOCATION) rv[EFFECT] = { FROM_LOC: from_loc, TO_LOC: to_loc } return rv def secretion(): """Make a secretion translocation dictionary. This is a convenient wrapper representing the :func:`translocation` from the intracellular location to the extracellular space. 
:rtype: dict """ return translocation( from_loc=intracellular, to_loc=extracellular ) def cell_surface_expression(): """Make a cellular surface expression translocation dictionary. This is a convenient wrapper representing the :func:`translocation` from the intracellular location to the cell surface. :rtype: dict """ return translocation( from_loc=intracellular, to_loc=surface, ) def location(identifier): """Make a location object modifier dictionary. :param entity identifier: A namespace/name/identifier pair Usage: X increases the abundance of Y in the cytoplasm .. code-block:: python from pybel import BELGraph from pybel.dsl import protein, location graph = BELGraph() source = protein('HGNC', 'IRAK1') target = protein('HGNC', 'IRF7, variants=[ pmod('Ph', 'Ser', 477), pmod('Ph', 'Ser', 479), ]) graph.add_increases( source, target, citation=..., evidence=..., object_modifier=location(entity(namespace='GO', name='cytosol', identifier='GO:0005829')), ) X increases the kinase activity of Y in the cytoplasm. In this case, the :func:`activity` function takes a location as an optional argument. .. code-block:: python from pybel import BELGraph from pybel.dsl import protein, location graph = BELGraph() source = ... target = ... 
graph.add_increases( source, target, citation=..., evidence=..., object_modifier=activity('kin', location=entity(namespace='GO', name='cytosol', identifier='GO:0005829')), ) """ return { LOCATION: identifier } pybel-0.12.1/src/pybel/dsl/exc.py000066400000000000000000000005771334645200200165330ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Exceptions for the internal DSL.""" from ..exceptions import PyBELWarning __all__ = [ 'PyBELDSLException', 'InferCentralDogmaException', ] class PyBELDSLException(PyBELWarning, ValueError): """Raised when problems with the DSL.""" class InferCentralDogmaException(PyBELDSLException): """Raised when unable to infer central dogma.""" pybel-0.12.1/src/pybel/dsl/namespaces.py000066400000000000000000000010521334645200200200600ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains simple wrappers around node DSL functions for common namespaces.""" from .nodes import Abundance, Protein __all__ = [ 'chebi', 'hgnc', ] def chebi(name=None, identifier=None): """Build a ChEBI abundance node. :rtype: Abundance """ return Abundance(namespace='CHEBI', name=name, identifier=identifier) def hgnc(name=None, identifier=None): """Build an HGNC protein node. 
@six.add_metaclass(abc.ABCMeta)
class BaseEntity(dict):
    """This class represents all BEL nodes.

    An entity is a dictionary whose identity (hashing, equality, string form) is defined by the BEL string
    returned from :meth:`as_bel`, which every concrete subclass must implement.
    """

    def __init__(self, func):
        """Build a PyBEL node data dictionary.

        :param str func: The PyBEL function
        """
        super(BaseEntity, self).__init__(**{FUNCTION: func})

    @property
    def function(self):
        """Return the function of this entity.

        :rtype: str
        """
        return self[FUNCTION]

    @property
    def _func(self):
        """Return the entry of ``rev_abundance_labels`` for this entity's function, used when building BEL."""
        return rev_abundance_labels[self.function]

    @abc.abstractmethod
    def as_bel(self):
        """Return this entity as a BEL string.

        :rtype: str
        """

    def as_sha512(self):
        """Return the hexadecimal SHA512 digest of this entity's UTF-8 encoded BEL string.

        :rtype: str
        """
        return hashlib.sha512(self.as_bel().encode('utf8')).hexdigest()

    @property
    def sha512(self):
        """The SHA512 hash of this node.

        :rtype: str
        """
        return self.as_sha512()

    def __hash__(self):  # noqa: D105
        # dict is unhashable by default; entities hash by their BEL string.
        return hash(self.as_bel())

    def __eq__(self, other):  # noqa: D105
        return isinstance(other, BaseEntity) and self.as_bel() == other.as_bel()

    def __ne__(self, other):  # noqa: D105
        # dict defines its own __ne__, which would otherwise be inherited and compare raw
        # dictionary contents, disagreeing with the BEL-based __eq__ above.
        return not self.__eq__(other)

    def __repr__(self):  # noqa: D105
        # The template must contain the {bel} field; an empty template always renders ''.
        return '<BEL {bel}>'.format(bel=self.as_bel())

    def __str__(self):  # noqa: D105
        return self.as_bel()
class Abundance(BaseAbundance):
    """A node data dictionary for a general abundance."""

    def __init__(self, namespace, name=None, identifier=None):
        """Create a general abundance entity.

        :param str namespace: The name of the database used to identify this entity
        :param Optional[str] name: The database's preferred name or label for this entity
        :param Optional[str] identifier: The database's identifier for this entity

        Example:

        >>> Abundance(namespace='CHEBI', name='water')
        """
        super(Abundance, self).__init__(
            func=ABUNDANCE,
            namespace=namespace,
            name=name,
            identifier=identifier,
        )
class CentralDogma(BaseAbundance):
    """The shared parent of "central dogma" abundances (i.e., genes, miRNAs, RNAs, and proteins)."""

    def __init__(self, func, namespace, name=None, identifier=None, variants=None):
        """Create a node data dictionary for a gene, RNA, miRNA, or protein.

        :param str func: The PyBEL function to use
        :param str namespace: The name of the database used to identify this entity
        :param Optional[str] name: The database's preferred name or label for this entity
        :param Optional[str] identifier: The database's identifier for this entity
        :param variants: An optional variant or list of variants
        :type variants: None or Variant or list[Variant]
        """
        super(CentralDogma, self).__init__(func, namespace, name=name, identifier=identifier)

        # Nothing is stored when no variants are given, so reference nodes carry no VARIANTS key.
        if not variants:
            return

        if isinstance(variants, Variant):
            # A single variant is wrapped in a list.
            self[VARIANTS] = [variants]
        else:
            # Several variants are stored ordered by their BEL strings.
            self[VARIANTS] = sorted(variants, key=_as_bel)

    @property
    def variants(self):
        """Return the variants stored on this entity, or None when there are none.

        :rtype: Optional[list[Variant]]
        """
        return self.get(VARIANTS)

    def as_bel(self):
        """Return this node as a BEL string, listing the variants after the name if there are any.

        :rtype: str
        """
        stored = self.get(VARIANTS)

        if stored:
            rendered = sorted(str(variant) for variant in stored)
            return "{}({}:{}, {})".format(
                self._func,
                self.namespace,
                ensure_quotes(self._priority_id),
                ', '.join(rendered)
            )

        return super(CentralDogma, self).as_bel()

    def get_parent(self):
        """Return the same entity without variants, or None if this entity has no variants.

        :rtype: Optional[CentralDogma]

        Example usage:

        >>> ab42 = Protein(name='APP', namespace='HGNC', variants=[Fragment(start=672, stop=713)])
        >>> app = ab42.get_parent()
        >>> assert 'p(HGNC:APP)' == app.as_bel()
        """
        if VARIANTS in self:
            return self.__class__(namespace=self.namespace, name=self.name, identifier=self.identifier)

        return None

    def with_variants(self, variants):
        """Return a new entity of the same class and reference, carrying the given variants.

        :param Variant or list[Variant] variants: An optional variant or list of variants
        :rtype: CentralDogma

        Example Usage:

        >>> app = Protein(name='APP', namespace='HGNC')
        >>> ab42 = app.with_variants([Fragment(start=672, stop=713)])
        >>> assert 'p(HGNC:APP, frag(672_713))' == ab42.as_bel()
        """
        reference = dict(
            namespace=self.namespace,
            name=self.name,
            identifier=self.identifier,
        )
        return self.__class__(variants=variants, **reference)
class GeneModification(Variant):
    """A variant dictionary describing a gene modification."""

    def __init__(self, name, namespace=None, identifier=None):
        """Create a gene modification variant data dictionary.

        :param str name: The name of the gene modification
        :param Optional[str] namespace: The namespace of the gene modification
        :param Optional[str] identifier: The identifier of the name in the database

        Either the name or the identifier must be used. If the namespace is omitted, it is assumed that a name is
        specified from the BEL default namespace.

        Example from BEL default namespace:

        >>> GeneModification(name='Me')

        Example from custom namespace:

        >>> GeneModification(name='DNA methylation', namespace='GO', identifier='GO:0006306')
        """
        super(GeneModification, self).__init__(GMOD)

        # A missing (or otherwise falsy) namespace falls back to the BEL default namespace.
        resolved_namespace = namespace or BEL_DEFAULT_NAMESPACE
        self[IDENTIFIER] = entity(
            namespace=resolved_namespace,
            name=name,
            identifier=identifier,
        )

    def as_bel(self):
        """Return this gene modification variant as a BEL string.

        :rtype: str
        """
        reference = str(self[IDENTIFIER])
        return 'gmod({})'.format(reference)
class Gene(CentralDogma):
    """A node data dictionary for a gene."""

    def __init__(self, namespace, name=None, identifier=None, variants=None):
        """Create a gene node data dictionary.

        :param str namespace: The name of the database used to identify this entity
        :param Optional[str] name: The database's preferred name or label for this entity
        :param Optional[str] identifier: The database's identifier for this entity
        :param variants: An optional variant or list of variants
        :type variants: None or Variant or list[Variant]
        """
        super(Gene, self).__init__(
            func=GENE,
            namespace=namespace,
            name=name,
            identifier=identifier,
            variants=variants,
        )
class Rna(_Transcribable):
    """Represents an RNA."""

    def __init__(self, namespace, name=None, identifier=None, variants=None):
        """Build an RNA node data dictionary.

        :param str namespace: The name of the database used to identify this entity
        :param Optional[str] name: The database's preferred name or label for this entity
        :param Optional[str] identifier: The database's identifier for this entity
        :param variants: An optional variant or list of variants
        :type variants: None or Variant or list[Variant]

        Example: AKT1 protein coding gene's RNA:

        >>> Rna(namespace='HGNC', name='AKT1', identifier='391')

        Non-coding RNAs can also be encoded, such as U85:

        >>> Rna(namespace='SNORNABASE', identifier='SR0000073')
        """
        super(Rna, self).__init__(RNA, namespace, name=name, identifier=identifier, variants=variants)
MIR1-1 from `HGNC `_: >>> MicroRna(namespace='HGNC', name='MIR1-1', identifier='31499') MIR1-1 from `miRBase `_: >>> MicroRna(namespace='MIRBASE', identifier='MI0000651') MIR1-1 from `Entrez Gene `_ >>> MicroRna(namespace='ENTREZ', identifier='406904') """ super(MicroRna, self).__init__(MIRNA, namespace, name=name, identifier=identifier, variants=variants) class Protein(CentralDogma): """Represents a protein.""" def __init__(self, namespace, name=None, identifier=None, variants=None): """Build a protein node data dictionary. :param str namespace: The name of the database used to identify this entity :param Optional[str] name: The database's preferred name or label for this entity :param Optional[str] identifier: The database's identifier for this entity :param variants: An optional variant or list of variants :type variants: None or Variant or list[Variant] Example: AKT >>> Protein(namespace='HGNC', name='AKT1') Example: AKT with optionally included HGNC database identifier >>> Protein(namespace='HGNC', name='AKT1', identifier='391') Example: AKT with phosphorylation >>> Protein(namespace='HGNC', name='AKT', variants=[ProteinModification('Ph', code='Thr', position=308)]) """ super(Protein, self).__init__(PROTEIN, namespace, name=name, identifier=identifier, variants=variants) def get_rna(self): """Get the corresponding RNA or raise an exception if it's not the reference node. :rtype: pybel.dsl.Rna :raises: InferCentralDogmaException """ if self.variants: raise InferCentralDogmaException('can not get rna for variant') return Rna( namespace=self.namespace, name=self.name, identifier=self.identifier ) def _entity_list_as_bel(entities): """Stringify a list of BEL entities. :type entities: iter[BaseAbundance] :rtype: str """ return ', '.join( e.as_bel() for e in entities ) class Reaction(BaseEntity): """Build a reaction node data dictionary.""" def __init__(self, reactants, products): """Build a reaction node data dictionary. 
class ListAbundance(BaseEntity):
    """The shared parent of list abundance (complex, composite) node data dictionaries."""

    def __init__(self, func, members):
        """Create a list abundance node data dictionary.

        :param str func: The PyBEL function
        :param members: A list of PyBEL node data dictionaries
        :type members: BaseAbundance or list[BaseAbundance]
        """
        super(ListAbundance, self).__init__(func=func)

        if isinstance(members, BaseEntity):
            # A single entity is wrapped in a list.
            self[MEMBERS] = [members]
        else:
            # Several members are stored ordered by their BEL strings.
            self[MEMBERS] = sorted(members, key=_as_bel)

    @property
    def members(self):
        """Return the stored list of members of this list abundance.

        :rtype: list[BaseAbundance]
        """
        return self[MEMBERS]

    def as_bel(self):
        """Return this list abundance as a BEL string.

        :rtype: str
        """
        inner = _entity_list_as_bel(self.members)
        return '{}({})'.format(self._func, inner)
class MissingFusionRange(FusionRangeBase):
    """Represents a fusion range with no defined start or end."""

    def __init__(self):
        """Build a missing fusion range.

        The dictionary holds a single entry mapping ``FUSION_MISSING`` to ``'?'``.
        """
        super(MissingFusionRange, self).__init__({
            FUSION_MISSING: '?'
        })

    def as_bel(self):
        """Return this missing fusion range as BEL, which is always the string ``'?'``.

        :rtype: str
        """
        return '?'
class ProteinFusion(FusionBase):
    """A fusion data dictionary built with the protein function."""

    def __init__(self, partner_5p, partner_3p, range_5p=None, range_3p=None):
        """Create a protein fusion node data dictionary.

        :param pybel.dsl.Protein partner_5p: A PyBEL node data dictionary for the 5-prime partner
        :param pybel.dsl.Protein partner_3p: A PyBEL node data dictionary for the 3-prime partner
        :param Optional[FusionRangeBase] range_5p: A fusion range for the 5-prime partner
        :param Optional[FusionRangeBase] range_3p: A fusion range for the 3-prime partner
        """
        # Everything is forwarded by keyword, so grouping partners before ranges changes nothing.
        fusion_parts = dict(
            partner_5p=partner_5p,
            partner_3p=partner_3p,
            range_5p=range_5p,
            range_3p=range_3p,
        )
        super(ProteinFusion, self).__init__(func=PROTEIN, **fusion_parts)
:param pybel.dsl.Gene partner_5p: A PyBEL node data dictionary for the 5-prime partner :param pybel.dsl.Gene partner_3p: A PyBEL node data dictionary for the 3-prime partner :param Optional[FusionRangeBase] range_5p: A fusion range for the 5-prime partner :param Optional[FusionRangeBase] range_3p: A fusion range for the 3-prime partner Example, using fusion ranges with the 'c' qualifier >>> GeneFusion( >>> ... partner_5p=Gene(namespace='HGNC', name='TMPRSS2'), >>> ... range_5p=EnumeratedFusionRange('c', 1, 79), >>> ... partner_3p=Gene(namespace='HGNC', name='ERG'), >>> ... range_3p=EnumeratedFusionRange('c', 312, 5034) >>> ) Example with missing fusion ranges: >>> GeneFusion( >>> ... partner_5p=Gene(namespace='HGNC', name='TMPRSS2'), >>> ... partner_3p=Gene(namespace='HGNC', name='ERG'), >>> ) """ super(GeneFusion, self).__init__( func=GENE, partner_5p=partner_5p, range_5p=range_5p, partner_3p=partner_3p, range_3p=range_3p, ) pybel-0.12.1/src/pybel/dsl/nodes.py000066400000000000000000000030661334645200200170600ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Convenient wrappers for DSL classes. Also provided for backwards compatibility. 
""" from .node_classes import ( Abundance, BiologicalProcess, ComplexAbundance, CompositeAbundance, EnumeratedFusionRange, Fragment, Gene, GeneFusion, GeneModification, Hgvs, HgvsReference, HgvsUnspecified, MicroRna, MissingFusionRange, NamedComplexAbundance, Pathology, Protein, ProteinFusion, ProteinModification, ProteinSubstitution, Reaction, Rna, RnaFusion, ) __all__ = [ 'abundance', 'gene', 'rna', 'mirna', 'protein', 'complex_abundance', 'composite_abundance', 'bioprocess', 'pathology', 'named_complex_abundance', 'reaction', 'pmod', 'gmod', 'hgvs', 'hgvs_reference', 'hgvs_unspecified', 'protein_substitution', 'fragment', 'fusion_range', 'missing_fusion_range', 'protein_fusion', 'rna_fusion', 'gene_fusion', ] abundance = Abundance bioprocess = BiologicalProcess pathology = Pathology pmod = ProteinModification gmod = GeneModification hgvs = Hgvs hgvs_unspecified = HgvsUnspecified hgvs_reference = HgvsReference protein_substitution = ProteinSubstitution fragment = Fragment gene = Gene rna = Rna mirna = MicroRna protein = Protein reaction = Reaction complex_abundance = ComplexAbundance named_complex_abundance = NamedComplexAbundance composite_abundance = CompositeAbundance missing_fusion_range = MissingFusionRange fusion_range = EnumeratedFusionRange protein_fusion = ProteinFusion rna_fusion = RnaFusion gene_fusion = GeneFusion pybel-0.12.1/src/pybel/dsl/utils.py000066400000000000000000000024171334645200200171070ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Utilities for the internal DSL.""" from .exc import PyBELDSLException from ..constants import BEL_DEFAULT_NAMESPACE, IDENTIFIER, NAME, NAMESPACE from ..utils import ensure_quotes __all__ = [ 'Entity', 'entity', ] class Entity(dict): """Represents a named entity with a namespace and name/identifier.""" def __init__(self, namespace, name=None, identifier=None): """Create a dictionary representing a reference to an entity. 
:param str namespace: The namespace to which the entity belongs :param str name: The name of the entity :param str identifier: The identifier of the entity in the namespace :rtype: dict """ if name is None and identifier is None: raise PyBELDSLException('cannot create an entity with neither a name nor identifier') super(Entity, self).__init__({ NAMESPACE: namespace, }) if name is not None: self[NAME] = name if identifier is not None: self[IDENTIFIER] = identifier def __str__(self): # noqa: D105 if self[NAMESPACE] == BEL_DEFAULT_NAMESPACE: return self[NAME] return '{}:{}'.format(self[NAMESPACE], ensure_quotes(self[NAME])) entity = Entity pybel-0.12.1/src/pybel/examples/000077500000000000000000000000001334645200200164255ustar00rootroot00000000000000pybel-0.12.1/src/pybel/examples/__init__.py000066400000000000000000000010221334645200200205310ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This directory contains example networks, precompiled as BEL graphs that are appropriate to use in examples.""" from .braf_example import braf_graph from .egf_example import egf_graph from .homology_example import homology_graph from .sialic_acid_example import sialic_acid_graph from .statin_example import statin_graph from .tloc_example import ras_tloc_graph __all__ = [ 'egf_graph', 'sialic_acid_graph', 'statin_graph', 'braf_graph', 'homology_graph', 'ras_tloc_graph', ] pybel-0.12.1/src/pybel/examples/braf_example.py000066400000000000000000000044661334645200200214360ustar00rootroot00000000000000# -*- coding: utf-8 -*- """An example describing a single evidence about BRAF. .. code-block:: SET Citation = {"PubMed", "11283246"} SET Evidence = "Expression of both dominant negative forms, RasN17 and Rap1N17, in UT7-Mpl cells decreased thrombopoietin-mediated Elk1-dependent transcription. This suggests that both Ras and Rap1 contribute to thrombopoietin-induced ELK1 transcription." 
SET Species = 9606 p(HGNC:THPO) increases kin(p(HGNC:BRAF)) p(HGNC:THPO) increases kin(p(HGNC:RAF1)) kin(p(HGNC:BRAF)) increases tscript(p(HGNC:ELK1)) UNSET ALL """ from ..dsl import activity, protein from ..struct.graph import BELGraph __all__ = [ 'braf_graph' ] braf_graph = BELGraph( name='BRAF Subgraph', version='1.0.0', description="Some relations surrounding BRAF", authors='Charles Tapley Hoyt', contact='charles.hoyt@scai.fraunhofer.de', ) braf_graph.namespace_url.update({ 'HGNC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/hgnc-human-genes-20170725.belns', }) braf_graph.annotation_url.update({ 'Species': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/species-taxonomy-id/species-taxonomy-id-20170511.belanno' }) thpo = protein(namespace='HGNC', name='THPO', identifier='11795') braf = protein(namespace='HGNC', name='BRAF', identifier='1097') raf1 = protein(namespace='HGNC', name='RAF1', identifier='9829') elk1 = protein(namespace='HGNC', name='ELK1', identifier='3321') evidence = "Expression of both dominant negative forms, RasN17 and Rap1N17, in UT7-Mpl cells decreased " \ "thrombopoietin-mediated Elk1-dependent transcription. This suggests that both Ras and Rap1 contribute to " \ "thrombopoietin-induced ELK1 transcription." braf_graph.add_increases( thpo, braf, evidence=evidence, citation='11283246', object_modifier=activity(name='kin'), annotations={'Species': '9606'} ) braf_graph.add_increases( thpo, raf1, evidence=evidence, citation='11283246', object_modifier=activity(name='kin'), annotations={'Species': '9606'} ) braf_graph.add_increases( braf, elk1, evidence=evidence, citation='11283246', subject_modifier=activity(name='kin'), object_modifier=activity(name='tscript'), annotations={'Species': '9606'} ) pybel-0.12.1/src/pybel/examples/egf_example.py000066400000000000000000000130251334645200200212540ustar00rootroot00000000000000# -*- coding: utf-8 -*- """An example describing EGF's effect on cellular processes. 
.. code-block:: none SET Citation = {"PubMed","Clin Cancer Res 2003 Jul 9(7) 2416-25","12855613"} SET Evidence = "This induction was not seen either when LNCaP cells were treated with flutamide or conditioned medium were pretreated with antibody to the epidermal growth factor (EGF)" SET Species = 9606 tscript(p(HGNC:AR)) increases p(HGNC:EGF) UNSET ALL SET Citation = {"PubMed","Int J Cancer 1998 Jul 3 77(1) 138-45","9639405"} SET Evidence = "DU-145 cells treated with 5000 U/ml of IFNgamma and IFN alpha, both reduced EGF production with IFN gamma reduction more significant." SET Species = 9606 p(HGNC:IFNA1) decreases p(HGNC:EGF) p(HGNC:IFNG) decreases p(HGNC:EGF) UNSET ALL SET Citation = {"PubMed","DNA Cell Biol 2000 May 19(5) 253-63","10855792"} SET Evidence = "Although found predominantly in the cytoplasm and, less abundantly, in the nucleus, VCP can be translocated from the nucleus after stimulation with epidermal growth factor." SET Species = 9606 p(HGNC:EGF) increases tloc(p(HGNC:VCP),GOCCID:0005634,GOCCID:0005737) UNSET ALL SET Citation = {"PubMed","J Clin Oncol 2003 Feb 1 21(3) 447-52","12560433"} SET Evidence = "Valosin-containing protein (VCP; also known as p97) has been shown to be associated with antiapoptotic function and metastasis via activation of the nuclear factor-kappaB signaling pathway." 
SET Species = 9606 cat(p(HGNC:VCP)) increases tscript(complex(p(HGNC:NFKB1), p(HGNC:NFKB2), p(HGNC:REL), p(HGNC:RELA), p(HGNC:RELB))) tscript(complex(p(HGNC:NFKB1), p(HGNC:NFKB2), p(HGNC:REL), p(HGNC:RELA), p(HGNC:RELB))) decreases bp(MESHPP:Apoptosis) UNSET ALL """ from ..dsl import activity, bioprocess, complex_abundance, entity, protein, translocation from ..struct.graph import BELGraph __all__ = [ 'egf_graph' ] egf_graph = BELGraph( name='EGF Pathway', version='1.0.0', description="The downstream effects of EGF", authors='Charles Tapley Hoyt', contact='charles.hoyt@scai.fraunhofer.de', ) egf_graph.namespace_url.update({ 'HGNC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/hgnc-human-genes-20170725.belns', 'CHEBI': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/chebi/chebi-20170725.belns', 'GOBP': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/go-biological-process/go-biological-process-20170725.belns' }) egf_graph.annotation_url.update({ 'Confidence': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/confidence/confidence-1.0.0.belanno', 'Species': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/species-taxonomy-id/species-taxonomy-id-20170511.belanno' }) ar = protein(name='AR', namespace='HGNC') egf = protein(name='EGF', namespace='HGNC') ifna1 = protein(name='IFNA1', namespace='HGNC') ifng = protein(name='IFNG', namespace='HGNC') vcp = protein(name='VCP', namespace='HGNC') nfkb1 = protein(name='NFKB1', namespace='HGNC') nfkb2 = protein(name='NFKB2', namespace='HGNC') rel = protein(name='REL', namespace='HGNC') rela = protein(name='RELA', namespace='HGNC') relb = protein(name='RELB', namespace='HGNC') nfkb_complex = complex_abundance([nfkb1, nfkb2, rel, rela, relb]) apoptosis = bioprocess(namespace='GOBP', name='apoptotic process', identifier='0006915') egf_graph.add_increases( ar, egf, citation='12855613', evidence='This induction was not seen either when LNCaP cells were treated 
with flutamide or conditioned medium ' 'were pretreated with antibody to the epidermal growth factor (EGF)', annotations={'Species': '9606'}, subject_modifier=activity('tscript'), ) egf_graph.add_decreases( ifna1, egf, citation='9639405', evidence='DU-145 cells treated with 5000 U/ml of IFNgamma and IFN alpha, both reduced EGF production with IFN ' 'gamma reduction more significant.', annotations={'Species': '9606'} ) egf_graph.add_decreases( ifng, egf, citation='9639405', evidence='DU-145 cells treated with 5000 U/ml of IFNgamma and IFN alpha, both reduced EGF production with IFN ' 'gamma reduction more significant.', annotations={'Species': '9606'} ) egf_graph.add_increases( egf, vcp, citation='10855792', evidence='Although found predominantly in the cytoplasm and, less abundantly, in the nucleus, VCP can be ' 'translocated from the nucleus after stimulation with epidermal growth factor.', annotations={'Species': '9606'}, object_modifier=translocation( from_loc=entity(namespace='GOCC', name='nucleus', identifier='0005634'), to_loc=entity(namespace='GOCC', name='cytoplasm', identifier='0005737'), ) ) egf_graph.add_increases( vcp, nfkb_complex, citation='12560433', evidence="Valosin-containing protein (VCP; also known as p97) has been shown to be associated with antiapoptotic" " function and metastasis via activation of the nuclear factor-kappaB signaling pathway.", annotations={'Species': '9606'}, subject_modifier=activity('cat'), object_modifier=activity('tscript'), ) egf_graph.add_decreases( nfkb_complex, apoptosis, citation='12560433', evidence="Valosin-containing protein (VCP; also known as p97) has been shown to be associated with antiapoptotic " "function and metastasis via activation of the nuclear factor-kappaB signaling pathway.", annotations={'Species': '9606'}, subject_modifier=activity('tscript'), ) pybel-0.12.1/src/pybel/examples/homology_example.py000066400000000000000000000121361334645200200223520ustar00rootroot00000000000000# -*- coding: utf-8 -*- 
"""An example with orthology statements. The following is an example of orthology annotations from `HomoloGene:37670 `_ .. code-block: none SET Citation = {"PubMed","J Immunol 1999 Sep 1 163(5) 2452-62","10452980","","",""} SET Evidence = "M-CSF triggers the activation of extracellular signal-regulated protein kinases (ERK)-1/2." SET Species = 10090 p(MGI:Csf1) increases kin(p(MGI:Mapk1)) """ from ..dsl import activity, gene, protein, rna from ..struct.graph import BELGraph __all__ = [ 'homology_graph' ] # TODO make SGD resource homology_graph = BELGraph( name='Homology and Equivalence Example Graph', version='1.0.1', description="Adds several equivalence and orthology relationships related to the mitogen-activated protein kinase " "(MAPK1)", authors='Charles Tapley Hoyt', contact='charles.hoyt@scai.fraunhofer.de', ) homology_graph.namespace_url.update({ 'HGNC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/hgnc-human-genes-20170725.belns', 'MGI': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/mgi-mouse-genes/mgi-mouse-genes-20170725.belns', 'RGD': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/rgd-rat-genes/rgd-rat-genes-20170725.belns', 'FLYBASE': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/flybase/flybase-20170508.belns', 'ENTREZ': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/entrez-gene-ids/entrez-gene-ids-20170725.belns', # 'SGD': '?', }) homology_graph.annotation_url.update({ 'Species': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/species-taxonomy-id/species-taxonomy-id-20170511.belanno' }) human_mapk1_gene = gene(namespace='HGNC', name='MAPK1', identifier='HGNC:6871') human_mapk1_gene_entrez = gene(namespace='ENTREZ', name='5594') human_mapk1_rna = rna(namespace='HGNC', name='MAPK1', identifier='HGNC:6871') human_mapk1_protein = protein(namespace='HGNC', name='MAPK1', identifier='HGNC:6871') mouse_mapk1_gene = gene(namespace='MGI', name='Mapk1', 
identifier='MGI:1346858') mouse_mapk1_gene_entrez = gene(namespace='ENTREZ', name='26413') mouse_mapk1_rna = rna(namespace='MGI', name='Mapk1', identifier='MGI:1346858') mouse_mapk1_protein = protein(namespace='MGI', name='Mapk1', identifier='MGI:1346858') rat_mapk1 = gene(namespace='RGD', name='Mapk1', identifier='70500') rat_mapk1_entrez = gene(namespace='ENTREZ', name='116590') fly_mapk1 = gene(namespace='FLYBASE', name='rl', identifier='FBgn0003256') fly_mapk1_entrez = gene(namespace='ENTREZ', name='3354888') human_csf1_gene = gene(namespace='HGNC', name='CSF1', identifier='HGNC:2432') human_csf1_rna = rna(namespace='HGNC', name='CSF1', identifier='HGNC:2432') human_csf1_protein = protein(namespace='HGNC', name='CSF1', identifier='HGNC:2432') mouse_csf1_gene = gene(namespace='MGI', name='Csf1', identifier='MGI:1339753') mouse_csf1_rna = rna(namespace='MGI', name='Csf1', identifier='MGI:1339753') mouse_csf1_protein = protein(namespace='MGI', name='Csf1', identifier='MGI:1339753') # yeast_mapk1 = gene(namespace='SGD', name='KSS1', identifier='SGD:S000003272') # yeast_mapk1_entrez = gene(namespace='ENTREZ', name='KSS1', identifier='852931') # TODO make homologene resource and add is_a relationships for this # mapk1_homologene = gene(namespace='HOMOLOGENE', identifier='37670') homology_graph.add_equivalence(human_mapk1_gene, human_mapk1_gene_entrez) homology_graph.add_equivalence(mouse_mapk1_gene, mouse_mapk1_gene_entrez) homology_graph.add_equivalence(rat_mapk1, rat_mapk1_entrez) homology_graph.add_equivalence(fly_mapk1, fly_mapk1_entrez) # graph.add_equivalence(yeast_mapk1, yeast_mapk1_entrez) homology_graph.add_orthology(human_csf1_gene, mouse_csf1_gene) homology_graph.add_orthology(human_mapk1_gene, mouse_mapk1_gene) homology_graph.add_orthology(human_mapk1_gene, rat_mapk1) homology_graph.add_orthology(human_mapk1_gene, fly_mapk1) # graph.add_orthology(human_mapk1, yeast_mapk1) """SET Citation = {"PubMed","J Immunol 1999 Sep 1 163(5) 
2452-62","10452980","","",""} SET Evidence = "M-CSF triggers the activation of extracellular signal-regulated protein kinases (ERK)-1/2." SET Species = 10090 p(MGI:Csf1) increases kin(p(MGI:Mapk1))""" homology_graph.add_increases( u=mouse_csf1_protein, v=mouse_mapk1_protein, citation='10452980', evidence='M-CSF triggers the activation of extracellular signal-regulated protein kinases (ERK)-1/2.', object_modifier=activity('kin'), annotations={'Species': '10090'} ) homology_graph.add_transcription(mouse_mapk1_gene, mouse_mapk1_rna) homology_graph.add_translation(mouse_mapk1_rna, mouse_mapk1_protein) homology_graph.add_transcription(human_mapk1_gene, human_mapk1_rna) homology_graph.add_translation(human_mapk1_rna, human_mapk1_protein) homology_graph.add_transcription(human_csf1_gene, human_csf1_rna) homology_graph.add_translation(human_csf1_rna, human_csf1_protein) homology_graph.add_transcription(mouse_csf1_gene, mouse_csf1_rna) homology_graph.add_translation(mouse_csf1_rna, mouse_csf1_protein) pybel-0.12.1/src/pybel/examples/sialic_acid_example.py000066400000000000000000000126551334645200200227470ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Curation of the article "Genetics ignite focus on microglial inflammation in Alzheimer's disease". .. code-block:: none SET Citation = {"PubMed", "26438529"} SET Evidence = "Sialic acid binding activates CD33, resulting in phosphorylation of the CD33 immunoreceptor tyrosine-based inhibitory motif (ITIM) domains and activation of the SHP-1 and SHP-2 tyrosine phosphatases [66, 67]." complex(p(HGNC:CD33),a(CHEBI:"sialic acid")) -> p(HGNC:CD33, pmod(P)) act(p(HGNC:CD33, pmod(P))) => act(p(HGNC:PTPN6), ma(phos)) act(p(HGNC:CD33, pmod(P))) => act(p(HGNC:PTPN11), ma(phos)) UNSET {Evidence, Species} SET Evidence = "These phosphatases act on multiple substrates, including Syk, to inhibit immune activation [68, 69]. 
Hence, CD33 activation leads to increased SHP-1 and SHP-2 activity that antagonizes Syk, inhibiting ITAM-signaling proteins, possibly including TREM2/DAP12 (Fig. 1, [70, 71])." SET Species = 9606 act(p(HGNC:PTPN6)) =| act(p(HGNC:SYK)) act(p(HGNC:PTPN11)) =| act(p(HGNC:SYK)) act(p(HGNC:SYK)) -> act(p(HGNC:TREM2)) act(p(HGNC:SYK)) -> act(p(HGNC:TYROBP)) UNSET ALL """ from ..dsl import abundance, activity, bioprocess, complex_abundance, pmod, protein from ..struct.graph import BELGraph __all__ = [ 'sialic_acid_graph' ] citation = '26438529' evidence_1 = """ Sialic acid binding activates CD33, resulting in phosphorylation of the CD33 immunoreceptor tyrosine-based inhibitory motif (ITIM) domains and activation of the SHP-1 and SHP-2 tyrosine phosphatases [66, 67]. """.replace('\n', ' ').strip() evidence_2 = """These phosphatases act on multiple substrates, including Syk, to inhibit immune activation [68, 69]. Hence, CD33 activation leads to increased SHP-1 and SHP-2 activity that antagonizes Syk, inhibiting ITAM-signaling proteins, possibly including TREM2/DAP12 (Fig. 1, [70, 71]). 
""".replace('\n', ' ').strip() sialic_acid_graph = BELGraph( name='Sialic Acid Graph', version='1.0.0', description="The downstream effects of sialic acid in immune signaling", authors='Charles Tapley Hoyt', contact='charles.hoyt@scai.fraunhofer.de', ) sialic_acid_graph.namespace_url.update({ 'HGNC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/' 'hgnc-human-genes-20170725.belns', 'CHEBI': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/chebi/chebi-20170725.belns', 'GOBP': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/go-biological-process/' 'go-biological-process-20170725.belns' }) sialic_acid_graph.annotation_url.update({ 'Confidence': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/confidence/confidence-1.0.0.belanno', 'Species': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/species-taxonomy-id/' 'species-taxonomy-id-20170511.belanno' }) sialic_acid = abundance(name='sialic acid', namespace='CHEBI', identifier='26667') cd33 = protein(name='CD33', namespace='HGNC', identifier='1659') sialic_acid_cd33_complex = complex_abundance([sialic_acid, cd33]) shp1 = protein(namespace='HGNC', name='PTPN6', identifier='9658') shp2 = protein(namespace='HGNC', name='PTPN11', identifier='9644') syk = protein(namespace='HGNC', name='SYK', identifier='11491') dap12 = protein(namespace='HGNC', name='TYROBP', identifier='12449') trem2 = protein(namespace='HGNC', name='TREM2', identifier='17761') cd33_phosphorylated = protein(name='CD33', namespace='HGNC', identifier='1659', variants=[pmod('Ph')]) immune_response = bioprocess(name='immune response', namespace='GOBP', identifier='0006955') sialic_acid_graph.add_increases( sialic_acid_cd33_complex, cd33, citation=citation, annotations={'Species': '9606', 'Confidence': 'High'}, evidence=evidence_1, object_modifier=activity() ) sialic_acid_graph.add_increases( cd33, cd33_phosphorylated, citation=citation, annotations={'Species': '9606', 'Confidence': 
'High'}, evidence=evidence_1, subject_modifier=activity() ) sialic_acid_graph.add_directly_increases( cd33_phosphorylated, shp1, citation=citation, evidence=evidence_1, annotations={'Species': '9606', 'Confidence': 'High'}, subject_modifier=activity(), object_modifier=activity('phos'), ) sialic_acid_graph.add_directly_increases( cd33_phosphorylated, shp2, citation=citation, evidence=evidence_1, annotations={'Species': '9606', 'Confidence': 'High'}, subject_modifier=activity(), object_modifier=activity('phos'), ) sialic_acid_graph.add_directly_decreases( shp1, syk, citation=citation, evidence=evidence_2, annotations={'Species': '9606', 'Confidence': 'High'}, subject_modifier=activity(), object_modifier=activity() ) sialic_acid_graph.add_directly_decreases( shp2, syk, citation=citation, evidence=evidence_2, annotations={'Species': '9606', 'Confidence': 'High'}, subject_modifier=activity(), object_modifier=activity() ) sialic_acid_graph.add_increases( syk, trem2, citation=citation, evidence=evidence_2, annotations={'Species': '9606', 'Confidence': 'Low'}, subject_modifier=activity(), object_modifier=activity() ) sialic_acid_graph.add_increases( syk, dap12, citation=citation, evidence=evidence_2, annotations={'Species': '9606', 'Confidence': 'Low'}, subject_modifier=activity(), object_modifier=activity() ) pybel-0.12.1/src/pybel/examples/statin_example.py000066400000000000000000000044371334645200200220240ustar00rootroot00000000000000# -*- coding: utf-8 -*- """An example describing statins.""" from ..dsl import abundance, protein from ..struct.graph import BELGraph __all__ = [ 'statin_graph' ] statin_graph = BELGraph( name='Statin Graph', version='1.0.1', description="The effects of statins from ChEBI", authors='Charles Tapley Hoyt', contact='charles.hoyt@scai.fraunhofer.de', ) statin_graph.namespace_url.update({ 'HGNC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/hgnc-human-genes-20170725.belns', 'CHEBI': 
'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/chebi/chebi-20170725.belns', 'EC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/enzyme-class/enzyme-class-20170508.belns' }) statin_graph.annotation_url.update({ 'Confidence': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/confidence/confidence-1.0.0.belanno', }) fluvastatin = abundance(namespace='CHEBI', name='fluvastatin', identifier='38561') avorastatin = abundance(namespace='CHEBI', name='atorvastatin', identifier='39548') synthetic_statin = abundance(namespace='CHEBI', name='statin (synthetic)', identifier='87635') statin = abundance(namespace='CHEBI', name='statin', identifier='87631') mevinolinic_acid = abundance(namespace='CHEBI', name='mevinolinic acid', identifier='82985') hmgcr_inhibitor = abundance(namespace='CHEBI', identifier='35664', name='EC 1.1.1.34/EC 1.1.1.88 (hydroxymethylglutaryl-CoA reductase) inhibitor') ec_11134 = protein(namespace='EC', name='1.1.1.34') ec_11188 = protein(namespace='EC', name='1.1.1.88') hmgcr = protein(namespace='HGNC', name='HMGCR', identifier='5006') statin_graph.add_is_a(avorastatin, synthetic_statin) statin_graph.add_is_a(fluvastatin, synthetic_statin) statin_graph.add_is_a(synthetic_statin, statin) statin_graph.add_is_a(statin, hmgcr_inhibitor) statin_graph.add_is_a(mevinolinic_acid, hmgcr_inhibitor) statin_graph.add_is_a(hmgcr, ec_11134) statin_graph.add_inhibits( hmgcr_inhibitor, ec_11134, evidence='From ChEBI', citation='23180789', annotations={ 'Confidence': 'Axiomatic' } ) statin_graph.add_inhibits( hmgcr_inhibitor, ec_11188, evidence='From ChEBI', citation='23180789', annotations={ 'Confidence': 'Axiomatic' } ) pybel-0.12.1/src/pybel/examples/tloc_example.py000066400000000000000000000050751334645200200214620ustar00rootroot00000000000000# -*- coding: utf-8 -*- """An example describing a translocation. .. 
code-block:: none SET Citation = {"PubMed", "16170185"} SET Evidence = "These modifications render Ras functional and capable of localizing to the lipid-rich inner surface of the cell membrane. The first and most critical modification, farnesylation, which is principally catalyzed by protein FTase, adds a 15-carbon hydrobobic farnesyl isoprenyl tail to the carboxyl terminus of Ras." SET TextLocation = Review cat(complex(p(HGNC:FNTA),p(HGNC:FNTB))) directlyIncreases p(SFAM:"RAS Family",pmod(F)) p(SFAM:"RAS Family",pmod(F)) directlyIncreases tloc(p(SFAM:"RAS Family"),MESHCS:"Intracellular Space",MESHCS:"Cell Membrane") """ from ..dsl import activity, complex_abundance, entity, pmod, protein, translocation from ..struct.graph import BELGraph __all__ = ['ras_tloc_graph'] ras_tloc_graph = BELGraph( name='RAS Transocation Graph', version='1.0.0', description='The farnesylation of RAS causes its translocation to the cell membrange.' ) ras_tloc_graph.namespace_url.update({ 'HGNC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/hgnc-human-genes-20170725.belns', 'FPLX': 'https://raw.githubusercontent.com/sorgerlab/famplex/1b7e14ec0fd02ee7ed71514c6e267f57d5641a4b/export/famplex.belns', 'GO': "https://raw.githubusercontent.com/pharmacome/terminology/1b20f0637c395f8aa89c2e2e342d7b704062c242/external/go-names.belns" }) evidence = "These modifications render Ras functional and capable of localizing to the lipid-rich inner surface of the cell membrane. The first and most critical modification, farnesylation, which is principally catalyzed by protein FTase, adds a 15-carbon hydrobobic farnesyl isoprenyl tail to the carboxyl terminus of Ras." 
# PubMed identifier of the review from which the evidence text was taken.
pmid = '16170185'

# The two subunits of the farnesyltransferase complex, as written in the BEL
# in the module docstring: complex(p(HGNC:FNTA), p(HGNC:FNTB)).
fnta = protein(namespace='HGNC', name='FNTA', identifier='3782')
# Fixed: this node was previously created with name='FNTA', which made it
# disagree with its own identifier and with the documented BEL statement.
fntb = protein(namespace='HGNC', name='FNTB', identifier='3785')

# Fixed: the complex previously carried identifier='RAS', apparently copied
# from the RAS family node below.
# NOTE(review): assumes FPLX identifiers mirror the entry name, as they do for
# the ``ras`` node below -- confirm against the FamPlex namespace.
fnt = complex_abundance(namespace='FPLX', name='FNT', identifier='FNT', members=[fnta, fntb])

ras = protein(namespace='FPLX', name='RAS', identifier='RAS')
# The farnesylated form of RAS.
ras_farn = ras.with_variants(pmod('Farn'))

# cat(complex(FNTA, FNTB)) directlyIncreases p(RAS, pmod(Farn))
ras_tloc_graph.add_directly_increases(
    fnt,
    ras_farn,
    evidence=evidence,
    citation=pmid,
    subject_modifier=activity('cat'),
)

# p(RAS, pmod(Farn)) directlyIncreases tloc(p(RAS), intracellular -> plasma membrane)
ras_tloc_graph.add_directly_increases(
    ras_farn,
    ras,
    evidence=evidence,
    citation=pmid,
    object_modifier=translocation(
        from_loc=entity(namespace='GO', name='intracellular', identifier='GO:0005622'),
        to_loc=entity(namespace='GO', name='plasma membrane', identifier='GO:0005886'),
    )
)
identifier='16761') phosphate = abundance(name='phosphoric acid', namespace='CHEBI', identifier='26078') glucose = abundance(name='glucose', namespace='CHEBI', identifier='17234') glucose_6_phosphate = abundance(name='D-glucopyranose 6-phosphate', namespace='CHEBI', identifier='4170') glycolisis_step_1 = reaction(reactants=[glucose, hk1, atp, phosphate], products=[glucose_6_phosphate, adp, hk1]) composite_example = composite_abundance(members=[glucose_6_phosphate, adp, hk1]) complex_example = complex_abundance(members=[glucose_6_phosphate, adp, hk1]) single_reaction_graph.add_node_from_data(glycolisis_step_1) single_complex_graph = BELGraph( name='Single Complex graph', version='1.0.0', description="Example graph", authors='Charles Tapley Hoyt', contact='charles.hoyt@scai.fraunhofer.de', ) single_complex_graph.namespace_url.update({ 'HGNC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/' 'hgnc-human-genes-20170725.belns', 'CHEBI': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/chebi/chebi-20170725.belns', 'GOBP': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/go-biological-process/' 'go-biological-process-20170725.belns' }) single_complex_graph.add_node_from_data(complex_example) single_composite_graph = BELGraph( name='Single Composite graph', version='1.0.0', description="Example graph", authors='Charles Tapley Hoyt', contact='charles.hoyt@scai.fraunhofer.de', ) single_composite_graph.namespace_url.update({ 'HGNC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/' 'hgnc-human-genes-20170725.belns', 'CHEBI': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/chebi/chebi-20170725.belns', 'GOBP': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/go-biological-process/' 'go-biological-process-20170725.belns' }) single_composite_graph.add_node_from_data(composite_example) 
class PyBELWarning(Exception):
    """The base class for warnings during compilation from which PyBEL can recover.

    Despite the name, this derives from :class:`Exception` rather than :class:`Warning`.
    """


class PyBELCanonicalizeError(PyBELWarning, IndexError):
    """Raised when there is a problem canonicalizing a node.

    This also derives from :class:`IndexError`, so handlers written for ``IndexError`` will catch it.
    """
def to_graphml(graph, file):
    """Write this graph to GraphML XML file using :func:`networkx.write_graphml`.

    Every node is exported under its BEL string with a ``function`` attribute, and every edge keeps its
    key and carries its relation in an ``interaction`` attribute. All other node and edge data are dropped.

    The .graphml file extension is suggested so Cytoscape can recognize it.

    :param BELGraph graph: A BEL graph
    :param file file: A file or file-like object
    """
    # Build the export in a separate graph. Rebinding ``graph`` to the new, empty
    # MultiDiGraph (as was done before) discarded the input, so both loops below
    # iterated over nothing and an empty GraphML file was always written.
    rv = nx.MultiDiGraph()

    for node in graph:
        rv.add_node(node.as_bel(), function=node.function)

    for u, v, key, edge_data in graph.edges(data=True, keys=True):
        rv.add_edge(
            u.as_bel(),
            v.as_bel(),
            key=key,
            interaction=edge_data[RELATION],
        )

    nx.write_graphml(rv, file)
def to_sif(graph, file=None, sep='\t'):
    """Write the graph as a SIF-style edge list, one edge per line.

    Each line is whatever ``graph.edge_to_bel`` returns for the edge when called with ``sep``, which is
    intended to give the following columns:

    1. Source BEL term
    2. Relation
    3. Target BEL term

    The format is easy to load into other applications, but it is lossy: the edge data dictionary is
    not written.

    :param BELGraph graph: A BEL graph
    :param file file: A writable file or file-like. Defaults to stdout.
    :param str sep: The separator. Defaults to tab.
    """
    for source, target, edge_data in graph.edges(data=True):
        line = graph.edge_to_bel(source, target, data=edge_data, sep=sep)
        print(line, file=file)
def from_bytes(bytes_graph, check_version=True):
    """Read a graph from bytes (the result of pickling the graph).

    :param bytes bytes_graph: The pickled bytes of a graph, such as the output of :func:`to_bytes`
    :param bool check_version: If true, also pass the loaded graph to ``raise_for_old_graph``
    :return: A BEL graph
    :rtype: BELGraph
    """
    # NOTE(review): unpickling can execute arbitrary code, so this must only be
    # given bytes that come from a trusted source.
    rv = loads(bytes_graph)

    # Validate the loaded object before handing it back to the caller.
    raise_for_not_bel(rv)
    if check_version:
        raise_for_old_graph(rv)

    return rv
def from_indra_statements(stmts, name=None, version=None, description=None, authors=None, contact=None,
                          license=None, copyright=None, disclaimer=None):
    """Assemble a BEL graph from INDRA statements with :class:`indra.assemblers.pybel.PybelAssembler`.

    All arguments are forwarded unchanged, as keyword arguments, to the assembler.

    :param list[indra.statements.Statement] stmts: A list of statements
    :param str name: The graph's name
    :param str version: The graph's version. Recommended to use semantic versioning or ``YYYYMMDD`` format.
    :param str description: A description of the graph
    :param str authors: The authors of this graph
    :param str contact: The contact email for this graph
    :param str license: The license for this graph
    :param str copyright: The copyright for this graph
    :param str disclaimer: The disclaimer for this graph
    :rtype: pybel.BELGraph
    """
    # Imported lazily so that INDRA is only required when this function is called.
    from indra.assemblers.pybel import PybelAssembler

    metadata = dict(
        name=name,
        version=version,
        description=description,
        authors=authors,
        contact=contact,
        license=license,
        copyright=copyright,
        disclaimer=disclaimer,
    )
    assembler = PybelAssembler(stmts=stmts, **metadata)
    return assembler.make_model()
:param str path: Path to pickled list of :class:`indra.statements.Statement` :param str name: The name for the BEL graph :param str version: The version of the BEL graph :param str description: The description of the BEL graph :rtype: pybel.BELGraph """ with open(path, 'rb') as f: statements = load(f) return from_indra_statements( stmts=statements, name=name, version=version, description=description ) def to_indra_statements(graph): """Export this graph as a list of INDRA statements using `indra.sources.pybel.PybelProcessor`. :param pybel.BELGraph graph: A BEL graph :rtype: list[indra.statements.Statement] """ warnings.warn('export to INDRA is not yet complete') from indra.sources.bel import process_pybel_graph pbp = process_pybel_graph(graph) return pbp.statements def from_biopax(path, name=None, version=None, description=None): """Import a model encoded in Pathway Commons `BioPAX `_ via :mod:`indra`. :param str path: Path to a BioPAX OWL file :param str name: The name for the BEL graph :param str version: The version of the BEL graph :param str description: The description of the BEL graph :rtype: pybel.BELGraph .. warning:: Not compatible with all BioPAX! See INDRA documentation. """ from indra.sources.biopax import process_owl model = process_owl(path) return from_indra_statements( stmts=model.statements, name=name, version=version, description=description ) pybel-0.12.1/src/pybel/io/jgif.py000066400000000000000000000331251334645200200165130ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Conversion functions for BEL graphs with JGIF JSON. The JSON Graph Interchange Format (JGIF) is `specified `_ similarly to the Node-Link JSON. Interchange with this format provides compatibilty with other software and repositories, such as the `Causal Biological Network Database `_. 
""" import logging from collections import defaultdict from operator import methodcaller from pyparsing import ParseException from ..constants import ( ANNOTATIONS, CITATION, CITATION_REFERENCE, CITATION_TYPE, EVIDENCE, FUNCTION, METADATA_AUTHORS, METADATA_CONTACT, METADATA_INSERT_KEYS, METADATA_LICENSES, RELATION, UNQUALIFIED_EDGES, ) from ..parser import BELParser from ..parser.exc import NakedNameWarning from ..struct import BELGraph __all__ = [ 'from_cbn_jgif', 'from_jgif', 'to_jgif', ] log = logging.getLogger(__name__) annotation_map = { 'tissue': 'Tissue', 'disease': 'Disease', 'species_common_name': 'Species', 'cell': 'Cell', } species_map = { 'human': '9606', 'rat': '10116', 'mouse': '10090', } placeholder_evidence = "This Network edge has no supporting evidence. Please add real evidence to this edge prior to deleting." EXPERIMENT_CONTEXT = 'experiment_context' def reformat_citation(citation): """Reformat a citation dictionary. :type citation: dict[str,str] :rtype: dict[str,str] """ return { CITATION_TYPE: citation['type'].strip(), CITATION_REFERENCE: citation['id'].strip() } def map_cbn(d): """Pre-processes the JSON from the CBN. 
- removes statements without evidence, or with placeholder evidence :param dict d: Raw JGIF from the CBN :return: Preprocessed JGIF :rtype: dict """ for i, edge in enumerate(d['graph']['edges']): if 'metadata' not in d['graph']['edges'][i]: continue if 'evidences' not in d['graph']['edges'][i]['metadata']: continue for j, evidence in enumerate(d['graph']['edges'][i]['metadata']['evidences']): if EXPERIMENT_CONTEXT not in evidence: continue # ctx = {k.strip().lower(): v.strip() for k, v in evidence[EXPERIMENT_CONTEXT].items() if v.strip()} new_context = {} for key, value in evidence[EXPERIMENT_CONTEXT].items(): if not value: log.debug('key %s without value', key) continue value = value.strip() if not value: log.debug('key %s without value', key) continue key = key.strip().lower() if key == 'species_common_name': new_context['Species'] = species_map[value.lower()] elif key in annotation_map: new_context[annotation_map[key]] = value else: new_context[key] = value ''' for k, v in annotation_map.items(): if k not in ctx: continue d['graph']['edges'][i]['metadata']['evidences'][j][EXPERIMENT_CONTEXT][v] = ctx[k] del d['graph']['edges'][i]['metadata']['evidences'][j][EXPERIMENT_CONTEXT][k] if 'species_common_name' in ctx: species_name = ctx['species_common_name'].strip().lower() d['graph']['edges'][i]['metadata']['evidences'][j][EXPERIMENT_CONTEXT]['Species'] = species_map[ species_name] del d['graph']['edges'][i]['metadata']['evidences'][j][EXPERIMENT_CONTEXT][ 'species_common_name'] ''' d['graph']['edges'][i]['metadata']['evidences'][j][EXPERIMENT_CONTEXT] = new_context return d def from_cbn_jgif(graph_jgif_dict): """Build a BEL graph from CBN JGIF. Map the JGIF used by the Causal Biological Network Database to standard namespace and annotations, then builds a BEL graph using :func:`pybel.from_jgif`. 
:param dict graph_jgif_dict: The JSON object representing the graph in JGIF format :rtype: BELGraph Example: >>> import requests >>> from pybel import from_cbn_jgif >>> apoptosis_url = 'http://causalbionet.com/Networks/GetJSONGraphFile?networkId=810385422' >>> graph_jgif_dict = requests.get(apoptosis_url).json() >>> graph = from_cbn_jgif(graph_jgif_dict) .. warning:: Handling the annotations is not yet supported, since the CBN documents do not refer to the resources used to create them. This may be added in the future, but the annotations must be stripped from the graph before uploading to the network store using :func:`pybel.struct.mutation.strip_annotations` """ graph_jgif_dict = map_cbn(graph_jgif_dict) graph_jgif_dict['graph']['metadata'].update({ METADATA_AUTHORS: 'Causal Biological Networks Database', METADATA_LICENSES: """ Please cite: - www.causalbionet.com - https://bionet.sbvimprover.com as well as any relevant publications. The sbv IMPROVER project, the website and the Symposia are part of a collaborative project designed to enable scientists to learn about and contribute to the development of a new crowd sourcing method for verification of scientific data and results. The current challenges, website and biological network models were developed and are maintained as part of a collaboration among Selventa, OrangeBus and ADS. The project is led and funded by Philip Morris International. For more information on the focus of Philip Morris International’s research, please visit www.pmi.com. 
""".replace('\n', '\t'), METADATA_CONTACT: 'CausalBiologicalNetworks.RD@pmi.com', }) graph = from_jgif(graph_jgif_dict) graph.namespace_url.update({ 'HGNC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/hgnc-human-genes-20150601.belns', 'GOBP': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/go-biological-process/go-biological-process-20150601.belns', 'SFAM': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/selventa-protein-families/selventa-protein-families-20150601.belns', 'GOCC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/go-cellular-component/go-cellular-component-20170511.belns', 'MESHPP': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/mesh-processes/mesh-processes-20150601.belns', 'MGI': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/mgi-mouse-genes/mgi-mouse-genes-20150601.belns', 'RGD': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/rgd-rat-genes/rgd-rat-genes-20150601.belns', 'CHEBI': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/chebi/chebi-20150601.belns', 'SCHEM': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/selventa-legacy-chemicals/selventa-legacy-chemicals-20150601.belns', 'EGID': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/entrez-gene-ids/entrez-gene-ids-20150601.belns', 'MESHD': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/mesh-diseases/mesh-diseases-20150601.belns', 'SDIS': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/selventa-legacy-diseases/selventa-legacy-diseases-20150601.belns', 'SCOMP': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/selventa-named-complexes/selventa-named-complexes-20150601.belns', 'MESHC': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/mesh-chemicals/mesh-chemicals-20170511.belns', 'GOBPID': 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/go-biological-process-ids/go-biological-process-ids-20150601.belns', 'MESHCS': 
'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/mesh-cell-structures/mesh-cell-structures-20150601.belns', }) graph.annotation_url.update({ 'Cell': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/cell-line/cell-line-20150601.belanno', 'Disease': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/disease/disease-20150601.belanno', 'Species': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/species-taxonomy-id/species-taxonomy-id-20170511.belanno', 'Tissue': 'https://arty.scai.fraunhofer.de/artifactory/bel/annotation/mesh-anatomy/mesh-anatomy-20150601.belanno', }) return graph def from_jgif(graph_jgif_dict): """Build a BEL graph from a JGIF JSON object. :param dict graph_jgif_dict: The JSON object representing the graph in JGIF format :rtype: BELGraph """ graph = BELGraph() root = graph_jgif_dict['graph'] if 'label' in root: graph.name = root['label'] if 'metadata' in root: metadata = root['metadata'] for key in METADATA_INSERT_KEYS: if key in metadata: graph.document[key] = metadata[key] parser = BELParser(graph) parser.bel_term.addParseAction(parser.handle_term) for node in root['nodes']: node_label = node.get('label') if node_label is None: log.warning('node missing label: %s', node) continue try: parser.bel_term.parseString(node_label) except NakedNameWarning as e: log.info('Naked name: %s', e) except ParseException: log.info('Parse exception for %s', node_label) for i, edge in enumerate(root['edges']): relation = edge.get('relation') if relation is None: log.warning('no relation for edge: %s', edge) if relation in {'actsIn', 'translocates'}: continue # don't need legacy BEL format edge_metadata = edge.get('metadata') if edge_metadata is None: log.warning('no metadata for edge: %s', edge) continue bel_statement = edge.get('label') if bel_statement is None: log.debug('No BEL statement for edge %s', edge) evidences = edge_metadata.get('evidences') if relation in UNQUALIFIED_EDGES: pass # FIXME? 
else: if not evidences: # is none or is empty list log.debug('No evidence for edge %s', edge) continue for evidence in evidences: citation = evidence.get('citation') if not citation: continue if 'type' not in citation or 'id' not in citation: continue summary_text = evidence['summary_text'].strip() if not summary_text or summary_text == placeholder_evidence: continue parser.control_parser.clear() parser.control_parser.citation = reformat_citation(citation) parser.control_parser.evidence = summary_text parser.control_parser.annotations.update(evidence[EXPERIMENT_CONTEXT]) try: parser.parseString(bel_statement, line_number=i) except Exception as e: log.warning('JGIF relation parse error: %s for %s', e, bel_statement) return graph def to_jgif(graph): """Build a JGIF dictionary from a BEL graph. :param pybel.BELGraph graph: A BEL graph :return: A JGIF dictionary :rtype: dict .. warning:: Untested! This format is not general purpose and is therefore time is not heavily invested. If you want to use Cytoscape.js, we suggest using :func:`pybel.to_cx` instead. Example: >>> import pybel, os, json >>> graph_url = 'https://arty.scai.fraunhofer.de/artifactory/bel/knowledge/selventa-small-corpus/selventa-small-corpus-20150611.bel' >>> graph = pybel.from_url(graph_url) >>> graph_jgif_json = pybel.to_jgif(graph) >>> with open(os.path.expanduser('~/Desktop/small_corpus.json'), 'w') as f: ... 
json.dump(graph_jgif_json, f) """ node_bel = {} u_v_r_bel = {} nodes_entry = [] edges_entry = [] for i, node in enumerate(sorted(graph, key=methodcaller('as_bel'))): node_bel[node] = bel = node.as_bel() nodes_entry.append({ 'id': bel, 'label': bel, 'nodeId': i, 'bel_function_type': node[FUNCTION], 'metadata': {} }) for u, v in graph.edges(): relation_evidences = defaultdict(list) for data in graph[u][v].values(): if (u, v, data[RELATION]) not in u_v_r_bel: u_v_r_bel[u, v, data[RELATION]] = graph.edge_to_bel(u, v, data=data) bel = u_v_r_bel[u, v, data[RELATION]] evidence_dict = { 'bel_statement': bel, } if ANNOTATIONS in data: evidence_dict['experiment_context'] = data[ANNOTATIONS] if EVIDENCE in data: evidence_dict['summary_text'] = data[EVIDENCE] if CITATION in data: evidence_dict['citation'] = data[CITATION] relation_evidences[data[RELATION]].append(evidence_dict) for relation, evidences in relation_evidences.items(): edges_entry.append({ 'source': node_bel[u], 'target': node_bel[v], 'relation': relation, 'label': u_v_r_bel[u, v, relation], 'metadata': { 'evidences': evidences } }) return { 'graph': { 'metadata': graph.document, 'nodes': nodes_entry, 'edges': edges_entry } } pybel-0.12.1/src/pybel/io/line_utils.py000066400000000000000000000220641334645200200177430ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains helper functions for reading BEL scripts.""" import logging import re import time import six from pyparsing import ParseException from sqlalchemy.exc import OperationalError from tqdm import tqdm from ..constants import GRAPH_METADATA, INVERSE_DOCUMENT_KEYS, REQUIRED_METADATA from ..exceptions import PyBELWarning from ..manager import Manager from ..parser import BELParser, MetadataParser from ..parser.exc import ( BELSyntaxError, InconsistentDefinitionError, MalformedMetadataException, MissingMetadataException, VersionFormatWarning, ) from ..resources.document import split_file_to_annotations_and_definitions from 
..resources.exc import ResourceError log = logging.getLogger(__name__) parse_log = logging.getLogger('pybel.parser') METADATA_LINE_RE = re.compile("(SET\s+DOCUMENT|DEFINE\s+NAMESPACE|DEFINE\s+ANNOTATION)") def parse_lines(graph, lines, manager=None, allow_nested=False, citation_clearing=True, use_tqdm=False, no_identifier_validation=False, disallow_unqualified_translocations=False, **kwargs): """Parse an iterable of lines into this graph. Delegates to :func:`parse_document`, :func:`parse_definitions`, and :func:`parse_statements`. :param BELGraph graph: A BEL graph :param iter[str] lines: An iterable over lines of BEL script :type manager: Optional[Manager] :param bool allow_nested: If true, turns off nested statement failures :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations? Delegated to :class:`pybel.parser.ControlParser` :param bool use_tqdm: Use :mod:`tqdm` to show a progress bar? :param bool no_identifier_validation: If true, turns off namespace validation :param bool disallow_unqualified_translocations: If true, allow translocations without TO and FROM clauses. .. warning:: These options allow concessions for parsing BEL that is either **WRONG** or **UNSCIENTIFIC**. Use them at risk to reproducibility and validity of your results. 
:param bool allow_naked_names: If true, turns off naked namespace failures :param bool allow_redefinition: If true, doesn't fail on second definition of same name or annotation :param bool allow_definition_failures: If true, allows parsing to continue if a terminology file download/parse fails :param Optional[list[str]] required_annotations: Annotations that are required for all statements """ docs, definitions, statements = split_file_to_annotations_and_definitions(lines) if manager is None: manager = Manager() metadata_parser = MetadataParser( manager, allow_redefinition=kwargs.get('allow_redefinition'), skip_validation=no_identifier_validation, ) parse_document( graph, docs, metadata_parser, ) parse_definitions( graph, definitions, metadata_parser, allow_failures=kwargs.get('allow_definition_failures'), use_tqdm=use_tqdm, ) bel_parser = BELParser( graph=graph, namespace_dict=metadata_parser.namespace_dict, annotation_dict=metadata_parser.annotation_dict, namespace_regex=metadata_parser.namespace_regex, annotation_regex=metadata_parser.annotation_regex, allow_nested=allow_nested, citation_clearing=citation_clearing, skip_validation=no_identifier_validation, allow_naked_names=kwargs.get('allow_naked_names'), disallow_unqualified_translocations=disallow_unqualified_translocations, required_annotations=kwargs.get('required_annotations'), ) parse_statements( graph, statements, bel_parser, use_tqdm=use_tqdm, ) log.info('Network has %d nodes and %d edges', graph.number_of_nodes(), graph.number_of_edges()) def parse_document(graph, lines, metadata_parser): """Parse the lines in the document section of a BEL script. 
:param BELGraph graph: A BEL graph :param iter[tuple[int, str]] lines: An enumerated iterable over the lines in the document section of a BEL script :param MetadataParser metadata_parser: A metadata parser """ parse_document_start_time = time.time() for line_number, line in lines: try: metadata_parser.parseString(line, line_number=line_number) except VersionFormatWarning as e: parse_log.warning('Line %07d - %s: %s', line_number, e.__class__.__name__, e) graph.add_warning(line_number, line, e) except Exception as e: parse_log.exception('Line %07d - Critical Failure - %s', line_number, line) six.raise_from(MalformedMetadataException(line_number, line), e) for required in REQUIRED_METADATA: required_metadatum = metadata_parser.document_metadata.get(required) if required_metadatum is not None: continue required_metadatum_key = INVERSE_DOCUMENT_KEYS[required] graph.warnings.insert(0, (0, '', MissingMetadataException(required_metadatum_key), {})) log.error('Missing required document metadata: %s', required_metadatum_key) graph.document.update(metadata_parser.document_metadata) log.info('Finished parsing document section in %.02f seconds', time.time() - parse_document_start_time) def parse_definitions(graph, lines, metadata_parser, allow_failures=False, use_tqdm=False): """Parse the lines in the definitions section of a BEL script. :param pybel.BELGraph graph: A BEL graph :param iter[tuple[int,str]] lines: An enumerated iterable over the lines in the definitions section of a BEL script :param MetadataParser metadata_parser: A metadata parser :param bool allow_failures: If true, allows parser to continue past strange failures :param bool use_tqdm: Use :mod:`tqdm` to show a progress bar? 
:raises: pybel.parser.parse_exceptions.InconsistentDefinitionError :raises: pybel.resources.exc.ResourceError :raises: sqlalchemy.exc.OperationalError """ parse_definitions_start_time = time.time() if use_tqdm: lines = tqdm(list(lines), desc='Definitions') for line_number, line in lines: try: metadata_parser.parseString(line, line_number=line_number) except InconsistentDefinitionError as e: parse_log.exception('Line %07d - Critical Failure - %s', line_number, line) raise e except ResourceError as e: parse_log.warning("Line %07d - Can't use resource - %s", line_number, line) raise e except OperationalError as e: parse_log.warning('Need to upgrade database. See ' 'http://pybel.readthedocs.io/en/latest/installation.html#upgrading') raise e except Exception as e: if not allow_failures: parse_log.warning('Line %07d - Critical Failure - %s', line_number, line) six.raise_from(MalformedMetadataException(line_number, line), e) graph.namespace_url.update(metadata_parser.namespace_url_dict) graph.namespace_pattern.update(metadata_parser.namespace_regex) graph.annotation_url.update(metadata_parser.annotation_url_dict) graph.annotation_pattern.update(metadata_parser.annotation_regex) graph.annotation_list.update({ keyword: metadata_parser.annotation_dict[keyword] for keyword in metadata_parser.annotation_lists }) graph.uncached_namespaces.update(metadata_parser.uncachable_namespaces) log.info('Finished parsing definitions section in %.02f seconds', time.time() - parse_definitions_start_time) def parse_statements(graph, lines, bel_parser, use_tqdm=False): """Parse a list of statements from a BEL Script. :param BELGraph graph: A BEL graph :param iter[tuple[int,str]] lines: An enumerated iterable over the lines in the statements section of a BEL script :param BELParser bel_parser: A BEL parser :param bool use_tqdm: Use :mod:`tqdm` to show a progress bar? Requires reading whole file to memory. 
""" parse_statements_start_time = time.time() if use_tqdm: lines = tqdm(list(lines), desc='Statements') for line_number, line in lines: try: bel_parser.parseString(line, line_number=line_number) except ParseException as e: parse_log.error('Line %07d - General Parser Failure: %s', line_number, line) graph.add_warning(line_number, line, BELSyntaxError(line_number, line, e.loc), bel_parser.get_annotations()) except PyBELWarning as e: parse_log.warning('Line %07d - %s: %s', line_number, e.__class__.__name__, e) graph.add_warning(line_number, line, e, bel_parser.get_annotations()) except Exception as e: parse_log.exception('Line %07d - General Failure: %s', line_number, line) graph.add_warning(line_number, line, e, bel_parser.get_annotations()) log.info('Parsed statements section in %.02f seconds with %d warnings', time.time() - parse_statements_start_time, len(graph.warnings)) pybel-0.12.1/src/pybel/io/lines.py000066400000000000000000000106251334645200200167060ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains IO functions for BEL scripts.""" import codecs import logging import os from .line_utils import parse_lines from ..resources.utils import download from ..struct import BELGraph __all__ = [ 'from_lines', 'from_path', 'from_url' ] log = logging.getLogger(__name__) def from_lines(lines, manager=None, allow_nested=False, citation_clearing=True, use_tqdm=False, disallow_unqualified_translocations=False, **kwargs): """Load a BEL graph from an iterable over the lines of a BEL script. :param iter[str] lines: An iterable of strings (the lines in a BEL script) :type manager: Optional[pybel.manager.Manager] :param bool allow_nested: if true, turn off nested statement failures :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations? 
Delegated to :class:`pybel.parser.ControlParser` :param bool use_tqdm: If true, use tqdm for logging :param bool disallow_unqualified_translocations: If true, allow translocations without TO and FROM clauses. :rtype: BELGraph The remaining keyword arguments to :func:`pybel.io.line_utils.parse_lines`. """ graph = BELGraph() parse_lines( graph=graph, lines=lines, manager=manager, allow_nested=allow_nested, citation_clearing=citation_clearing, use_tqdm=use_tqdm, disallow_unqualified_translocations=disallow_unqualified_translocations, **kwargs ) return graph def from_path(path, manager=None, allow_nested=False, citation_clearing=True, encoding='utf-8', use_tqdm=False, disallow_unqualified_translocations=False, **kwargs): """Load a BEL graph from a file resource. This function is a thin wrapper around :func:`from_lines`. :param str path: A file path :type manager: Optional[pybel.manager.Manager] :param bool allow_nested: if true, turn off nested statement failures :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations? Delegated to :class:`pybel.parser.ControlParser` :param str encoding: the encoding to use when reading this file. Is passed to :code:`codecs.open`. See the python `docs `_ for a list of standard encodings. For example, files starting with a UTF-8 BOM should use :code:`utf_8_sig` :param bool use_tqdm: If true, use tqdm for logging :param bool disallow_unqualified_translocations: If true, allow translocations without TO and FROM clauses. :rtype: pybel.BELGraph The remaining keyword arguments to :func:`pybel.io.line_utils.parse_lines`. 
""" log.info('Loading from path: %s', path) with codecs.open(os.path.expanduser(path), encoding=encoding) as file: return from_lines( lines=file, manager=manager, allow_nested=allow_nested, citation_clearing=citation_clearing, use_tqdm=use_tqdm, disallow_unqualified_translocations=disallow_unqualified_translocations, **kwargs ) def from_url(url, manager=None, allow_nested=False, citation_clearing=True, use_tqdm=False, **kwargs): """Load a BEL graph from a URL resource. This function is a thin wrapper around :func:`from_lines`. :param str url: A valid URL pointing to a BEL resource :param manager: database connection string to cache, pre-built :class:`Manager`, or None to use default cache :type manager: None or str or pybel.manager.Manager :param bool allow_nested: if true, turn off nested statement failures :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations? Delegated to :class:`pybel.parser.ControlParser` :param bool use_tqdm: If true, use tqdm for logging :param dict kwargs: keyword arguments to :func:`pybel.io.line_utils.parse_lines` :rtype: BELGraph """ log.info('Loading from url: %s', url) res = download(url) lines = (line.decode('utf-8') for line in res.iter_lines()) return from_lines( lines=lines, manager=manager, allow_nested=allow_nested, citation_clearing=citation_clearing, use_tqdm=use_tqdm, **kwargs ) pybel-0.12.1/src/pybel/io/neo4j.py000066400000000000000000000055421334645200200166150ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Output functions for BEL graphs to Neo4j.""" from six import string_types from tqdm import tqdm from ..constants import ( ANNOTATIONS, CITATION, CITATION_REFERENCE, CITATION_TYPE, EVIDENCE, FUSION, MEMBERS, NAMESPACE, OBJECT, RELATION, SUBJECT, VARIANTS, ) from ..utils import flatten_dict __all__ = [ 'to_neo4j', ] def to_neo4j(graph, neo_connection, use_tqdm=False): """Upload a BEL graph to a Neo4j graph database using :mod:`py2neo`. 
:param pybel.BELGraph graph: A BEL Graph :param neo_connection: A :mod:`py2neo` connection object. Refer to the `py2neo documentation `_ for how to build this object. :type neo_connection: str or py2neo.Graph Example Usage: >>> import py2neo >>> import pybel >>> from pybel.examples import sialic_acid_graph >>> neo_graph = py2neo.Graph("http://localhost:7474/db/data/") # use your own connection settings >>> pybel.to_neo4j(sialic_acid_graph, neo_graph) """ import py2neo if isinstance(neo_connection, string_types): neo_connection = py2neo.Graph(neo_connection) tx = neo_connection.begin() node_map = {} nodes = list(graph) if use_tqdm: nodes = tqdm(nodes, desc='nodes') for node in nodes: if NAMESPACE not in node or VARIANTS in node or MEMBERS in node or FUSION in node: attrs = {'name': node.as_bel()} else: attrs = {'namespace': node.namespace} if node.name and node.identifier: attrs['name'] = node.name attrs['identifier'] = node.identifier elif node.identifier and not node.name: attrs['name'] = node.identifier elif node.name and not node.identifier: attrs['name'] = node.name node_map[node] = py2neo.Node(node.function, **attrs) tx.create(node_map[node]) edges = graph.edges(keys=True, data=True) if use_tqdm: edges = tqdm(edges, desc='edges') for u, v, key, node in edges: rel_type = node[RELATION] d = node.copy() del d[RELATION] attrs = {} annotations = d.pop(ANNOTATIONS, None) if annotations: for annotation, values in annotations.items(): attrs[annotation] = list(values) citation = d.pop(CITATION, None) if citation: attrs[CITATION] = '{}:{}'.format(citation[CITATION_TYPE], citation[CITATION_REFERENCE]) if EVIDENCE in d: attrs[EVIDENCE] = d[EVIDENCE] for side in (SUBJECT, OBJECT): side_data = d.get(side) if side_data: attrs.update(flatten_dict(side_data, parent_key=side)) rel = py2neo.Relationship(node_map[u], rel_type, node_map[v], key=key, **attrs) tx.create(rel) tx.commit() 
pybel-0.12.1/src/pybel/io/nodelink.py000066400000000000000000000126511334645200200174000ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Conversion functions for BEL graphs with Node-Link JSON.""" import json import os from operator import itemgetter, methodcaller from itertools import chain, count from .utils import ensure_version from ..constants import GRAPH_ANNOTATION_LIST, GRAPH_UNCACHED_NAMESPACES from ..struct import BELGraph from ..tokens import parse_result_to_dsl __all__ = [ 'to_json', 'to_json_file', 'to_json_path', 'to_jsons', 'from_json', 'from_json_file', 'from_json_path', 'from_jsons', ] def to_json(graph): """Convert this graph to a Node-Link JSON object. :param BELGraph graph: A BEL graph :return: A Node-Link JSON object representing the given graph :rtype: dict """ graph_json_dict = node_link_data(graph) # Convert annotation list definitions (which are sets) to canonicalized/sorted lists graph_json_dict['graph'][GRAPH_ANNOTATION_LIST] = { keyword: list(sorted(values)) for keyword, values in graph_json_dict['graph'][GRAPH_ANNOTATION_LIST].items() } # Convert set to list graph_json_dict['graph'][GRAPH_UNCACHED_NAMESPACES] = list(graph_json_dict['graph'][GRAPH_UNCACHED_NAMESPACES]) return graph_json_dict def to_json_path(graph, path, **kwargs): """Write this graph to the given path as a Node-Link JSON. :param BELGraph graph: A BEL graph :param str path: A file path """ with open(os.path.expanduser(path), 'w') as f: return to_json_file(graph, file=f, **kwargs) def to_json_file(graph, file, **kwargs): """Write this graph as Node-Link JSON to a file. :param BELGraph graph: A BEL graph :param file file: A write-supporting file or file-like object """ graph_json_dict = to_json(graph) json.dump(graph_json_dict, file, ensure_ascii=False, **kwargs) def to_jsons(graph, **kwargs): """Dump this graph as a Node-Link JSON object to a string. 
:param BELGraph graph: A BEL graph :return: A string representation of the Node-Link JSON produced for this graph by :func:`pybel.to_json` :rtype: str """ graph_json_str = to_json(graph) return json.dumps(graph_json_str, ensure_ascii=False, **kwargs) def from_json(graph_json_dict, check_version=True): """Build a graph from Node-Link JSON Object. :param dict graph_json_dict: A JSON dictionary representing a graph :param bool check_version: Checks if the graph was produced by this version of PyBEL :rtype: BELGraph """ graph = node_link_graph(graph_json_dict) return ensure_version(graph, check_version=check_version) def from_json_path(path, check_version=True): """Build a graph from a file containing Node-Link JSON. :param str path: A file path. Expands user. :param bool check_version: Checks if the graph was produced by this version of PyBEL :rtype: BELGraph """ with open(os.path.expanduser(path)) as f: return from_json_file(f, check_version=check_version) def from_json_file(file, check_version=True): """Build a graph from the Node-Link JSON contained in the given file. :param file file: A readable file or file-like :param bool check_version: Checks if the graph was produced by this version of PyBEL :rtype: BELGraph """ graph_json_dict = json.load(file) return from_json(graph_json_dict, check_version=check_version) def from_jsons(graph_json_str, check_version=True): """Read a BEL graph from a Node-Link JSON string. :param str graph_json_str: A Node-Link JSON string produced by PyBEL :param bool check_version: Checks if the graph was produced by this version of PyBEL :rtype: BELGraph """ graph_json_dict = json.loads(graph_json_str) return from_json(graph_json_dict, check_version=check_version) def node_link_data(graph): """Convert a BEL graph to a node-link format. 
Adapted from :func:`networkx.readwrite.json_graph.node_link_data` :param pybel.BELGraph graph: :rtype: dict """ nodes = sorted(graph, key=methodcaller('as_bel')) mapping = dict(zip(nodes, count())) return { 'directed': True, 'multigraph': True, 'graph': graph.graph, 'nodes': [ _augment_node_with_sha512(node) for node in nodes ], 'links': [ dict(chain( data.items(), [('source', mapping[u]), ('target', mapping[v]), ('key', key)] )) for u, v, key, data in graph.edges(keys=True, data=True) ] } def _augment_node_with_sha512(node): """ :type node: BaseEntity :rtype: dict """ v = node.copy() v['id'] = node.as_sha512() return v def node_link_graph(data): """Return graph from node-link data format. Adapted from :func:`networkx.readwrite.json_graph.node_link_graph` :param dict data: :rtype: BELGraph """ graph = BELGraph() graph.graph = data.get('graph', {}) mapping = [] for node_data in data['nodes']: _dsl = parse_result_to_dsl(node_data) node = graph.add_node_from_data(_dsl) mapping.append(node) for data in data['links']: src = data['source'] tgt = data['target'] key = data['key'] u = mapping[src] v = mapping[tgt] edgedata = { k: v for k, v in data.items() if k not in {'source', 'target', 'key'} } graph.add_edge(u, v, key=key, **edgedata) return graph pybel-0.12.1/src/pybel/io/utils.py000066400000000000000000000027761334645200200167440ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains helper functions for other IO functions.""" from .exc import ImportVersionWarning from ..constants import PYBEL_MINIMUM_IMPORT_VERSION from ..struct import BELGraph from ..utils import tokenize_version def raise_for_old_graph(graph): """Raise an ImportVersionWarning if the BEL graph was produced by a legacy version of PyBEL. 
:raises ImportVersionWarning: If the BEL graph was produced by a legacy version of PyBEL """ graph_version = tokenize_version(graph.pybel_version) if graph_version < PYBEL_MINIMUM_IMPORT_VERSION: raise ImportVersionWarning(graph_version, PYBEL_MINIMUM_IMPORT_VERSION) def raise_for_not_bel(graph): """Raise a TypeError if the argument is not a BEL graph. :raises TypeError: If the argument is not a BEL graph """ if not isinstance(graph, BELGraph): raise TypeError('Not a BELGraph: {}'.format(graph)) def ensure_version(graph, check_version=True): """Ensure that the graph was produced by a minimum of PyBEL v:data:`PYBEL_MINIMUM_IMPORT_VERSION`. This variable is defined by last release with a change in the graph data definition. :param BELGraph graph: A BEL Graph :param bool check_version: Should the version be checked, or should the graph just be returned without inspection :rtype: BELGraph :raises ImportVersionWarning: If the BEL graph was produced by a legacy version of PyBEL """ if check_version: raise_for_old_graph(graph) return graph pybel-0.12.1/src/pybel/io/web.py000066400000000000000000000066111334645200200163510ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module facilitates rudimentary data exchange with `BEL Commons `_.""" import logging import os import requests from .nodelink import from_json, to_json from ..constants import DEFAULT_SERVICE_URL, PYBEL_REMOTE_HOST, PYBEL_REMOTE_PASSWORD, PYBEL_REMOTE_USER, config from ..utils import get_version __all__ = [ 'to_web', 'from_web', ] log = logging.getLogger(__name__) RECIEVE_ENDPOINT = '/api/receive/' GET_ENDPOINT = '/api/network/{}/export/nodelink' def _get_config_or_env(name): return config.get(name) or os.environ.get(name) def _get_host(): """Find the host. Has three possibilities: 1. The PyBEL config entry ``PYBEL_REMOTE_HOST``, loaded in :mod:`pybel.constants` 2. The environment variable ``PYBEL_REMOTE_HOST`` 3. 
The default service URL, :data:`pybel.constants.DEFAULT_SERVICE_URL` """ return _get_config_or_env(PYBEL_REMOTE_HOST) or DEFAULT_SERVICE_URL def _get_user(): return _get_config_or_env(PYBEL_REMOTE_USER) def _get_password(): return _get_config_or_env(PYBEL_REMOTE_PASSWORD) def to_web(graph, host=None, user=None, password=None): """Send a graph to the receiver service and returns the :mod:`requests` response object. :param pybel.BELGraph graph: A BEL network :param Optional[str] host: The location of the BEL Commons server. Alternatively, looks up in PyBEL config with ``PYBEL_REMOTE_HOST`` or the environment as ``PYBEL_REMOTE_HOST`` Defaults to :data:`pybel.constants.DEFAULT_SERVICE_URL` :param Optional[str] user: Username for BEL Commons. Alternatively, looks up in PyBEL config with ``PYBEL_REMOTE_USER`` or the environment as ``PYBEL_REMOTE_USER`` :param Optional[str] password: Password for BEL Commons. Alternatively, looks up in PyBEL config with ``PYBEL_REMOTE_PASSWORD`` or the environment as ``PYBEL_REMOTE_PASSWORD`` :return: The response object from :mod:`requests` :rtype: requests.Response """ if host is None: host = _get_host() log.debug('using host: %s', host) if user is None: user = _get_user() if user is None: raise ValueError('no user found') if password is None: password = _get_password() if password is None: raise ValueError('no password found') url = host.rstrip('/') + RECIEVE_ENDPOINT response = requests.post( url, json=to_json(graph), headers={ 'content-type': 'application/json', 'User-Agent': 'PyBEL v{}'.format(get_version()), }, auth=(user, password) ) log.debug('received response: %s', response) return response def from_web(network_id, host=None): """Retrieve a public network from BEL Commons. In the future, this function may be extended to support authentication. :param int network_id: The BEL Commons network identifier :param Optional[str] host: The location of the BEL Commons server. 
Alternatively, looks up in PyBEL config with ``PYBEL_REMOTE_HOST`` or the environment as ``PYBEL_REMOTE_HOST`` Defaults to :data:`pybel.constants.DEFAULT_SERVICE_URL` :rtype: pybel.BELGraph """ if host is None: host = _get_host() url = host + GET_ENDPOINT.format(network_id) res = requests.get(url) graph_json = res.json() graph = from_json(graph_json) return graph pybel-0.12.1/src/pybel/language.py000066400000000000000000000334061334645200200167520ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Language constants for BEL. This module contains mappings between PyBEL's internal constants and BEL language keywords. """ import logging from .constants import ( ABUNDANCE, BIOPROCESS, COMPLEX, COMPOSITE, GENE, MIRNA, PATHOLOGY, PROTEIN, RNA, TRANSCRIBED_TO, TRANSLATED_TO, ) from .dsl import entity log = logging.getLogger(__name__) #: A dictionary of activity labels used in the ma() function in activity(p(X), ma(Y)) activity_labels = { 'catalyticActivity': 'cat', 'cat': 'cat', 'chaperoneActivity': 'chap', 'chap': 'chap', 'gtpBoundActivity': 'gtp', 'gtp': 'gtp', 'kinaseActivity': 'kin', 'kin': 'kin', 'peptidaseActivity': 'pep', 'pep': 'pep', 'phosphataseActivity': 'phos', 'phos': 'phos', 'ribosylationActivity': 'ribo', 'ribo': 'ribo', 'transcriptionalActivity': 'tscript', 'tscript': 'tscript', 'transportActivity': 'tport', 'tport': 'tport', 'molecularActivity': 'molecularActivity', # Added by PyBEL 'guanineNucleotideExchangeFactorActivity': 'gef', 'gef': 'gef', 'gtpaseActivatingProteinActivity': 'gap', 'gap': 'gap', } #: Maps the default BEL molecular activities to Gene Ontology Molecular Functions activity_mapping = { 'cat': entity(namespace='GO', name='catalytic activity', identifier='GO:0003824'), 'chap': entity(namespace='GO', name='protein binding involved in protein folding', identifier='GO:0044183'), 'gtp': entity(namespace='GO', name='GTP binding', identifier='GO:0005525'), 'kin': entity(namespace='GO', name='kinase activity', identifier='GO:0016301'), 'pep': 
entity(namespace='GO', name='peptidase activity', identifier='GO:0008233'), 'phos': entity(namespace='GO', name='phosphatase activity', identifier='GO:0016791'), 'ribo': entity(namespace='GO', name='NAD(P)+-protein-arginine ADP-ribosyltransferase activity', identifier='GO:0003956'), 'tscript': entity(namespace='GO', name='nucleic acid binding transcription factor activity', identifier='GO:0001071'), 'tport': entity(namespace='GO', name='transporter activity', identifier='GO:0005215'), 'molecularActivity': entity(namespace='GO', name='molecular_function', identifier='GO:0003674'), 'gef': entity(namespace='GO', name='guanyl-nucleotide exchange factor activity', identifier='GO:0005085'), 'gap': entity(namespace='GO', name='GTPase activating protein binding', identifier='GO:0032794'), } activities = list(activity_labels.keys()) #: Provides a mapping from BEL terms to PyBEL internal constants abundance_labels = { 'abundance': ABUNDANCE, 'a': ABUNDANCE, 'geneAbundance': GENE, 'g': GENE, 'microRNAAbundance': MIRNA, 'm': MIRNA, 'proteinAbundance': PROTEIN, 'p': PROTEIN, 'rnaAbundance': RNA, 'r': RNA, 'biologicalProcess': BIOPROCESS, 'bp': BIOPROCESS, 'pathology': PATHOLOGY, 'path': PATHOLOGY, 'composite': COMPOSITE, 'compositeAbundance': COMPOSITE, 'complex': COMPLEX, 'complexAbundance': COMPLEX } #: Maps the BEL abundance types to the Systems Biology Ontology abundance_sbo_mapping = { MIRNA: entity(namespace='SBO', name='microRNA', identifier='SBO:0000316'), BIOPROCESS: entity(namespace='SBO', name='process', identifier='SBO:0000375'), GENE: entity(namespace='SBO', name='gene', identifier='SBO:0000243'), RNA: entity(namespace='SBO', name='messenger RNA', identifier='SBO:0000278'), COMPLEX: entity(namespace='SBO', name='protein complex', identifier='SBO:0000297'), PATHOLOGY: entity(namespace='SBO', name='phenotype', identifier='SBO:0000358'), } relation_sbo_mapping = { TRANSLATED_TO: entity(namespace='SBO', name='translation', identifier='SBO:0000184'), TRANSCRIBED_TO: 
entity(namespace='SBO', name='transcription', identifier='SBO:0000183'), } amino_acid_dict = { 'A': 'Ala', 'R': 'Arg', 'N': 'Asn', 'D': 'Asp', 'C': 'Cys', 'E': 'Glu', 'Q': 'Gln', 'G': 'Gly', 'H': 'His', 'I': 'Ile', 'L': 'Leu', 'K': 'Lys', 'M': 'Met', 'F': 'Phe', 'P': 'Pro', 'S': 'Ser', 'T': 'Thr', 'W': 'Trp', 'Y': 'Tyr', 'V': 'Val', } dna_nucleotide_labels = { 'A': 'Adenine', 'T': 'Thymine', 'C': 'Cytosine', 'G': 'Guanine' } rna_nucleotide_labels = { 'a': 'adenine', 'u': 'uracil', 'c': 'cytosine', 'g': 'guanine' } #: A dictionary of default protein modifications to their preferred value pmod_namespace = { 'Ac': 'Ac', 'acetylation': 'Ac', 'ADPRib': 'ADPRib', 'ADP-ribosylation': 'ADPRib', 'adenosine diphosphoribosyl': 'ADPRib', 'Farn': 'Farn', 'farnesylation': 'Farn', 'Gerger': 'Gerger', 'geranylgeranylation': 'Gerger', 'Glyco': 'Glyco', 'glycosylation': 'Glyco', 'Hy': 'Hy', 'hydroxylation': 'Hy', 'ISG': 'ISG', 'ISGylation': 'ISG', 'ISG15-protein conjugation': 'ISG', 'Me': 'Me', 'methylation': 'Me', 'Me1': 'Me1', 'monomethylation': 'Me1', 'mono-methylation': 'Me1', 'Me2': 'Me2', 'dimethylation': 'Me2', 'di-methylation': 'Me2', 'Me3': 'Me3', 'trimethylation': 'Me3', 'tri-methylation': 'Me3', 'Myr': 'Myr', 'myristoylation': 'Myr', 'Nedd': 'Nedd', 'neddylation': 'Nedd', 'NGlyco': 'NGlyco', 'N-linked glycosylation': 'NGlyco', 'NO': 'NO', 'Nitrosylation': 'NO', 'OGlyco': 'OGlyco', 'O-linked glycosylation': 'OGlyco', 'Palm': 'Palm', 'palmitoylation': 'Palm', 'Ph': 'Ph', 'phosphorylation': 'Ph', 'Sulf': 'Sulf', 'sulfation': 'Sulf', 'sulphation': 'Sulf', 'sulfur addition': 'Sulf', 'sulphur addition': 'Sulf', 'sulfonation': 'sulfonation', 'sulphonation': 'sulfonation', 'Sumo': 'Sumo', 'SUMOylation': 'Sumo', 'Ub': 'Ub', 'ubiquitination': 'Ub', 'ubiquitinylation': 'Ub', 'ubiquitylation': 'Ub', 'UbK48': 'UbK48', 'Lysine 48-linked polyubiquitination': 'UbK48', 'UbK63': 'UbK63', 'Lysine 63-linked polyubiquitination': 'UbK63', 'UbMono': 'UbMono', 'monoubiquitination': 'UbMono', 
'UbPoly': 'UbPoly', 'polyubiquitination': 'UbPoly', # PyBEL Variants 'Ox': "Ox", 'oxidation': 'Ox', } #: Use Gene Ontology children of GO_0006464: "cellular protein modification process" pmod_mappings = { 'Ac': { 'synonyms': ['Ac', 'acetylation'], 'xrefs': [ entity(namespace='SBO', identifier='SBO:0000215', name='acetylation'), entity(namespace='GO', identifier='GO:0006473', name='protein acetylation'), entity(namespace='MOD', identifier='MOD:00394'), ] }, 'ADPRib': { 'synonyms': ['ADPRib', 'ADP-ribosylation', 'ADPRib', 'ADP-rybosylation', 'adenosine diphosphoribosyl'], 'xrefs': [ entity(namespace='GO', identifier='GO:0006471', name='protein ADP-ribosylation'), entity(namespace='MOD', identifier='MOD:00752'), ] }, 'Farn': { 'synonyms': ['Farn', 'farnesylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0018343', name='protein farnesylation'), entity(namespace='MOD', identifier='MOD:00437'), ] }, 'Gerger': { 'synonyms': ['Gerger', 'geranylgeranylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0018344', name='protein geranylgeranylation'), entity(namespace='MOD', identifier='MOD:00441'), ] }, 'Glyco': { 'synonyms': ['Glyco', 'glycosylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0006486', name='protein glycosylation'), entity(namespace='MOD', identifier='MOD:00693'), ] }, 'Hy': { 'synonyms': ['Hy' 'hydroxylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0018126', name='protein hydroxylation'), entity(namespace='MOD', identifier='MOD:00677'), ] }, 'ISG': { 'synonyms': ['ISG', 'ISGylation', 'ISG15-protein conjugation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0032020', name='ISG15-protein conjugation'), ] }, 'Me': { 'synonyms': ['Me', 'methylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0006479', name='protein methylation'), entity(namespace='MOD', identifier='MOD:00427'), ] }, 'Me1': { 'synonyms': ['Me1', 'monomethylation', 'mono-methylation'], 'xrefs': [ entity(namespace='MOD', identifier='MOD:00599', 
name='monomethylated residue'), ] }, 'Me2': { 'synonyms': ['Me2', 'dimethylation', 'di-methylation'], 'xrefs': [ entity(namespace='MOD', identifier='MOD:00429', name='dimethylated residue'), ] }, 'Me3': { 'synonyms': ['Me3', 'trimethylation', 'tri-methylation'], 'xrefs': [ entity(namespace='MOD', identifier='MOD:00430', name='trimethylated residue'), ] }, 'Myr': { 'synonyms': ['Myr', 'myristoylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0018377', name='protein myristoylation'), entity(namespace='MOD', identifier='MOD:00438'), ] }, 'Nedd': { 'synonyms': ['Nedd', 'neddylation', 'RUB1-protein conjugation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0045116', name='protein neddylation'), entity(namespace='MOD', identifier='MOD:01150'), ] }, 'NGlyco': { 'synonyms': ['NGlyco', 'N-linked glycosylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0006487', name='protein N-linked glycosylation'), entity(namespace='MOD', identifier='MOD:00006'), ] }, 'NO': { 'synonyms': ['NO', 'Nitrosylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0017014', name='protein nitrosylation'), ] }, 'Ox': { 'synonyms': ["Ox", 'oxidation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0018158', name='protein oxidation'), ] }, 'OGlyco': { 'synonyms': ['OGlyco', 'O-linked glycosylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0006493', name='protein O-linked glycosylation'), entity(namespace='MOD', identifier='MOD:00396'), ] }, 'Palm': { 'synonyms': ['Palm', 'palmitoylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0018345', name='protein palmitoylation'), entity(namespace='MOD', identifier='MOD:00440'), ] }, 'Ph': { 'synonyms': ['Ph', 'phosphorylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0006468', name='protein phosphorylation'), entity(namespace='MOD', identifier='MOD:00696'), ] }, 'Sulf': { 'synonyms': ['Sulf', 'sulfation', 'sulphation', 'sulfur addition', 'sulphur addition'], 'xrefs': [ entity(namespace='GO', 
identifier='GO:0006477', name='protein sulfation'), entity(namespace='MOD', identifier='MOD:00695'), ] }, 'sulfonation': { 'synonyms': ['sulfonation', 'sulphonation'], 'xrefs': [ entity(namespace='MOP', identifier='MOP:0000559', name='sulfonation'), ] }, 'Sumo': { 'synonyms': ['Sumo', 'SUMOylation', 'Sumoylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0016925', name='protein sumoylation'), entity(namespace='MOD', identifier='MOD:01149'), ] }, 'Ub': { 'synonyms': ['Ub', 'ubiquitination', 'ubiquitinylation', 'ubiquitylation'], 'xrefs': [ entity(namespace='SBO', identifier='SBO:0000224', name='ubiquitination'), entity(namespace='GO', identifier='GO:0016567', name='protein ubiquitination'), entity(namespace='MOD', identifier='MOD:01148'), ] }, 'UbK48': { 'synonyms': ['UbK48', 'Lysine 48-linked polyubiquitination'], 'xrefs': [ entity(namespace='GO', identifier='GO:0070936', name='protein K48-linked ubiquitination'), ] }, 'UbK63': { 'synonyms': ['UbK63', 'Lysine 63-linked polyubiquitination'], 'xrefs': [ entity(namespace='GO', identifier='GO:0070534', name='protein K63-linked ubiquitination'), ] }, 'UbMono': { 'synonyms': ['UbMono', 'monoubiquitination'], 'xrefs': [ entity(namespace='GO', identifier='GO:0006513', name='protein monoubiquitination'), ] }, 'UbPoly': { 'synonyms': ['UbPoly', 'polyubiquitination'], 'xrefs': [ entity(namespace='GO', identifier='GO:0000209', name='protein polyubiquitination'), ] }, } #: A dictionary of legacy (BEL 1.0) default namespace protein modifications to their BEL 2.0 preferred value pmod_legacy_labels = { 'P': 'Ph', 'A': 'Ac', 'F': 'Farn', 'G': 'Glyco', 'H': 'Hy', 'M': 'Me', 'R': 'ADPRib', 'S': 'Sumo', 'U': 'Ub', 'O': 'Ox' } #: A dictionary of default gene modifications. This is a PyBEL variant to the BEL specification. 
gmod_namespace = { 'methylation': 'Me', 'Me': 'Me', 'M': 'Me' } #: Use Gene Ontology children of GO_0006304: "DNA modification" gmod_mappings = { 'Me': { 'synonyms': ['Me', 'M', 'methylation'], 'xrefs': [ entity(namespace='GO', identifier='GO:0006306', name='DNA methylation'), ] }, 'ADPRib': { 'synonyms': ['ADPRib'], 'xrefs': [ entity(namespace='GO', identifier='GO:0030592', name='DNA ADP-ribosylation'), ] } } BEL_DEFAULT_NAMESPACE_VERSION = '2.1.0' BEL_DEFAULT_NAMESPACE_URL = 'http://openbel.org/2.1.0.belns' # just needs something unique... will change later pybel-0.12.1/src/pybel/manager/000077500000000000000000000000001334645200200162215ustar00rootroot00000000000000pybel-0.12.1/src/pybel/manager/__init__.py000066400000000000000000000013721334645200200203350ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ The :mod:`pybel.manager` module serves as an interface between the BEL graph data structure and underlying relational databases. Its inclusion allows for the caching of namespaces and annotations for much faster lookup than downloading and parsing upon each compilation. """ from . 
import base_manager, cache_manager, citation_utils, database_io, make_json_serializable, models, query_manager from .base_manager import * from .cache_manager import * from .citation_utils import * from .database_io import * from .models import * from .query_manager import * __all__ = ( base_manager.__all__ + cache_manager.__all__ + citation_utils.__all__ + database_io.__all__ + models.__all__ + query_manager.__all__ ) pybel-0.12.1/src/pybel/manager/base_manager.py000066400000000000000000000075751334645200200212150ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains the base class for connection managers in SQLAlchemy""" from __future__ import unicode_literals import logging from sqlalchemy import create_engine from sqlalchemy.orm import scoped_session, sessionmaker from .models import Base from ..constants import config __all__ = [ 'BaseManager', 'build_engine_session', ] log = logging.getLogger(__name__) def build_engine_session(connection, echo=False, autoflush=None, autocommit=None, expire_on_commit=None, scopefunc=None): """Build an engine and a session. :param str connection: An RFC-1738 database connection string :param bool echo: Turn on echoing SQL :param Optional[bool] autoflush: Defaults to True if not specified in kwargs or configuration. :param Optional[bool] autocommit: Defaults to False if not specified in kwargs or configuration. :param Optional[bool] expire_on_commit: Defaults to False if not specified in kwargs or configuration. :param scopefunc: Scoped function to pass to :func:`sqlalchemy.orm.scoped_session` :rtype: tuple[Engine,Session] From the Flask-SQLAlchemy documentation: An extra key ``'scopefunc'`` can be set on the ``options`` dict to specify a custom scope function. If it's not provided, Flask's app context stack identity is used. This will ensure that sessions are created and removed with the request/response cycle, and should be fine in most cases. 
""" if connection is None: raise ValueError('can not build engine when connection is None') engine = create_engine(connection, echo=echo) if autoflush is None: autoflush = config.get('PYBEL_MANAGER_AUTOFLUSH', False) if autocommit is None: autocommit = config.get('PYBEL_MANAGER_AUTOCOMMIT', False) if expire_on_commit is None: expire_on_commit = config.get('PYBEL_MANAGER_AUTOEXPIRE', True) log.debug('auto flush: %s, auto commit: %s, expire on commmit: %s', autoflush, autocommit, expire_on_commit) #: A SQLAlchemy session maker session_maker = sessionmaker( bind=engine, autoflush=autoflush, autocommit=autocommit, expire_on_commit=expire_on_commit, ) #: A SQLAlchemy session object session = scoped_session( session_maker, scopefunc=scopefunc ) return engine, session class BaseManager(object): """A wrapper around a SQLAlchemy engine and session.""" #: The declarative base for this manager base = Base def __init__(self, engine, session): """Instantiate a manager from an engine and session.""" self.engine = engine self.session = session def create_all(self, checkfirst=True): """Create the PyBEL cache's database and tables. :param bool checkfirst: Check if the database exists before trying to re-make it """ self.base.metadata.create_all(bind=self.engine, checkfirst=checkfirst) def drop_all(self, checkfirst=True): """Drop all data, tables, and databases for the PyBEL cache. :param bool checkfirst: Check if the database exists before trying to drop it """ self.session.close() self.base.metadata.drop_all(bind=self.engine, checkfirst=checkfirst) def bind(self): """Bind the metadata to the engine and session.""" self.base.metadata.bind = self.engine self.base.query = self.session.query_property() def _count_model(self, model_cls): """Count the number of models in the database. :rtype: int """ return self.session.query(model_cls).count() def list_citations(self, model_cls): """List the models in the database. 
:rtype: list """ return self.session.query(model_cls).all() def __repr__(self): return '<{} connection={}>'.format(self.__class__.__name__, self.engine.url) pybel-0.12.1/src/pybel/manager/cache_manager.py000066400000000000000000001615001334645200200213330ustar00rootroot00000000000000# -*- coding: utf-8 -*- """The database manager for PyBEL. Under the hood, PyBEL caches namespace and annotation files for quick recall on later use. The user doesn't need to enable this option, but can specify a database location if they choose. """ from __future__ import unicode_literals import logging from copy import deepcopy import six import time from itertools import chain from six import string_types from sqlalchemy import and_, exists, func from sqlalchemy.orm import aliased from tqdm import tqdm from .base_manager import BaseManager, build_engine_session from .exc import EdgeAddError from .lookup_manager import LookupManager from .models import ( Author, Citation, Edge, Evidence, Modification, Namespace, NamespaceEntry, Network, Node, Property, edge_annotation, edge_property, network_edge, network_node, ) from .query_manager import QueryManager from .utils import extract_shared_optional, extract_shared_required, update_insert_values from ..constants import ( ACTIVITY, ANNOTATIONS, BEL_DEFAULT_NAMESPACE, CITATION, CITATION_AUTHORS, CITATION_DATE, CITATION_FIRST_AUTHOR, CITATION_ISSUE, CITATION_LAST_AUTHOR, CITATION_NAME, CITATION_PAGES, CITATION_REFERENCE, CITATION_TITLE, CITATION_TYPE, CITATION_TYPE_PUBMED, CITATION_VOLUME, DEGRADATION, EFFECT, EVIDENCE, FRAGMENT, FRAGMENT_MISSING, FRAGMENT_START, FRAGMENT_STOP, FUSION, FUSION_REFERENCE, FUSION_START, FUSION_STOP, GMOD, GOCC_KEYWORD, GOCC_LATEST, HGVS, IDENTIFIER, KIND, LINE, LOCATION, METADATA_INSERT_KEYS, MODIFIER, NAME, NAMESPACE, OBJECT, PARTNER_3P, PARTNER_5P, PMOD, PMOD_CODE, PMOD_POSITION, RANGE_3P, RANGE_5P, RELATION, SUBJECT, TRANSLOCATION, UNQUALIFIED_EDGES, VARIANTS, belns_encodings, get_cache_connection, ) from 
..language import ( BEL_DEFAULT_NAMESPACE_URL, BEL_DEFAULT_NAMESPACE_VERSION, activity_mapping, gmod_mappings, pmod_mappings, ) from ..resources.definitions import get_bel_resource from ..struct import BELGraph, union from ..struct.summary.node_summary import get_names from ..utils import hash_citation, hash_dump, hash_evidence, parse_datetime __all__ = [ 'Manager', 'NetworkManager', ] log = logging.getLogger(__name__) DEFAULT_BELNS_ENCODING = ''.join(sorted(belns_encodings)) _optional_namespace_entries_mapping = { 'species': ('Namespace', 'SpeciesString'), 'query_url': ('Namespace', 'QueryValueURL'), 'domain': ('Namespace', 'DomainString'), } def _get_namespace_insert_values(bel_resource): namespace_insert_values = { 'name': bel_resource['Namespace']['NameString'], } namespace_insert_values.update(extract_shared_required(bel_resource, 'Namespace')) namespace_insert_values.update(extract_shared_optional(bel_resource, 'Namespace')) update_insert_values(bel_resource=bel_resource, mapping=_optional_namespace_entries_mapping, values=namespace_insert_values) return namespace_insert_values _annotation_mapping = { 'name': ('Citation', 'NameString') } def _get_annotation_insert_values(bel_resource): annotation_insert_values = extract_shared_required(bel_resource, 'AnnotationDefinition') annotation_insert_values.update(extract_shared_optional(bel_resource, 'AnnotationDefinition')) update_insert_values(bel_resource=bel_resource, mapping=_annotation_mapping, values=annotation_insert_values) return annotation_insert_values def not_resource_cachable(bel_resource): """Check if the BEL resource is cacheable. :param dict bel_resource: A dictionary returned by :func:`get_bel_resource`. 
""" return bel_resource['Processing'].get('CacheableFlag') not in {'yes', 'Yes', 'True', 'true'} def _clean_bel_namespace_values(bel_resource): bel_resource['Values'] = { name: (encoding if encoding else DEFAULT_BELNS_ENCODING) for name, encoding in bel_resource['Values'].items() if name } def _normalize_url(graph, keyword): # FIXME move to utilities and unit test """ :type graph: BELGraph :param str keyword: Namespace URL keyword :rtype: Optional[str] """ if keyword == BEL_DEFAULT_NAMESPACE and BEL_DEFAULT_NAMESPACE not in graph.namespace_url: return BEL_DEFAULT_NAMESPACE_URL if keyword == GOCC_KEYWORD and GOCC_KEYWORD not in graph.namespace_url: return GOCC_LATEST return graph.namespace_url.get(keyword) class NamespaceManager(BaseManager): """Manages BEL namespaces.""" def list_namespaces(self): """List all namespaces. :rtype: list[Namespace] """ return self.session.query(Namespace).all() def count_namespaces(self): """Count the number of namespaces in the database. :rtype: int """ return self.session.query(Namespace).count() def count_namespace_entries(self): """Count the number of namespace entries in the database. :rtype: int """ return self.session.query(NamespaceEntry).count() def drop_namespaces(self): """Drop all namespaces.""" for namespace in self.session.query(NamespaceEntry).all(): namespace.children[:] = [] self.session.commit() self.session.query(NamespaceEntry).delete() self.session.query(Namespace).delete() self.session.commit() def drop_namespace_by_url(self, url): """Drop the namespace at the given URL. Won't work if the edge store is in use. :param str url: The URL of the namespace to drop """ namespace = self.get_namespace_by_url(url) self.session.query(NamespaceEntry).filter(NamespaceEntry.namespace == namespace).delete() self.session.delete(namespace) self.session.commit() def get_namespace_by_url(self, url): """Look up a namespace by url. 
:param str url: The URL of the namespace :rtype: Optional[Namespace] """ return self.session.query(Namespace).filter(Namespace.url == url).one_or_none() def get_namespace_by_keyword_version(self, keyword, version): """Get a namespace with a given keyword and version. :param str keyword: The keyword to search :param str version: The version to search :rtype: Optional[Namespace] """ filt = and_(Namespace.keyword == keyword, Namespace.version == version) return self.session.query(Namespace).filter(filt).one_or_none() def ensure_default_namespace(self): """Get or create the BEL default namespace. :rtype: Namespace """ namespace = self.get_namespace_by_keyword_version(BEL_DEFAULT_NAMESPACE, BEL_DEFAULT_NAMESPACE_VERSION) if namespace is None: namespace = Namespace( name='BEL Default Namespace', contact='charles.hoyt@scai.fraunhofer.de', keyword=BEL_DEFAULT_NAMESPACE, version=BEL_DEFAULT_NAMESPACE_VERSION, url=BEL_DEFAULT_NAMESPACE_URL, ) for name in set(chain(pmod_mappings, gmod_mappings, activity_mapping)): entry = NamespaceEntry(name=name, namespace=namespace) self.session.add(entry) self.session.add(namespace) self.session.commit() return namespace def get_or_create_namespace(self, url): """Insert the namespace file at the given location to the cache. If not cachable, returns the dict of the values of this namespace. 
:param str url: the location of the namespace file :rtype: Namespace or dict :raises: pybel.resources.exc.ResourceError """ result = self.get_namespace_by_url(url) if result is not None: return result t = time.time() bel_resource = get_bel_resource(url) _clean_bel_namespace_values(bel_resource) values = bel_resource['Values'] if not_resource_cachable(bel_resource): log.debug('not caching namespace: %s (%d terms in %.2f seconds)', url, len(values), time.time() - t) log.debug('loaded uncached namespace: %s (%d)', url, len(values)) return values namespace_insert_values = _get_namespace_insert_values(bel_resource) namespace = Namespace( url=url, **namespace_insert_values ) namespace.entries = [ NamespaceEntry(name=name, encoding=encoding) for name, encoding in values.items() ] log.info('inserted namespace: %s (%d terms in %.2f seconds)', url, len(values), time.time() - t) self.session.add(namespace) self.session.commit() return namespace def get_namespace_by_keyword_pattern(self, keyword, pattern): """Get a namespace with a given keyword and pattern. :param str keyword: The keyword to search :param str pattern: The pattern to search :rtype: Optional[Namespace] """ filt = and_(Namespace.keyword == keyword, Namespace.pattern == pattern) return self.session.query(Namespace).filter(filt).one_or_none() def ensure_regex_namespace(self, keyword, pattern): """Get or create a regular expression namespace. :param str keyword: The keyword of a regular expression namespace :param str pattern: The pattern for a regular expression namespace :rtype: Namespace """ if pattern is None: raise ValueError('cannot have null pattern') namespace = self.get_namespace_by_keyword_pattern(keyword, pattern) if namespace is None: log.info('creating regex namespace: %s:%s', keyword, pattern) namespace = Namespace( keyword=keyword, pattern=pattern ) self.session.add(namespace) self.session.commit() return namespace def get_namespace_entry(self, url, name): """Get a given NamespaceEntry object. 
:param str url: The url of the namespace source :param str name: The value of the namespace from the given url's document :rtype: Optional[NamespaceEntry] """ entry_filter = and_(Namespace.url == url, NamespaceEntry.name == name) result = self.session.query(NamespaceEntry).join(Namespace).filter(entry_filter).all() if 0 == len(result): return if 1 < len(result): log.warning('result for get_namespace_entry is too long. Returning first of %s', [str(r) for r in result]) return result[0] def get_annotation_entry_by_name(self, url, name): """Get an annotation entry by URL and name. :param str url: The url of the annotation source :param str name: The name of the annotation entry from the given url's document :rtype: NamespaceEntry """ annotation_filter = and_(Namespace.url == url, NamespaceEntry.name == name) return self.session.query(NamespaceEntry).join(Namespace).filter(annotation_filter).one() def get_or_create_regex_namespace_entry(self, namespace, pattern, name): """Get a namespace entry from a regular expression. Need to commit after! :param str namespace: The name of the namespace :param str pattern: The regular expression pattern for the namespace :param str name: The entry to get :return: """ namespace = self.ensure_regex_namespace(namespace, pattern) n_filter = and_(Namespace.pattern == pattern, NamespaceEntry.name == name) name_model = self.session.query(NamespaceEntry).join(Namespace).filter(n_filter).one_or_none() if name_model is None: name_model = NamespaceEntry( namespace=namespace, name=name ) self.session.add(name_model) return name_model def list_annotations(self): """Return a list of all annotations. :rtype: list[Namespace] """ return self.session.query(Namespace).filter(Namespace.is_annotation).all() def count_annotations(self): """Count the number of annotations in the database. 
:rtype: int """ return self.session.query(Namespace).filter(Namespace.is_annotation).count() def count_annotation_entries(self): """Count the number of annotation entries in the database. :rtype: int """ return self.session.query(NamespaceEntry).filter(NamespaceEntry.is_annotation).count() def get_or_create_annotation(self, url): """Insert the namespace file at the given location to the cache. :param str url: the location of the namespace file :rtype: Namespace :raises: pybel.resources.exc.ResourceError """ result = self.get_namespace_by_url(url) if result is not None: return result t = time.time() bel_resource = get_bel_resource(url) result = Namespace( url=url, is_annotation=True, **_get_annotation_insert_values(bel_resource) ) result.entries = [ NamespaceEntry(name=name, identifier=label) for name, label in bel_resource['Values'].items() if name ] self.session.add(result) self.session.commit() log.info('inserted annotation: %s (%d terms in %.2f seconds)', url, len(bel_resource['Values']), time.time() - t) return result def get_annotation_entry_names(self, url): """Return a dict of annotations and their labels for the given annotation file. :param str url: the location of the annotation file :rtype: set[str] """ annotation = self.get_or_create_annotation(url) return set(annotation.get_entry_names()) def get_annotation_entries_by_names(self, url, names): """Get annotation entries by URL and names. :param str url: The url of the annotation source :param list[str] names: The names of the annotation entries from the given url's document :rtype: list[NamespaceEntry] """ annotation_filter = and_(Namespace.url == url, NamespaceEntry.name.in_(names)) return self.session.query(NamespaceEntry).join(Namespace).filter(annotation_filter).all() class NetworkManager(NamespaceManager): """Groups functions for inserting and querying networks in the database's network store.""" def count_networks(self): """Count the networks in the database. 
:rtype: int """ return self.session.query(func.count(Network.id)).scalar() def list_networks(self): """List all networks in the database. :rtype: list[Network] """ return self.session.query(Network).all() def list_recent_networks(self): """List the most recently created version of each network (by name). :rtype: list[Network] """ most_recent_times = ( self.session.query( Network.name.label('network_name'), func.max(Network.created).label('max_created') ) .group_by(Network.name) .subquery('most_recent_times') ) and_condition = and_( most_recent_times.c.network_name == Network.name, most_recent_times.c.max_created == Network.created ) most_recent_networks = self.session.query(Network).join(most_recent_times, and_condition) return most_recent_networks.all() def has_name_version(self, name, version): """Check if there exists a network with the name/version combination in the database. :param str name: The network name :param str version: The network version :rtype: bool """ return self.session.query(exists().where(and_(Network.name == name, Network.version == version))).scalar() def drop_networks(self): """Drop all networks.""" for network in self.session.query(Network).all(): self.drop_network(network) def drop_network_by_id(self, network_id): """Drop a network by its database identifier. :param int network_id: The network's database identifier """ network = self.session.query(Network).get(network_id) self.drop_network(network) def drop_network(self, network): """Drop a network, while also cleaning up any edges that are no longer part of any network. 
:type network: Network """ # get the IDs of the edges that will be orphaned by deleting this network # FIXME: this list could be a problem if it becomes very large; possible optimization is a temporary table in DB edge_ids = [result.edge_id for result in self.query_singleton_edges_from_network(network)] # delete the network-to-node mappings for this network self.session.query(network_node).filter(network_node.c.network_id == network.id).delete( synchronize_session=False) # delete the edge-to-property mappings for the to-be-orphaned edges self.session.query(edge_property).filter(edge_property.c.edge_id.in_(edge_ids)).delete( synchronize_session=False) # delete the edge-to-annotation mappings for the to-be-orphaned edges self.session.query(edge_annotation).filter(edge_annotation.c.edge_id.in_(edge_ids)).delete( synchronize_session=False) # delete the edge-to-network mappings for this network self.session.query(network_edge).filter(network_edge.c.network_id == network.id).delete( synchronize_session=False) # delete the now-orphaned edges self.session.query(Edge).filter(Edge.id.in_(edge_ids)).delete(synchronize_session=False) # delete the network self.session.query(Network).filter(Network.id == network.id).delete(synchronize_session=False) # commit it! self.session.commit() def query_singleton_edges_from_network(self, network): """Return a query selecting all edge ids that only belong to the given network. :type network: Network :rtype: sqlalchemy.orm.query.Query """ ne1 = aliased(network_edge, name='ne1') ne2 = aliased(network_edge, name='ne2') singleton_edge_ids_for_network = ( self.session.query(ne1.c.edge_id) .outerjoin(ne2, and_( ne1.c.edge_id == ne2.c.edge_id, ne1.c.network_id != ne2.c.network_id )) .filter(and_( ne1.c.network_id == network.id, ne2.c.edge_id == None )) ) return singleton_edge_ids_for_network def get_network_versions(self, name): """Return all of the versions of a network with the given name. 
:param str name: The name of the network to query :rtype: set[str] """ return { version for version, in self.session.query(Network.version).filter(Network.name == name).all() } def get_network_by_name_version(self, name, version): """Load most network with the given name and version. :param str name: The name of the network. :param str version: The version string of the network. :rtype: Optional[Network] """ name_version_filter = and_(Network.name == name, Network.version == version) network = self.session.query(Network).filter(name_version_filter).one_or_none() return network def get_graph_by_name_version(self, name, version): """Load most recently added graph with the given name, or allows for specification of version. :param str name: The name of the network. :param str version: The version string of the network. :rtype: Optional[BELGraph] """ network = self.get_network_by_name_version(name, version) if network is None: return return network.as_bel() def get_networks_by_name(self, name): """Get all networks with the given name. Useful for getting all versions of a given network. :param str name: The name of the network :rtype: list[Network] """ return self.session.query(Network).filter(Network.name.like(name)).all() def get_most_recent_network_by_name(self, name): """Get the most recently created network with the given name. :param str name: The name of the network :rtype: Optional[Network] """ network = self.session.query(Network).filter(Network.name == name).order_by(Network.created.desc()).first() return network def get_graph_by_most_recent(self, name): """Get the most recently created network with the given name as a :class:`pybel.BELGraph`. :param str name: The name of the network :rtype: Optional[BELGraph] """ network = self.get_most_recent_network_by_name(name) if network is None: return return network.as_bel() def get_network_by_id(self, network_id): """Get a network from the database by its identifier. 
:param int network_id: The network's database identifier :rtype: Network """ return self.session.query(Network).get(network_id) def get_graph_by_id(self, network_id): """Get a network from the database by its identifier and converts it to a BEL graph. :param int network_id: The network's database identifier :rtype: BELGraph """ network = self.get_network_by_id(network_id) log.debug('converting network [id=%d] %s to bel graph', network_id, network) return network.as_bel() def get_networks_by_ids(self, network_ids): """Get a list of networks with the given identifiers. Note: order is not necessarily preserved. :param iter[int] network_ids: The identifiers of networks in the database :rtype: list[Network] """ log.debug('getting networks by identifiers: %s', network_ids) return self.session.query(Network).filter(Network.id_in(network_ids)).all() def get_graphs_by_ids(self, network_ids): """Get a list of networks with the given identifiers and converts to BEL graphs. Note: order is not necessarily preserved. :param iter[int] network_ids: The identifiers of networks in the database :rtype: list[BELGraph] """ rv = [ self.get_graph_by_id(network_id) for network_id in network_ids ] log.debug('returning graphs for network identifiers: %s', network_ids) return rv def get_graph_by_ids(self, network_ids): """Get a combine BEL Graph from a list of network identifiers. 
:param list[int] network_ids: A list of network identifiers
        :rtype: BELGraph
        """
        # A single identifier needs no union; return that graph directly
        if len(network_ids) == 1:
            return self.get_graph_by_id(network_ids[0])

        log.debug('getting graph by identifiers: %s', network_ids)
        graphs = self.get_graphs_by_ids(network_ids)

        log.debug('getting union of graphs: %s', network_ids)
        rv = union(graphs)

        return rv


class InsertManager(NamespaceManager, LookupManager):
    """Manages inserting data into the edge store."""

    def __init__(self, *args, **kwargs):
        """Initialize the parent managers and create the empty per-model object caches.

        All positional and keyword arguments are forwarded unchanged to the parent classes.
        """
        super(InsertManager, self).__init__(*args, **kwargs)

        # A set of dictionaries that contains objects of the type described by the key
        # (the get_or_create_* methods of this class index them by hash; authors by name)
        self.object_cache_modification = {}
        self.object_cache_property = {}
        self.object_cache_node = {}
        self.object_cache_edge = {}
        self.object_cache_evidence = {}
        self.object_cache_citation = {}
        self.object_cache_author = {}

    def insert_graph(self, graph, store_parts=True, use_tqdm=False):
        """Insert a graph in the database and returns the corresponding Network model.

        :param BELGraph graph: A BEL graph
        :param bool store_parts: Should the graph be stored in the edge store?
        :param bool use_tqdm: Should progress be displayed with tqdm?
:rtype: Network :raises: pybel.resources.exc.ResourceError """ if not graph.name: raise ValueError('Can not upload a graph without a name') if not graph.version: raise ValueError('Can not upload a graph without a version') log.debug('inserting %s v%s', graph.name, graph.version) t = time.time() self.ensure_default_namespace() namespace_urls = graph.namespace_url.values() if use_tqdm: namespace_urls = tqdm(namespace_urls, desc='namespaces') for namespace_url in namespace_urls: if namespace_url in graph.uncached_namespaces: continue self.get_or_create_namespace(namespace_url) for keyword, pattern in graph.namespace_pattern.items(): self.ensure_regex_namespace(keyword, pattern) annotation_urls = graph.annotation_url.values() if use_tqdm: annotation_urls = tqdm(annotation_urls, desc='annotations') for annotation_url in annotation_urls: self.get_or_create_annotation(annotation_url) network = Network(**{ key: value for key, value in graph.document.items() if key in METADATA_INSERT_KEYS }) network.store_bel(graph) if store_parts: network.nodes, network.edges = self._store_graph_parts(graph, use_tqdm=use_tqdm) self.session.add(network) self.session.commit() log.info('inserted %s v%s in %.2f seconds', graph.name, graph.version, time.time() - t) return network def _store_graph_parts(self, graph, use_tqdm=False): """Store the given graph into the edge store. :param BELGraph graph: A BEL Graph :rtype: tuple[list[Node],list[Edge]] :raises: pybel.resources.exc.ResourceError :raises: EdgeAddError """ names = get_names(graph) if 'GOCC' in names and 'GOCC' not in graph.namespace_url: # means it got thrown in there! 
self.get_or_create_namespace(GOCC_LATEST) log.debug('inserting %s into edge store', graph) log.debug('building node models') node_model_build_start = time.time() nodes = list(graph) if use_tqdm: nodes = tqdm(nodes, total=graph.number_of_nodes(), desc='nodes') node_model = {} for node in nodes: namespace = node.get(NAMESPACE) if graph.skip_storing_namespace(namespace): continue # already know this node won't be cached node_object = self.get_or_create_node(graph, node) if node_object is None: log.warning('can not add node %s', node) continue node_model[node] = node_object log.debug('built node models in %.2f seconds', time.time() - node_model_build_start) node_model_commit_start = time.time() node_models = list(node_model.values()) self.session.add_all(node_models) self.session.commit() log.debug('stored node models in %.2f seconds', time.time() - node_model_commit_start) log.debug('building edge models') edge_model_build_start = time.time() edges = graph.edges(keys=True, data=True) if use_tqdm: edges = tqdm(edges, total=graph.number_of_edges(), desc='edges') edge_models = list(self._get_edge_models(graph, node_model, edges)) log.debug('built edge models in %.2f seconds', time.time() - edge_model_build_start) edge_model_commit_start = time.time() self.session.add_all(edge_models) self.session.commit() log.debug('stored edge models in %.2f seconds', time.time() - edge_model_commit_start) return node_models, edge_models def _get_edge_models(self, graph, tuple_model, edges): for u, v, key, data in edges: source = tuple_model.get(u) if source is None or source.sha512 not in self.object_cache_node: log.debug('skipping uncached source node: %s', u) continue target = tuple_model.get(v) if target is None or target.sha512 not in self.object_cache_node: log.debug('skipping uncached target node: %s', v) continue relation = data[RELATION] if relation in UNQUALIFIED_EDGES: try: edge = self._add_unqualified_edge( source=source, target=target, bel=graph.edge_to_bel(u, v, data), 
key=key, data=data, ) if edge is None: continue except Exception as e: self.session.rollback() log.exception('error storing edge in database. edge data: %s', data) six.raise_from(EdgeAddError(e, u, v, key, data), e) else: yield edge elif EVIDENCE not in data or CITATION not in data: continue elif CITATION_TYPE not in data[CITATION] or CITATION_REFERENCE not in data[CITATION]: continue else: try: bel = graph.edge_to_bel(u, v, data) edge = self._add_qualified_edge( graph=graph, source=source, target=target, key=key, bel=bel, data=data, ) if edge is None: continue except Exception as e: self.session.rollback() log.exception('error storing edge in database. edge data: %s', data) six.raise_from(EdgeAddError(e, u, v, key, data), e) else: yield edge @staticmethod def _iter_from_annotations_dict(graph, annotations_dict): """Iterate over the key/value pairs in this edge data dictionary normalized to their source URLs. :param BELGraph graph: A BEL graph :param dict[str,dict[str,bool]] annotations_dict: A PyBEL edge data dictionary :rtype: iter[tuple[str,set[str]]] """ for key, names in annotations_dict.items(): if key in graph.annotation_url: url = graph.annotation_url[key] elif key in graph.annotation_list: continue # skip those elif key in graph.annotation_pattern: log.debug('pattern annotation in database not implemented yet not implemented') # FIXME continue else: raise ValueError('Graph resources does not contain keyword: {}'.format(key)) yield url, set(names) def _get_annotation_entries_from_data(self, graph, data): """Get the annotation entries from an edge data dictionary. 
:param BELGraph graph: A BEL graph :param dict data: A PyBEL edge data dictionary :rtype: Optional[list[AnnotationEntry]] """ annotations_dict = data.get(ANNOTATIONS) if annotations_dict is None: return return [ entry for url, names in self._iter_from_annotations_dict(graph, annotations_dict=annotations_dict) for entry in self.get_annotation_entries_by_names(url, names) ] def _add_qualified_edge(self, graph, source, target, key, bel, data): """Add a qualified edge to the network. :type graph: BELGraph :type source: Node :type target: Node :type key: str :type bel: str :type data: dict """ citation_dict = data[CITATION] citation = self.get_or_create_citation( type=citation_dict.get(CITATION_TYPE), reference=citation_dict.get(CITATION_REFERENCE), name=citation_dict.get(CITATION_NAME), title=citation_dict.get(CITATION_TITLE), volume=citation_dict.get(CITATION_VOLUME), issue=citation_dict.get(CITATION_ISSUE), pages=citation_dict.get(CITATION_PAGES), date=citation_dict.get(CITATION_DATE), first=citation_dict.get(CITATION_FIRST_AUTHOR), last=citation_dict.get(CITATION_LAST_AUTHOR), authors=citation_dict.get(CITATION_AUTHORS), ) evidence = self.get_or_create_evidence(citation, data[EVIDENCE]) properties = self.get_or_create_properties(graph, data) if properties is None: return annotations = self._get_annotation_entries_from_data(graph, data) return self.get_or_create_edge( source=source, target=target, relation=data[RELATION], bel=bel, sha512=key, evidence=evidence, properties=properties, annotations=annotations, ) def _add_unqualified_edge(self, source, target, key, bel, data): """Add an unqualified edge to the network. :type source: Node :type target: Node :type key: str :type bel: str :type data: dict """ return self.get_or_create_edge( source=source, target=target, relation=data[RELATION], bel=bel, sha512=key, ) def get_or_create_evidence(self, citation, text): """Create an entry and object for given evidence if it does not exist. 
:param Citation citation: Citation object obtained from :func:`get_or_create_citation` :param str text: Evidence text :rtype: Evidence """ sha512 = hash_evidence(text=text, type=str(citation.type), reference=str(citation.reference)) if sha512 in self.object_cache_evidence: evidence = self.object_cache_evidence[sha512] self.session.add(evidence) return evidence evidence = self.get_evidence_by_hash(sha512) if evidence is not None: self.object_cache_evidence[sha512] = evidence return evidence evidence = Evidence( text=text, citation=citation, sha512=sha512 ) self.session.add(evidence) self.object_cache_evidence[sha512] = evidence return evidence def get_or_create_node(self, graph, node_data): """Create an entry and object for given node if it does not exist. :param BELGraph graph: A BEL graph :param BaseEntity node_data: A PyBEL node tuple :rtype: Node """ sha512 = node_data.as_sha512() if sha512 in self.object_cache_node: return self.object_cache_node[sha512] bel = node_data.as_bel() node = self.get_node_by_hash(sha512) if node is not None: self.object_cache_node[sha512] = node return node node = Node( type=node_data.function, bel=bel, sha512=sha512, ) namespace = node_data.get(NAMESPACE) if namespace is None: pass elif namespace in graph.namespace_url: url = graph.namespace_url[namespace] name = node_data[NAME] entry = self.get_namespace_entry(url, name) if entry is None: log.debug('skipping node with identifier %s: %s', url, name) return self.session.add(entry) node.namespace_entry = entry elif namespace in graph.namespace_pattern: name = node_data[NAME] pattern = graph.namespace_pattern[namespace] entry = self.get_or_create_regex_namespace_entry(namespace, pattern, name) self.session.add(entry) node.namespace_entry = entry else: log.warning("No reference in BELGraph for namespace: {}".format(node_data[NAMESPACE])) return if VARIANTS in node_data or FUSION in node_data: node.is_variant = True node.has_fusion = FUSION in node_data modifications = 
self.get_or_create_modification(graph, node_data) if modifications is None: log.warning('could not create %s because had an uncachable modification', bel) return node.modifications = modifications self.session.add(node) self.object_cache_node[sha512] = node return node def drop_nodes(self): """Drop all nodes in the database.""" t = time.time() self.session.query(Node).delete() self.session.commit() log.info('dropped all nodes in %.2f seconds', time.time() - t) def drop_edges(self): """Drop all edges in the database""" t = time.time() self.session.query(Edge).delete() self.session.commit() log.info('dropped all edges in %.2f seconds', time.time() - t) def get_or_create_edge(self, source, target, relation, bel, sha512, evidence=None, annotations=None, properties=None): """Create an edge if it does not exist, or return it if it does. :param Node source: Source node of the relation :param Node target: Target node of the relation :param str relation: Type of the relation between source and target node :param str bel: BEL statement that describes the relation :param str sha512: The SHA512 hash of the edge as a string :param Evidence evidence: Evidence object that proves the given relation :param Optional[list[Property]] properties: List of all properties that belong to the edge :param Optional[list[AnnotationEntry]] annotations: List of all annotations that belong to the edge :rtype: Edge """ if sha512 in self.object_cache_edge: edge = self.object_cache_edge[sha512] self.session.add(edge) return edge edge = self.get_edge_by_hash(sha512) if edge is not None: self.object_cache_edge[sha512] = edge return edge edge = Edge( source=source, target=target, relation=relation, bel=bel, sha512=sha512, ) if evidence is not None: edge.evidence = evidence if properties is not None: edge.properties = properties if annotations is not None: edge.annotations = annotations self.session.add(edge) self.object_cache_edge[sha512] = edge return edge def get_or_create_citation(self, reference, 
type=None, name=None, title=None, volume=None, issue=None, pages=None, date=None, first=None, last=None, authors=None): """Create a citation if it does not exist, or return it if it does. :param str type: Citation type (e.g. PubMed) :param str reference: Identifier of the given citation (e.g. PubMed id) :param Optional[str] name: Name of the publication :param Optional[str] title: Title of article :param Optional[str] volume: Volume of publication :param Optional[str] issue: Issue of publication :param Optional[str] pages: Pages of issue :param Optional[str] date: Date of publication in ISO 8601 (YYYY-MM-DD) format :param Optional[str] first: Name of first author :param Optional[str] last: Name of last author :param authors: Either a list of authors separated by |, or an actual list of authors :type authors: None or str or list[str] :rtype: Citation """ if type is None: type = CITATION_TYPE_PUBMED sha512 = hash_citation(type=type, reference=reference) if sha512 in self.object_cache_citation: citation = self.object_cache_citation[sha512] self.session.add(citation) return citation citation = self.get_citation_by_hash(sha512) if citation is not None: self.object_cache_citation[sha512] = citation return citation citation = Citation( type=type, reference=reference, sha512=sha512, name=name, title=title, volume=volume, issue=issue, pages=pages ) if date is not None: citation.date = parse_datetime(date) if first is not None: citation.first = self.get_or_create_author(first) if last is not None: citation.last = self.get_or_create_author(last) if authors is not None: for author in (authors.split('|') if isinstance(authors, string_types) else authors): author_model = self.get_or_create_author(author) if author_model not in citation.authors: citation.authors.append(author_model) self.session.add(citation) self.object_cache_citation[sha512] = citation return citation def get_or_create_author(self, name): """Get an author by name, or creates one if it does not exist. 
:param str name: An author's name :rtype: Author """ author = self.object_cache_author.get(name) if author is not None: self.session.add(author) return author author = self.get_author_by_name(name) if author is not None: self.object_cache_author[name] = author return author author = self.object_cache_author[name] = Author.from_name(name=name) self.session.add(author) return author def get_modification_by_hash(self, sha512): """Get a modification by a SHA512 hash. :param str sha512: A SHA512 hash of a modification :rtype: Optional[Modification] """ return self.session.query(Modification).filter(Modification.sha512 == sha512).one_or_none() def get_or_create_modification(self, graph, node_data): """Create a list of node modification objects that belong to the node described by node_data. Return None if the list can not be constructed, and the node should also be skipped. :param BELGraph graph: A BEL graph :param dict node_data: Describes the given node and contains is_variant information :return: A list of modification objects belonging to the given node :rtype: Optional[list[Modification]] """ modification_list = [] if FUSION in node_data: mod_type = FUSION node_data = node_data[FUSION] p3_namespace_url = graph.namespace_url[node_data[PARTNER_3P][NAMESPACE]] if p3_namespace_url in graph.uncached_namespaces: log.warning('uncached namespace %s in fusion()', p3_namespace_url) return p3_name = node_data[PARTNER_3P][NAME] p3_namespace_entry = self.get_namespace_entry(p3_namespace_url, p3_name) if p3_namespace_entry is None: log.warning('Could not find namespace entry %s %s', p3_namespace_url, p3_name) return # FIXME raise? 
p5_namespace_url = graph.namespace_url[node_data[PARTNER_5P][NAMESPACE]] if p5_namespace_url in graph.uncached_namespaces: log.warning('uncached namespace %s in fusion()', p5_namespace_url) return p5_name = node_data[PARTNER_5P][NAME] p5_namespace_entry = self.get_namespace_entry(p5_namespace_url, p5_name) if p5_namespace_entry is None: log.warning('Could not find namespace entry %s %s', p5_namespace_url, p5_name) return # FIXME raise? fusion_dict = { 'type': mod_type, 'p3_partner': p3_namespace_entry, 'p5_partner': p5_namespace_entry, } node_range_3p = node_data.get(RANGE_3P) if node_range_3p and FUSION_REFERENCE in node_range_3p: fusion_dict.update({ 'p3_reference': node_range_3p[FUSION_REFERENCE], 'p3_start': node_range_3p[FUSION_START], 'p3_stop': node_range_3p[FUSION_STOP], }) node_range_5p = node_data.get(RANGE_5P) if node_range_5p and FUSION_REFERENCE in node_range_5p: fusion_dict.update({ 'p5_reference': node_range_5p[FUSION_REFERENCE], 'p5_start': node_range_5p[FUSION_START], 'p5_stop': node_range_5p[FUSION_STOP], }) modification_list.append(fusion_dict) else: for variant in node_data[VARIANTS]: mod_type = variant[KIND].strip() if mod_type == HGVS: modification_list.append({ 'type': mod_type, 'variantString': variant[IDENTIFIER] }) elif mod_type == FRAGMENT: if FRAGMENT_MISSING in variant: modification_list.append({ 'type': mod_type, }) else: modification_list.append({ 'type': mod_type, 'p3_start': variant[FRAGMENT_START], 'p3_stop': variant[FRAGMENT_STOP] }) elif mod_type in {GMOD, PMOD}: variant_identifier = variant[IDENTIFIER] namespace_url = _normalize_url(graph, variant_identifier[NAMESPACE]) if namespace_url in graph.uncached_namespaces: log.warning('uncached namespace %s in fusion()', namespace_url) return mod_entry = self.get_namespace_entry(namespace_url, variant_identifier[NAME]) if mod_type == GMOD: modification_list.append({ 'type': mod_type, 'identifier': mod_entry }) if mod_type == PMOD: modification_list.append({ 'type': mod_type, 
'identifier': mod_entry, 'residue': variant[PMOD_CODE].strip() if PMOD_CODE in variant else None, 'position': variant[PMOD_POSITION] if PMOD_POSITION in variant else None }) modifications = [] for modification in modification_list: mod_hash = hash_dump(modification) mod = self.object_cache_modification.get(mod_hash) if mod is None: mod = self.get_modification_by_hash(mod_hash) if not mod: modification['sha512'] = mod_hash mod = Modification(**modification) self.object_cache_modification[mod_hash] = mod modifications.append(mod) return modifications def get_property_by_hash(self, property_hash): """Get a property by its hash if it exists. :param str property_hash: The hash of the property to search :rtype: Optional[Property] """ return self.session.query(Property).filter(Property.sha512 == property_hash).one_or_none() def _make_property_from_dict(self, property_def): """Build an edge property from a dictionary. :param property_def: :rtype: Property """ property_hash = hash_dump(property_def) edge_property_model = self.object_cache_property.get(property_hash) if edge_property_model is None: edge_property_model = self.get_property_by_hash(property_hash) if not edge_property_model: property_def['sha512'] = property_hash edge_property_model = Property(**property_def) self.object_cache_property[property_hash] = edge_property_model return edge_property_model def get_or_create_properties(self, graph, edge_data): # TODO make for just single property then loop with other fn. """Create a list of edge subject/object property models. Return None if the property cannot be constructed due to missing cache entries. :param BELGraph graph: A BEL graph :param dict edge_data: Describes the context of the given edge. 
:return: A list of all subject and object properties of the edge :rtype: Optional[list[Property]] """ property_list = [] for participant in (SUBJECT, OBJECT): participant_data = edge_data.get(participant) if participant_data is None: continue location = participant_data.get(LOCATION) if location is not None: location_property_dict = { 'is_subject': participant == SUBJECT, 'modifier': LOCATION } location_namespace = location[NAMESPACE] if location_namespace == GOCC_KEYWORD and GOCC_KEYWORD not in graph.namespace_url: namespace_url = GOCC_LATEST else: namespace_url = graph.namespace_url[location_namespace] if namespace_url in graph.uncached_namespaces: log.warning('uncached namespace %s in loc() on line %s', location_namespace, edge_data.get(LINE)) return participant_name = location[NAME] location_property_dict['effect'] = self.get_namespace_entry(namespace_url, participant_name) if location_property_dict['effect'] is None: raise IndexError('did not get {}: {}'.format(namespace_url, participant_name)) property_list.append(location_property_dict) modifier = participant_data.get(MODIFIER) if modifier is not None: modifier_property_dict = { 'is_subject': participant == SUBJECT, 'modifier': modifier } if modifier == TRANSLOCATION: for effect_type, effect_value in participant_data.get(EFFECT, {}).items(): tmp_dict = deepcopy(modifier_property_dict) tmp_dict['relative_key'] = effect_type if NAMESPACE not in effect_value: tmp_dict['propValue'] = effect_value raise ValueError('shouldnt use propValue') else: effect_namespace = effect_value[NAMESPACE] if effect_namespace == GOCC_KEYWORD and GOCC_KEYWORD not in graph.namespace_url: namespace_url = GOCC_LATEST elif effect_namespace in graph.namespace_url: namespace_url = graph.namespace_url[effect_namespace] else: log.warning('namespace not enumerated in modifier %s', effect_namespace) return if namespace_url in graph.uncached_namespaces: log.warning('uncached namespace %s in tloc() on line %s ', effect_namespace, 
edge_data.get(LINE)) return effect_name = effect_value[NAME] tmp_dict['effect'] = self.get_namespace_entry(namespace_url, effect_name) if tmp_dict['effect'] is None: log.warning('could not find tloc() %s %s', namespace_url, effect_name) return # FIXME raise? property_list.append(tmp_dict) elif modifier == ACTIVITY: effect = participant_data.get(EFFECT) if effect is not None: namespace_url = _normalize_url(graph, effect[NAMESPACE]) if namespace_url in graph.uncached_namespaces: log.warning('uncached namespace %s in fusion()', namespace_url) return modifier_property_dict['effect'] = self.get_namespace_entry(namespace_url, effect[NAME]) property_list.append(modifier_property_dict) elif modifier == DEGRADATION: property_list.append(modifier_property_dict) else: raise ValueError('unknown modifier: {}'.format(modifier)) return [ self._make_property_from_dict(property_def) for property_def in property_list ] class _Manager(QueryManager, InsertManager, NetworkManager): """A wrapper around PyBEL managers that can be directly instantiated with an engine and session.""" def count_citations(self): return self._count_model(Citation) def list_citations(self): return self._list_model(Citation) class Manager(_Manager): """A manager for the PyBEL database.""" def __init__(self, connection=None, engine=None, session=None, **kwargs): """Create a connection to database and a persistent session using SQLAlchemy. A custom default can be set as an environment variable with the name :data:`pybel.constants.PYBEL_CONNECTION`, using an `RFC-1738 `_ string. For example, a MySQL string can be given with the following form: :code:`mysql+pymysql://:@/?charset=utf8[&]` A SQLite connection string can be given in the form: ``sqlite:///~/Desktop/cache.db`` Further options and examples can be found on the SQLAlchemy documentation on `engine configuration `_. :param Optional[str] connection: An RFC-1738 database connection string. 
If ``None``, tries to load from the environment variable ``PYBEL_CONNECTION`` then from the config file ``~/.config/pybel/config.json`` whose value for ``PYBEL_CONNECTION`` defaults to :data:`pybel.constants.DEFAULT_CACHE_LOCATION`. :param engine: Optional engine to use. Must be specified with a session and no connection. :param session: Optional session to use. Must be specified with an engine and no connection. :param bool echo: Turn on echoing sql :param Optional[bool] autoflush: Defaults to True if not specified in kwargs or configuration. :param Optional[bool] autocommit: Defaults to False if not specified in kwargs or configuration. :param Optional[bool] expire_on_commit: Defaults to False if not specified in kwargs or configuration. :param scopefunc: Scoped function to pass to :func:`sqlalchemy.orm.scoped_session` From the Flask-SQLAlchemy documentation: An extra key ``'scopefunc'`` can be set on the ``options`` dict to specify a custom scope function. If it's not provided, Flask's app context stack identity is used. This will ensure that sessions are created and removed with the request/response cycle, and should be fine in most cases. Allowed Usages: Instantiation with connection string as positional argument >>> my_connection = 'sqlite:///~/Desktop/cache.db' >>> manager = Manager(my_connection) Instantiation with connection string as positional argument with keyword arguments >>> my_connection = 'sqlite:///~/Desktop/cache.db' >>> manager = Manager(my_connection, echo=True) Instantiation with connection string as keyword argument >>> my_connection = 'sqlite:///~/Desktop/cache.db' >>> manager = Manager(connection=my_connection) Instantiation with connection string as keyword argument with keyword arguments >>> my_connection = 'sqlite:///~/Desktop/cache.db' >>> manager = Manager(connection=my_connection, echo=True) Instantiation with user-supplied engine and session objects as keyword arguments >>> my_engine, my_session = ... # magical creation! 
See SQLAlchemy documentation >>> manager = Manager(engine=my_engine, session=my_session) """ if connection and (engine or session): raise ValueError('can not specify connection with engine/session') if engine is None and session is None: if connection is None: connection = get_cache_connection() engine, session = build_engine_session(connection=connection, **kwargs) elif engine is None or session is None: raise ValueError('need both engine and session to be specified') elif kwargs: raise ValueError('keyword arguments should not be used with engine/session') super(Manager, self).__init__(engine=engine, session=session) self.create_all() pybel-0.12.1/src/pybel/manager/citation_utils.py000066400000000000000000000202441334645200200216270ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Citation utilities for the database manager.""" import logging import re import time from datetime import datetime import requests from six.moves import zip_longest from ..constants import CITATION, CITATION_REFERENCE, CITATION_TYPE_PUBMED from ..struct.filters import filter_edges from ..struct.filters.edge_predicates import has_pubmed from ..struct.summary.provenance import get_pubmed_identifiers __all__ = [ 'get_citations_by_pmids', 'enrich_pubmed_citations', ] log = logging.getLogger(__name__) EUTILS_URL_FMT = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&retmode=json&id={}" re1 = re.compile('^[12][0-9]{3} [a-zA-Z]{3} \d{1,2}$') re2 = re.compile('^[12][0-9]{3} [a-zA-Z]{3}$') re3 = re.compile('^[12][0-9]{3}$') re4 = re.compile('^[12][0-9]{3} [a-zA-Z]{3}-[a-zA-Z]{3}$') re5 = re.compile('^([12][0-9]{3}) (Spring|Fall|Winter|Summer)$') re6 = re.compile('^[12][0-9]{3} [a-zA-Z]{3} \d{1,2}-(\d{1,2})$') re7 = re.compile('^[12][0-9]{3} [a-zA-Z]{3} \d{1,2}-([a-zA-Z]{3} \d{1,2})$') season_map = {'Spring': '03', 'Summer': '06', 'Fall': '09', 'Winter': '12'} def sanitize_date(publication_date): """Sanitize lots of different date strings into ISO-8601. 
:param str publication_date: :rtype: str """ if re1.search(publication_date): return datetime.strptime(publication_date, '%Y %b %d').strftime('%Y-%m-%d') if re2.search(publication_date): return datetime.strptime(publication_date, '%Y %b').strftime('%Y-%m-01') if re3.search(publication_date): return publication_date + "-01-01" if re4.search(publication_date): return datetime.strptime(publication_date[:-4], '%Y %b').strftime('%Y-%m-01') s = re5.search(publication_date) if s: year, season = s.groups() return '{}-{}-01'.format(year, season_map[season]) s = re6.search(publication_date) if s: return datetime.strptime(publication_date, '%Y %b %d-{}'.format(s.groups()[0])).strftime('%Y-%m-%d') s = re7.search(publication_date) if s: return datetime.strptime(publication_date, '%Y %b %d-{}'.format(s.groups()[0])).strftime('%Y-%m-%d') def grouper(n, iterable, fillvalue=None): """Group iterables into tuples. grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx """ args = [iter(iterable)] * n return zip_longest(*args, fillvalue=fillvalue) def clean_pubmed_identifiers(pmids): """Clean a list of PubMed identifiers with string strips, deduplicates, and sorting. :param iter[str] pmids: An iterable of PubMed identifiers :return: """ return sorted({str(pmid).strip() for pmid in pmids}) def get_pubmed_citation_response(pubmed_identifiers): """Get the response from PubMed E-Utils for a given list of PubMed identifiers. :param list[str] pubmed_identifiers: :rtype: dict """ pubmed_identifiers = list(pubmed_identifiers) url = EUTILS_URL_FMT.format(','.join( pubmed_identifier for pubmed_identifier in pubmed_identifiers if pubmed_identifier )) response = requests.get(url) return response.json() def enrich_citation_model(manager, citation, p): """Enrich a citation model with the information from PubMed. 
:param pybel.manager.Manager manager: :param Citation citation: A citation model :param dict p: The dictionary from PubMed E-Utils corresponding to d["result"][pmid] :rtype: bool """ if 'error' in p: log.warning('Error downloading PubMed') return False citation.name = p['fulljournalname'] citation.title = p['title'] citation.volume = p['volume'] citation.issue = p['issue'] citation.pages = p['pages'] citation.first = manager.get_or_create_author(p['sortfirstauthor']) citation.last = manager.get_or_create_author(p['lastauthor']) if 'authors' in p: for author in p['authors']: author_model = manager.get_or_create_author(author['name']) if author_model not in citation.authors: citation.authors.append(author_model) publication_date = p['pubdate'] sanitized_publication_date = sanitize_date(publication_date) if sanitized_publication_date: citation.date = datetime.strptime(sanitized_publication_date, '%Y-%m-%d') else: log.info('result had date with strange format: %s', publication_date) return True def get_citations_by_pmids(manager, pmids, group_size=None, sleep_time=None): """Get citation information for the given list of PubMed identifiers using the NCBI's eUtils service. :type manager: pybel.Manager :param pmids: an iterable of PubMed identifiers :type pmids: iter[str] or iter[int] :param int group_size: The number of PubMed identifiers to query at a time. Defaults to 200 identifiers. :param int sleep_time: Number of seconds to sleep between queries. Defaults to 1 second. 
:return: A dictionary of {pmid: pmid data dictionary} or a pair of this dictionary and a set ot erroneous pmids if return_errors is :data:`True` :rtype: tuple[dict[str,dict],set[str]] """ group_size = group_size if group_size is not None else 200 sleep_time = sleep_time if sleep_time is not None else 1 pmids = clean_pubmed_identifiers(pmids) log.info('Ensuring %d PubMed identifiers', len(pmids)) result = {} unenriched_pmids = {} for pmid in pmids: citation = manager.get_or_create_citation(type=CITATION_TYPE_PUBMED, reference=pmid) if not citation.date or not citation.name or not citation.authors: unenriched_pmids[pmid] = citation continue result[pmid] = citation.to_json() manager.session.commit() log.debug('Found %d PubMed identifiers in database', len(pmids) - len(unenriched_pmids)) if not unenriched_pmids: return result, set() number_unenriched = len(unenriched_pmids) log.info('Querying PubMed for %d identifiers', number_unenriched) errors = set() t = time.time() for pmid_group_index, pmid_list in enumerate(grouper(group_size, unenriched_pmids), start=1): pmid_list = list(pmid_list) log.info('Getting group %d having %d PubMed identifiers', pmid_group_index, len(pmid_list)) response = get_pubmed_citation_response(pmid_list) response_pmids = response['result']['uids'] for pmid in response_pmids: p = response['result'][pmid] citation = unenriched_pmids[pmid] successful_enrichment = enrich_citation_model(manager, citation, p) if not successful_enrichment: log.warning("Error downloading PubMed identifier: %s", pmid) errors.add(pmid) continue result[pmid] = citation.to_json() manager.session.add(citation) manager.session.commit() # commit in groups # Don't want to hit that rate limit time.sleep(sleep_time) log.info('retrieved %d PubMed identifiers in %.02f seconds', len(unenriched_pmids), time.time() - t) return result, errors def enrich_pubmed_citations(manager, graph, group_size=None, sleep_time=None): """Overwrite all PubMed citations with values from NCBI's eUtils 
lookup service. Sets authors as list, so probably a good idea to run :func:`pybel_tools.mutation.serialize_authors` before exporting. :type manager: pybel.manager.Manager :type graph: pybel.BELGraph :param Optional[int] group_size: The number of PubMed identifiers to query at a time. Defaults to 200 identifiers. :param Optional[int] sleep_time: Number of seconds to sleep between queries. Defaults to 1 second. :return: A set of PMIDs for which the eUtils service crashed :rtype: set[str] """ pmids = get_pubmed_identifiers(graph) pmid_data, errors = get_citations_by_pmids(manager, pmids=pmids, group_size=group_size, sleep_time=sleep_time) for u, v, k in filter_edges(graph, has_pubmed): pmid = graph[u][v][k][CITATION][CITATION_REFERENCE].strip() if pmid not in pmid_data: log.warning('Missing data for PubMed identifier: %s', pmid) errors.add(pmid) continue graph[u][v][k][CITATION].update(pmid_data[pmid]) return errors pybel-0.12.1/src/pybel/manager/database_io.py000066400000000000000000000035201334645200200210260ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Conversion functions for BEL graphs with a SQL database.""" import logging from sqlalchemy.exc import IntegrityError, OperationalError from .cache_manager import Manager __all__ = [ 'to_database', 'from_database' ] log = logging.getLogger(__name__) def to_database(graph, manager=None, store_parts=True, use_tqdm=False): """Store a graph in a database. :param BELGraph graph: A BEL graph :type manager: Optional[pybel.manager.Manager] :param bool store_parts: Should the graph be stored in the edge store? :return: If successful, returns the network object from the database. 
:rtype: Optional[Network] """ if manager is None: manager = Manager() try: return manager.insert_graph(graph, store_parts=store_parts, use_tqdm=use_tqdm) except (IntegrityError, OperationalError): manager.session.rollback() log.exception('Error storing graph') except Exception as e: manager.session.rollback() raise e def from_database(name, version=None, manager=None): """Load a BEL graph from a database. If name and version are given, finds it exactly with :meth:`pybel.manager.Manager.get_network_by_name_version`. If just the name is given, finds most recent with :meth:`pybel.manager.Manager.get_network_by_name_version` :param str name: The name of the graph :param Optional[str] version: The version string of the graph. If not specified, loads most recent graph added with this name :type manager: Optional[pybel.manager.Manager] :return: A BEL graph loaded from the database :rtype: Optional[BELGraph] """ if manager is None: manager = Manager() if version is None: return manager.get_graph_by_most_recent(name) return manager.get_graph_by_name_version(name, version) pybel-0.12.1/src/pybel/manager/defaults.py000066400000000000000000000224571334645200200204140ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Default namespaces and annotations from OpenBEL and Fraunhofer SCAI. This file contains a listing of the default namespaces released in each version of OpenBEL, and other common namespaces used to load into a new PyBEL namespace store. Resources: .. seealso:: Overview on OpenBEL namespaces https://wiki.openbel.org/display/BELNA/Namespaces+Overview .. 
seealso:: Building custom namespaces http://openbel-framework.readthedocs.io/en/latest/tutorials/building_custom_namespaces.html """ __all__ = [ 'default_namespaces_2012', 'default_namespaces_2013', 'default_namespaces_2015', 'default_namespaces', 'fraunhofer_namespaces', 'default_annotations_2012', 'default_annotations_2013', 'default_annotations_2015', 'default_annotations', 'fraunhofer_annotations', 'default_equivalences_2012', 'default_equivalences_2013', 'default_equivalences_2015', 'default_equivalences', ] BEL_FRAMEWORK_BASE = 'http://resources.openbel.org/belframework' BEL_FRAMEWORK_2012 = BEL_FRAMEWORK_BASE + '/1.0' BEL_FRAMEWORK_2013 = BEL_FRAMEWORK_BASE + '/20131211' BEL_FRAMEWORK_2015 = BEL_FRAMEWORK_BASE + '/20150611' BEL_FRAMEWORK_2012_NAMESPACE = BEL_FRAMEWORK_2012 + '/namespace/' BEL_FRAMEWORK_2013_NAMESPACE = BEL_FRAMEWORK_2013 + '/namespace/' BEL_FRAMEWORK_2015_NAMESPACE = BEL_FRAMEWORK_2015 + '/namespace/' BEL_FRAMEWORK_2012_ANNOTATION = BEL_FRAMEWORK_2012 + '/annotation/' BEL_FRAMEWORK_2013_ANNOTATION = BEL_FRAMEWORK_2013 + '/annotation/' BEL_FRAMEWORK_2015_ANNOTATION = BEL_FRAMEWORK_2015 + '/annotation/' BEL_FRAMEWORK_2012_EQUIVALENCE = BEL_FRAMEWORK_2012 + '/equivalence/' BEL_FRAMEWORK_2013_EQUIVALENCE = BEL_FRAMEWORK_2013 + '/equivalence/' BEL_FRAMEWORK_2015_EQUIVALENCE = BEL_FRAMEWORK_2015 + '/equivalence/' BEL_FRAMEWORK_2015_DATE = '20150601' FRAUNHOFER_RESOURCES_BASE = 'https://arty.scai.fraunhofer.de/artifactory/bel' FRAUNHOFER_RESOURCES_NAMESPACE = FRAUNHOFER_RESOURCES_BASE + '/namespace' FRAUNHOFER_RESOURCES_ANNOTATION = FRAUNHOFER_RESOURCES_BASE + '/annotation' default_namespaces_2012_names = [ 'affy-hg-u133-plus2', 'affy-hg-u133ab', 'affy-hg-u95av2', 'affy-mg-u74abc', 'affy-moe430ab', 'affy-mouse430-2', 'affy-mouse430a-2', 'affy-rae230ab-2', 'affy-rat230-2', 'chebi-ids', 'chebi-names', 'entrez-gene-ids-hmr', 'go-biological-processes-accession-numbers', 'go-biological-processes-names', 'go-cellular-component-accession-numbers', 
'go-cellular-component-terms', 'hgnc-approved-symbols', 'mesh-biological-processes', 'mesh-cellular-locations', 'mesh-diseases', 'mgi-approved-symbols', 'rgd-approved-symbols', 'selventa-legacy-chemical-names', 'selventa-legacy-diseases', 'selventa-named-human-complexes', 'selventa-named-human-protein-families', 'selventa-named-mouse-complexes', 'selventa-named-mouse-protein-families', 'selventa-named-rat-complexes', 'selventa-named-rat-protein-families', 'swissprot-accession-numbers', 'swissprot-entry-names', ] default_namespaces_2012 = [ '{}{}.belns'.format(BEL_FRAMEWORK_2012_NAMESPACE, name) for name in default_namespaces_2012_names ] default_namespaces_2013_names = [ 'affy-probeset-ids', 'chebi-ids', 'chebi', 'disease-ontology-ids', 'disease-ontology', 'entrez-gene-ids', 'go-biological-process-ids', 'go-biological-process', 'go-cellular-component-ids', 'go-cellular-component', 'hgnc-human-genes', 'mesh-cellular-structures', 'mesh-diseases', 'mesh-processes', 'mgi-mouse-genes', 'rgd-rat-genes', 'selventa-legacy-chemicals', 'selventa-legacy-diseases', 'selventa-named-complexes', 'selventa-protein-families', 'swissprot-ids', 'swissprot', ] default_namespaces_2013 = [ '{}{}.belns'.format(BEL_FRAMEWORK_2013_NAMESPACE, name) for name in default_namespaces_2013_names ] namespaces_2015 = [ 'affy-probeset-ids', 'chebi-ids', 'chebi', 'disease-ontology-ids', 'disease-ontology', 'entrez-gene-ids', 'go-biological-process-ids', 'go-biological-process', 'go-cellular-component-ids', 'go-cellular-component', 'hgnc-human-genes', 'mesh-cellular-structures-ids', 'mesh-cellular-structures', 'mesh-chemicals-ids', 'mesh-chemicals', 'mesh-diseases-ids', 'mesh-diseases', 'mesh-processes-ids', 'mesh-processes', 'mgi-mouse-genes', 'rgd-rat-genes', 'selventa-legacy-chemicals', 'selventa-legacy-diseases', 'selventa-named-complexes', 'selventa-protein-families', 'swissprot-ids', 'swissprot' ] default_namespaces_2015 = [ '{}{}.belns'.format(BEL_FRAMEWORK_2015_NAMESPACE, namespace) for 
namespace in namespaces_2015 ] fraunhofer_namespaces = [ '{base}/{module}/{module}-{date}.belns'.format( base=FRAUNHOFER_RESOURCES_NAMESPACE, module=namespace, date=BEL_FRAMEWORK_2015_DATE ) for namespace in namespaces_2015 ] default_namespaces = default_namespaces_2012 + default_namespaces_2013 + default_namespaces_2015 default_annotations_2012_names = [ 'atcc-cell-line', 'mesh-body-region', 'mesh-cardiovascular-system', 'mesh-cell-structure', 'mesh-cell', 'mesh-digestive-system', 'mesh-disease', 'mesh-embryonic-structure', 'mesh-endocrine-system', 'mesh-fluid-and-secretion', 'mesh-hemic-and-immune-system', 'mesh-integumentary-system', 'mesh-musculoskeletal-system', 'mesh-nervous-system', 'mesh-respiratory-system', 'mesh-sense-organ', 'mesh-stomatognathic-system', 'mesh-tissue', 'mesh-urogenital-system', 'species-taxonomy-id', ] default_annotations_2012 = [ '{}{}.belanno'.format(BEL_FRAMEWORK_2012_ANNOTATION, name) for name in default_annotations_2012_names ] annotations_current = [ 'anatomy', 'cell-line', 'cell-structure', 'cell', 'disease', 'mesh-anatomy', 'mesh-diseases', 'species-taxonomy-id', ] default_annotations_2013 = [ '{}{}.belanno'.format(BEL_FRAMEWORK_2013_ANNOTATION, annotation) for annotation in annotations_current ] default_annotations_2015 = [ '{}{}.belanno'.format(BEL_FRAMEWORK_2015_ANNOTATION, annotation) for annotation in annotations_current ] fraunhofer_annotations = [ '{base}/{module}/{module}-{date}.belns'.format( base=FRAUNHOFER_RESOURCES_ANNOTATION, module=annotation, date=BEL_FRAMEWORK_2015_DATE ) for annotation in annotations_current ] default_annotations = default_annotations_2012 + default_annotations_2013 + default_annotations_2015 default_equivalences_2012_names = [ 'affy-hg-u133-plus2', 'affy-hg-u133ab', 'affy-hg-u95av2', 'affy-mg-u74abc', 'affy-moe430ab', 'affy-mouse430-2', 'affy-mouse430a-2', 'affy-rae230ab-2', 'affy-rat230-2', 'chebi-ids', 'chebi-names', 'entrez-gene-ids-hmr', 'go-biological-processes-accession-numbers', 
'go-biological-processes-names', 'go-cellular-component-accession-numbers', 'go-cellular-component-terms', 'hgnc-approved-symbols', 'mesh-biological-processes', 'mesh-cellular-locations', 'mesh-diseases', 'mgi-approved-symbols', 'rgd-approved-symbols', 'selventa-named-human-complexes', 'selventa-named-human-protein-families', 'selventa-named-mouse-complexes', 'selventa-named-mouse-protein-families', 'selventa-named-rat-complexes', 'selventa-named-rat-protein-families', 'swissprot-accession-numbers', 'swissprot-entry-names' ] default_equivalences_2012 = [ '{}{}.beleq'.format(BEL_FRAMEWORK_2012_EQUIVALENCE, name) for name in default_equivalences_2012_names ] default_equivalences_2013_names = [ 'affy-probeset-ids', 'chebi-ids', 'chebi', 'disease-ontology-ids', 'disease-ontology', 'entrez-gene-ids', 'go-biological-process-ids', 'go-biological-process', 'go-cellular-component-ids', 'go-cellular-component', 'hgnc-human-genes', 'mesh-cellular-structures', 'mesh-diseases', 'mesh-processes', 'mgi-mouse-genes', 'rgd-rat-genes', 'selventa-legacy-chemicals', 'selventa-legacy-diseases', 'selventa-named-complexes', 'selventa-protein-families', 'swissprot-ids', 'swissprot' ] default_equivalences_2013 = [ '{}{}.beleq'.format(BEL_FRAMEWORK_2013_EQUIVALENCE, name) for name in default_equivalences_2013_names ] default_equivalences_2015_names = [ 'affy-probeset-ids', 'chebi-ids', 'chebi', 'disease-ontology-ids', 'disease-ontology', 'entrez-gene-ids', 'go-biological-process-ids', 'go-biological-process', 'go-cellular-component-ids', 'go-cellular-component', 'hgnc-human-genes', 'mesh-cellular-structures-ids', 'mesh-cellular-structures', 'mesh-chemicals-ids', 'mesh-chemicals', 'mesh-diseases-ids', 'mesh-diseases', 'mesh-processes-ids', 'mesh-processes', 'mgi-mouse-genes', 'rgd-rat-genes', 'selventa-legacy-chemicals', 'selventa-legacy-diseases', 'selventa-named-complexes', 'selventa-protein-families', 'swissprot-ids', 'swissprot' ] default_equivalences_2015 = [ 
'{}{}.beleq'.format(BEL_FRAMEWORK_2015_EQUIVALENCE, name) for name in default_equivalences_2015_names ] default_equivalences = default_equivalences_2012 + default_equivalences_2013 + default_equivalences_2015 pybel-0.12.1/src/pybel/manager/exc.py000066400000000000000000000022501334645200200173510ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Exceptions for the manager.""" from ..constants import LINE from ..exceptions import PyBELWarning MSG = "Error adding edge {line_s} to database. Check this line in the file and make sure the citation, " \ "evidence, and annotations all use valid UTF-8 characters: {source} {target} {key} {data} with " \ "original error:\n {error}" class EdgeAddError(PyBELWarning): """When there's a problem inserting an edge.""" def __init__(self, e, u, v, key, data): # noqa: D107 super(EdgeAddError, self).__init__(e, u, v, key, data) self.error = e self.source = u self.target = v self.key = key self.data = data def __str__(self): line_s = 'from line {} '.format(self.line) if LINE in self.data else '' return MSG.format( line_s=line_s, source=self.source, target=self.target, key=self.key, data=self.data, error=self.error, ) @property def line(self): """Return the BEL script's line on which this error occurred. :rtype: str """ return self.data.get(LINE) pybel-0.12.1/src/pybel/manager/lookup_manager.py000066400000000000000000000071101334645200200215750ustar00rootroot00000000000000# -*- coding: utf-8 -*- from .base_manager import BaseManager from .models import Author, Citation, Edge, Evidence, Node from ..constants import CITATION_TYPE_PUBMED from ..utils import hash_citation class LookupManager(BaseManager): """Groups functions for looking up entries by hashes.""" def get_node_by_hash(self, node_hash): """Look up a node by the hash of a PyBEL node tuple. 
:param str node_hash: The hash of a PyBEL node tuple from :func:`pybel.utils.hash_node` :rtype: Optional[Node] """ return self.session.query(Node).filter(Node.sha512 == node_hash).one_or_none() def get_nodes_by_hashes(self, node_hashes): """Look up several nodes by hashes of their PyBEL node tuples. :param List[str] node_hashes: The hashes of PyBEL node tuples from :func:`pybel.utils.hash_node` :rtype: List[Node] """ return self.session.query(Node).filter(Node.sha512.in_(node_hashes)).all() def get_node_by_dsl(self, node_dict): """Look up a node by its data dictionary by hashing it then using :func:`get_node_by_hash`. :param node_dict: A PyBEL node data dictionary :type node_dict: pybel.dsl.BaseEntity :rtype: Optional[Node] """ return self.get_node_by_hash(node_dict.as_sha512()) def get_edge_by_hash(self, edge_hash): """Look up an edge by the hash of a PyBEL edge data dictionary. :param str edge_hash: The hash of a PyBEL edge data dictionary from :func:`pybel.utils.hash_edge` :rtype: Optional[Edge] """ return self.session.query(Edge).filter(Edge.sha512 == edge_hash).one_or_none() def get_edges_by_hashes(self, edge_hashes): """Look up several edges by hashes of their PyBEL edge data dictionaries. :param List[str] edge_hashes: The hashes of PyBEL edge data dictionaries from :func:`pybel.utils.hash_edge` :rtype: List[Edge] """ return self.session.query(Edge).filter(Edge.sha512.in_(edge_hashes)).all() def get_citation_by_pmid(self, pubmed_identifier): """Get a citation object by its PubMed identifier. :param str pubmed_identifier: The PubMed identifier :rtype: Optional[Citation] """ return self.get_citation_by_reference(reference=pubmed_identifier, type=CITATION_TYPE_PUBMED) def get_citation_by_reference(self, type, reference): """Get a citation object by its type and reference. 
:param str type: The reference type :param str reference: The identifier in the source (e.g., PubMed identifier) :rtype: Optional[Citation] """ citation_hash = hash_citation(type=type, reference=reference) return self.get_citation_by_hash(citation_hash) def get_citation_by_hash(self, citation_hash): """Get a citation object by its hash. :param str citation_hash: The hash of the citation :rtype: Optional[Citation] """ return self.session.query(Citation).filter(Citation.sha512 == citation_hash).one_or_none() def get_author_by_name(self, name): """Get an author by name, if it exists in the database. :param str name: An author's name :rtype: Optional[Author] """ return self.session.query(Author).filter(Author.has_name(name)).one_or_none() def get_evidence_by_hash(self, evidence_hash): """Look up an evidence by its hash. :param str evidence_hash: :rtype: Optional[Evidence] """ return self.session.query(Evidence).filter(Evidence.sha512 == evidence_hash).one_or_none() pybel-0.12.1/src/pybel/manager/make_json_serializable.py000066400000000000000000000011001334645200200232570ustar00rootroot00000000000000""" Module that monkey-patches json module when it's imported so JSONEncoder.default() automatically checks for a special "to_json()" method and uses it to encode the object if found. Provided by user martineau at: http://stackoverflow.com/questions/18478287/making-object-json-serializable-with-regular-encoder/18561055#18561055 """ from json import JSONEncoder def _default(self, obj): return getattr(obj.__class__, "to_json", _default.default)(obj) _default.default = JSONEncoder().default # Save unmodified default. 
JSONEncoder.default = _default # replacement pybel-0.12.1/src/pybel/manager/models.py000066400000000000000000001015151334645200200200610ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains the SQLAlchemy database models that support the definition cache and graph cache.""" import datetime import hashlib from collections import defaultdict from sqlalchemy import ( Boolean, Column, Date, DateTime, ForeignKey, Integer, LargeBinary, String, Table, Text, UniqueConstraint, ) from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import backref, relationship from .utils import int_or_str from ..constants import ( ANNOTATIONS, BELNS_ENCODING_STR, CITATION, CITATION_AUTHORS, CITATION_DATE, CITATION_FIRST_AUTHOR, CITATION_LAST_AUTHOR, CITATION_NAME, CITATION_PAGES, CITATION_REFERENCE, CITATION_TITLE, CITATION_TYPE, CITATION_TYPE_PUBMED, CITATION_VOLUME, COMPLEX, COMPOSITE, EFFECT, EVIDENCE, FRAGMENT, FUSION, GMOD, HAS_COMPONENT, HAS_PRODUCT, HAS_REACTANT, HGVS, IDENTIFIER, LOCATION, METADATA_AUTHORS, METADATA_CONTACT, METADATA_COPYRIGHT, METADATA_DESCRIPTION, METADATA_DISCLAIMER, METADATA_LICENSES, METADATA_NAME, METADATA_VERSION, MODIFIER, NAME, NAMESPACE, OBJECT, PARTNER_3P, PARTNER_5P, PMOD, RANGE_3P, RANGE_5P, REACTION, RELATION, SUBJECT, ) from ..dsl import ( FUNC_TO_DSL, FUNC_TO_FUSION_DSL, complex_abundance, composite_abundance, fragment, fusion_range, gmod, hgvs, missing_fusion_range, named_complex_abundance, pmod, reaction, ) from ..io.gpickle import from_bytes, to_bytes __all__ = [ 'Base', 'Namespace', 'NamespaceEntry', 'Network', 'Node', 'Modification', 'Author', 'Citation', 'Evidence', 'Edge', 'Property', 'edge_annotation', 'edge_property', 'network_edge', 'network_node', ] NAME_TABLE_NAME = 'pybel_name' NAMESPACE_TABLE_NAME = 'pybel_namespace' NAME_HIERARCHY_TABLE_NAME = 'pybel_name_hierarchy' NODE_TABLE_NAME = 'pybel_node' MODIFICATION_TABLE_NAME = 'pybel_modification' NODE_MODIFICATION_TABLE_NAME = 
'pybel_node_modification' PROPERTY_TABLE_NAME = 'pybel_property' EDGE_TABLE_NAME = 'pybel_edge' EDGE_ANNOTATION_TABLE_NAME = 'pybel_edge_name' EDGE_PROPERTY_TABLE_NAME = 'pybel_edge_property' AUTHOR_TABLE_NAME = 'pybel_author' CITATION_TABLE_NAME = 'pybel_citation' AUTHOR_CITATION_TABLE_NAME = 'pybel_author_citation' EVIDENCE_TABLE_NAME = 'pybel_evidence' NETWORK_TABLE_NAME = 'pybel_network' NETWORK_NODE_TABLE_NAME = 'pybel_network_node' NETWORK_EDGE_TABLE_NAME = 'pybel_network_edge' NETWORK_NAMESPACE_TABLE_NAME = 'pybel_network_namespace' NETWORK_ANNOTATION_TABLE_NAME = 'pybel_network_annotation' LONGBLOB = 4294967295 Base = declarative_base() name_hierarchy = Table( NAME_HIERARCHY_TABLE_NAME, Base.metadata, Column('left_id', Integer, ForeignKey('{}.id'.format(NAME_TABLE_NAME)), primary_key=True), Column('right_id', Integer, ForeignKey('{}.id'.format(NAME_TABLE_NAME)), primary_key=True) ) class Namespace(Base): """Represents a BEL Namespace.""" __tablename__ = NAMESPACE_TABLE_NAME id = Column(Integer, primary_key=True) uploaded = Column(DateTime, nullable=False, default=datetime.datetime.utcnow, doc='The date of upload') # logically the "namespace" keyword = Column(String(255), nullable=True, index=True, doc='Keyword that is used in a BEL file to identify a specific namespace') # A namespace either needs a URL or a pattern pattern = Column(String(255), nullable=True, unique=True, index=True, doc="Contains regex pattern for value identification.") miriam_id = Column(String(16), nullable=True, doc='MIRIAM resource identifier matching the regular expression ``^MIR:001\d{5}$``') miriam_name = Column(String(255), nullable=True) miriam_namespace = Column(String(255), nullable=True) miriam_uri = Column(String(255), nullable=True) miriam_description = Column(Text, nullable=True) version = Column(String(255), nullable=True, doc='Version of the namespace') url = Column(String(255), nullable=True, unique=True, index=True, doc='BELNS Resource location as URL') name = 
Column(String(255), nullable=True, doc='Name of the given namespace') domain = Column(String(255), nullable=True, doc='Domain for which this namespace is valid') species = Column(String(255), nullable=True, doc='Taxonomy identifiers for which this namespace is valid') description = Column(Text, nullable=True, doc='Optional short description of the namespace') created = Column(DateTime, nullable=True, doc='DateTime of the creation of the namespace definition file') query_url = Column(Text, nullable=True, doc='URL that can be used to query the namespace (externally from PyBEL)') author = Column(String(255), nullable=True, doc='The author of the namespace') license = Column(String(255), nullable=True, doc='License information') contact = Column(String(255), nullable=True, doc='Contact information') citation = Column(String(255), nullable=True) citation_description = Column(Text, nullable=True) citation_version = Column(String(255), nullable=True) citation_published = Column(Date, nullable=True) citation_url = Column(String(255), nullable=True) is_annotation = Column(Boolean) def __str__(self): return self.keyword def get_entry_names(self): """Get all entry names. :rtype: set[str] """ return {entry.name for entry in self.entries} def to_values(self): """Return this namespace as a dictionary of names to their encodings. Encodings are represented as a string, and lookup operations take constant time O(8). 
:rtype: dict[str,str] """ return { entry.name: entry.encoding if entry.encoding else BELNS_ENCODING_STR for entry in self.entries } def to_tree_list(self): """Returns an edge set of the tree represented by this namespace's hierarchy :rtype: set[tuple[str,str]] """ return { (parent.name, child.name) for parent in self.entries for child in parent.children } def to_json(self, include_id=False): """Returns the most useful entries as a dictionary :param bool include_id: If true, includes the model identifier :rtype: dict[str,str] """ result = { 'keyword': self.keyword, 'name': self.name, 'version': self.version, } if self.url: result['url'] = self.url else: result['pattern'] = self.pattern if include_id: result['id'] = self.id return result class NamespaceEntry(Base): """Represents a name within a BEL namespace.""" __tablename__ = NAME_TABLE_NAME id = Column(Integer, primary_key=True) name = Column(String(1023), index=True, nullable=False, doc='Name that is defined in the corresponding namespace definition file') identifier = Column(String(255), index=True, nullable=True, doc='The database accession number') encoding = Column(String(8), nullable=True, doc='The biological entity types for which this name is valid') namespace_id = Column(Integer, ForeignKey('{}.id'.format(NAMESPACE_TABLE_NAME)), nullable=False, index=True) namespace = relationship(Namespace, backref=backref('entries', lazy='dynamic')) is_name = Column(Boolean) is_annotation = Column(Boolean) children = relationship( 'NamespaceEntry', secondary=name_hierarchy, primaryjoin=(id == name_hierarchy.c.left_id), secondaryjoin=(id == name_hierarchy.c.right_id), ) def to_json(self, include_id=False): """Describe the namespaceEntry as dictionary of Namespace-Keyword and Name. 
class Network(Base):
    """A BEL network stored in the database: document metadata plus a serialized copy of the graph."""

    __tablename__ = NETWORK_TABLE_NAME

    id = Column(Integer, primary_key=True)

    # Identity of the network document
    name = Column(
        String(255),
        nullable=False,
        index=True,
        doc='Name of the given Network (from the BEL file)',
    )
    version = Column(
        String(255),
        nullable=False,
        doc='Release version of the given Network (from the BEL file)',
    )

    # Optional document metadata
    authors = Column(Text, nullable=True, doc='Authors of the underlying BEL file')
    contact = Column(String(255), nullable=True, doc='Contact email from the underlying BEL file')
    description = Column(Text, nullable=True, doc='Descriptive text from the underlying BEL file')
    copyright = Column(Text, nullable=True, doc='Copyright information')
    disclaimer = Column(Text, nullable=True, doc='Disclaimer information')
    licenses = Column(Text, nullable=True, doc='License information')

    created = Column(DateTime, nullable=False, default=datetime.datetime.utcnow)
    blob = Column(LargeBinary(LONGBLOB), doc='A pickled version of this network')

    nodes = relationship(
        'Node',
        secondary=network_node,
        lazy='dynamic',
        backref=backref('networks', lazy='dynamic'),
    )
    edges = relationship(
        'Edge',
        secondary=network_edge,
        lazy='dynamic',
        backref=backref('networks', lazy='dynamic'),
    )

    # A network is identified by its (name, version) pair
    __table_args__ = (UniqueConstraint(name, version),)

    def to_json(self, include_id=False):
        """Summarize the metadata of this network as a dictionary.

        Name and version are always present; every other entry is only added when its value is truthy.

        :param bool include_id: If true, includes the model identifier
        :rtype: dict[str,str]
        """
        summary = {
            METADATA_NAME: self.name,
            METADATA_VERSION: self.version,
        }

        if self.created:
            summary['created'] = str(self.created)

        if include_id:
            summary['id'] = self.id

        optional_metadata = (
            (METADATA_AUTHORS, self.authors),
            (METADATA_CONTACT, self.contact),
            (METADATA_DESCRIPTION, self.description),
            (METADATA_COPYRIGHT, self.copyright),
            (METADATA_DISCLAIMER, self.disclaimer),
            (METADATA_LICENSES, self.licenses),
        )
        for key, value in optional_metadata:
            if value:
                summary[key] = value

        return summary

    @classmethod
    def name_contains(cls, name_query):
        """Make a filter matching networks whose name contains the query.

        :param str name_query:
        """
        name_column = cls.name
        return name_column.contains(name_query)

    @classmethod
    def description_contains(cls, description_query):
        """Make a filter matching networks whose description contains the query.

        :param str description_query:
        """
        description_column = cls.description
        return description_column.contains(description_query)

    @classmethod
    def id_in(cls, network_ids):
        """Make a filter matching networks whose identifier is one of the given ones.

        :param iter[int] network_ids:
        """
        id_column = cls.id
        return id_column.in_(network_ids)

    def __repr__(self):
        return '{} v{}'.format(self.name, self.version)

    def __str__(self):
        return repr(self)

    def as_bel(self):
        """Deserialize the stored blob into a :class:`BELGraph`.

        :rtype: pybel.BELGraph
        """
        # NOTE(review): the ``blob`` column is documented as pickled data; if ``from_bytes``
        # unpickles, only load networks from a database you trust - confirm.
        serialized = self.blob
        return from_bytes(serialized)

    def store_bel(self, graph):
        """Serialize a BEL graph and keep it as this network's blob.

        :param pybel.BELGraph graph: A BEL Graph
        """
        serialized = to_bytes(graph)
        self.blob = serialized
class Node(Base):
    """Represents a BEL Term."""

    __tablename__ = NODE_TABLE_NAME

    id = Column(Integer, primary_key=True)

    type = Column(String(255), nullable=False, doc='The type of the represented biological entity e.g. Protein or Gene')
    is_variant = Column(Boolean, default=False, doc='Identifies weather or not the given node is a variant')
    has_fusion = Column(Boolean, default=False, doc='Identifies weather or not the given node is a fusion')
    bel = Column(String(255), nullable=False, doc='Canonical BEL term that represents the given node')
    sha512 = Column(String(255), nullable=True, index=True)

    namespace_entry_id = Column(Integer, ForeignKey('{}.id'.format(NAME_TABLE_NAME)), nullable=True)
    namespace_entry = relationship(NamespaceEntry, foreign_keys=[namespace_entry_id])

    modifications = relationship(Modification, secondary=node_modification, lazy='dynamic',
                                 backref=backref('nodes', lazy='dynamic'))

    @classmethod
    def bel_contains(cls, bel_query):
        """Build a filter for nodes whose BEL contain the query.

        :type bel_query: str
        """
        return cls.bel.contains(bel_query)

    def __str__(self):
        return self.bel

    def __repr__(self):
        # The template used to be empty, so the representation was always ''.
        # ``sha512`` is nullable, so guard the slice against None.
        return '<Node {}: {}>'.format((self.sha512 or '')[:10], self.bel)

    def _get_list_by_relation(self, relation):
        """Serialize the targets of all outgoing edges that have the given relation.

        :param str relation: The relation to filter the outgoing edges by
        :rtype: list
        """
        return [
            edge.target.to_json()
            for edge in self.out_edges.filter(Edge.relation == relation)
        ]

    def _entry_kwargs(self):
        """Get the namespace/name/identifier keyword arguments from this node's namespace entry.

        Only call this when ``namespace_entry`` is set.

        :rtype: dict
        """
        entry = self.namespace_entry
        return dict(
            namespace=entry.namespace.keyword,
            name=entry.name,
            identifier=entry.identifier,
        )

    def as_bel(self):
        """Serialize this node as a PyBEL DSL object.

        :rtype: pybel.dsl.BaseEntity
        :raises ValueError: if this is a complex that has neither a namespace entry nor members
        """
        func = self.type

        if self.has_fusion:
            # A fusion node keeps both partners and both ranges in its first modification
            j = self.modifications[0].to_json()
            fusion_dsl = FUNC_TO_FUSION_DSL[func]
            member_dsl = FUNC_TO_DSL[func]
            return fusion_dsl(
                partner_5p=member_dsl(**j[PARTNER_5P]),
                partner_3p=member_dsl(**j[PARTNER_3P]),
                range_5p=j.get(RANGE_5P),
                range_3p=j.get(RANGE_3P),
            )

        if func == REACTION:
            return reaction(
                reactants=self._get_list_by_relation(HAS_REACTANT),
                products=self._get_list_by_relation(HAS_PRODUCT)
            )

        if func in {COMPLEX, COMPOSITE}:
            members = self._get_list_by_relation(HAS_COMPONENT)

            if func == COMPOSITE:
                return composite_abundance(members)

            if self.namespace_entry:
                if members:
                    return complex_abundance(members=members, **self._entry_kwargs())
                return named_complex_abundance(**self._entry_kwargs())

            if members:
                return complex_abundance(members=members)

            raise ValueError('complex can not be nameless and have no members')

        dsl = FUNC_TO_DSL[func]

        if self.is_variant:
            return dsl(
                variants=[
                    modification.to_json()
                    for modification in self.modifications
                ],
                **self._entry_kwargs()
            )

        return dsl(**self._entry_kwargs())

    def to_json(self):
        """Serialize this node; an alias of :meth:`as_bel`.

        :rtype: pybel.dsl.BaseEntity
        """
        return self.as_bel()
class Citation(Base):
    """A publication that is cited as the provenance of one or more relations."""

    __tablename__ = CITATION_TABLE_NAME

    id = Column(Integer, primary_key=True)

    # What is cited
    type = Column(
        String(16),
        nullable=False,
        doc='Type of the stored publication e.g. PubMed',
    )
    reference = Column(
        String(255),
        nullable=False,
        doc='Reference identifier of the publication e.g. PubMed_ID',
    )
    sha512 = Column(String(255), index=True)

    # Bibliographic metadata, all optional
    name = Column(String(255), nullable=True, doc='Journal name')
    title = Column(Text, nullable=True, doc='Title of the publication')
    volume = Column(Text, nullable=True, doc='Volume of the journal')
    issue = Column(Text, nullable=True, doc='Issue within the volume')
    pages = Column(Text, nullable=True, doc='Pages of the publication')
    date = Column(Date, nullable=True, doc='Publication date')

    first_id = Column(
        Integer,
        ForeignKey('{}.id'.format(AUTHOR_TABLE_NAME)),
        nullable=True,
        doc='First author',
    )
    first = relationship(Author, foreign_keys=[first_id])

    last_id = Column(
        Integer,
        ForeignKey('{}.id'.format(AUTHOR_TABLE_NAME)),
        nullable=True,
        doc='Last author',
    )
    last = relationship(Author, foreign_keys=[last_id])

    authors = relationship(Author, secondary=author_citation, backref='citations')

    # A citation is identified by its (type, reference) pair
    __table_args__ = (UniqueConstraint(CITATION_TYPE, CITATION_REFERENCE),)

    def __str__(self):
        return '{}:{}'.format(self.type, self.reference)

    @property
    def is_pubmed(self):
        """Tell whether the type of this citation is PubMed.

        :rtype: bool
        """
        return self.type == CITATION_TYPE_PUBMED

    @property
    def is_enriched(self):
        """Tell whether both the title and the journal name of this citation are filled in.

        :rtype: bool
        """
        return not (self.title is None or self.name is None)

    def to_json(self, include_id=False):
        """Build the citation dictionary used in the edge data of a :class:`BELGraph`.

        Reference and type are always present; the other entries are only added when they have a
        truthy value.

        :param bool include_id: If true, includes the model identifier
        :return: Citation dictionary for the recreation of a :class:`BELGraph`.
        :rtype: dict[str,str]
        """
        citation = {
            CITATION_REFERENCE: self.reference,
            CITATION_TYPE: self.type
        }

        if include_id:
            citation['id'] = self.id

        # NOTE(review): ``issue`` is stored on the model but is not exported here - confirm intended
        for key, value in (
            (CITATION_NAME, self.name),
            (CITATION_TITLE, self.title),
            (CITATION_VOLUME, self.volume),
            (CITATION_PAGES, self.pages),
        ):
            if value:
                citation[key] = value

        publication_date = self.date
        if publication_date:
            citation[CITATION_DATE] = publication_date.strftime('%Y-%m-%d')

        first_author = self.first
        if first_author:
            citation[CITATION_FIRST_AUTHOR] = first_author.name

        last_author = self.last
        if last_author:
            citation[CITATION_LAST_AUTHOR] = last_author.name

        all_authors = self.authors
        if all_authors:
            names = [author.name for author in all_authors]
            names.sort()
            citation[CITATION_AUTHORS] = names

        return citation
@property
def side(self):
    """Tell which participant of the edge this property describes.

    :return: :data:`pybel.constants.SUBJECT` when ``is_subject`` is truthy, otherwise
     :data:`pybel.constants.OBJECT`
    :rtype: str
    """
    if self.is_subject:
        return SUBJECT

    return OBJECT
class Edge(Base):
    """Relationships between BEL nodes and their properties, annotations, and provenance."""

    __tablename__ = EDGE_TABLE_NAME

    id = Column(Integer, primary_key=True)

    bel = Column(Text, nullable=False, doc='Valid BEL statement that represents the given edge')
    relation = Column(String(255), nullable=False)

    source_id = Column(Integer, ForeignKey('{}.id'.format(NODE_TABLE_NAME)), nullable=False)
    source = relationship(Node, foreign_keys=[source_id],
                          backref=backref('out_edges', lazy='dynamic', cascade='all, delete-orphan'))

    target_id = Column(Integer, ForeignKey('{}.id'.format(NODE_TABLE_NAME)), nullable=False)
    target = relationship(Node, foreign_keys=[target_id],
                          backref=backref('in_edges', lazy='dynamic', cascade='all, delete-orphan'))

    evidence_id = Column(Integer, ForeignKey('{}.id'.format(EVIDENCE_TABLE_NAME)), nullable=True)
    evidence = relationship(Evidence, backref=backref('edges', lazy='dynamic'))

    annotations = relationship(NamespaceEntry, secondary=edge_annotation, lazy="dynamic",
                               backref=backref('edges', lazy='dynamic'))
    properties = relationship(Property, secondary=edge_property, lazy="dynamic")  # , cascade='all, delete-orphan')

    sha512 = Column(String(255), index=True, doc='The hash of the source, target, and associated metadata')

    def __str__(self):
        return self.bel

    def __repr__(self):
        # The template used to be empty, so the representation was always ''.
        # ``sha512`` may be NULL, so guard the slice against None.
        return '<Edge {}: {}>'.format((self.sha512 or '')[:10], self.bel)

    def get_annotations_json(self):
        """Format the annotations properly.

        :return: A mapping from annotation keyword to a mapping of its values to ``True``, or None
         if this edge has no annotations
        :rtype: Optional[dict[str,dict[str,bool]]]
        """
        annotations = defaultdict(dict)

        for entry in self.annotations:
            annotations[entry.namespace.keyword][entry.name] = True

        # An empty mapping is reported as None so callers can skip the key entirely
        return dict(annotations) or None

    def get_data_json(self):
        """Get the PyBEL edge data dictionary this edge represents.

        :rtype: dict
        """
        data = {
            RELATION: self.relation,
        }

        annotations = self.get_annotations_json()
        if annotations:
            data[ANNOTATIONS] = annotations

        if self.evidence:
            data.update(self.evidence.to_json())

        for prop in self.properties:  # FIXME this is also probably broken for translocations or mixed activity/degrad
            # NOTE(review): Property.to_json() already returns a dictionary keyed by the side,
            # so the value stored here is nested as {side: {side: ...}} - confirm this is intended
            if prop.side not in data:
                data[prop.side] = prop.to_json()
            else:
                data[prop.side].update(prop.to_json())

        return data

    def to_json(self, include_id=False):
        """Create a dictionary of one BEL Edge that can be used to create an edge in a :class:`BELGraph`.

        :param bool include_id: Include the database identifier?
        :return: Dictionary that contains information about an edge of a :class:`BELGraph`. Including participants
         and edge data information.
        :rtype: dict
        """
        result = {
            'source': self.source.to_json(),
            'target': self.target.to_json(),
            'key': self.sha512,
            'data': self.get_data_json(),
        }

        if include_id:
            result['id'] = self.id

        return result

    def insert_into_graph(self, graph):
        """Insert this edge into a BEL graph.

        Both end points are added to the graph first; the edge is keyed by this edge's hash.

        :param pybel.BELGraph graph: A BEL graph
        """
        u = graph.add_node_from_data(self.source.to_json())
        v = graph.add_node_from_data(self.target.to_json())
        graph.add_edge(u, v, key=self.sha512, **self.get_data_json())
def get_edges_with_citations(self, citations):
    """Get edges with one of the given citations.

    The filter is built on the ``citation_id`` foreign key because SQLAlchemy does not
    support ``in_()`` on a relationship such as ``Evidence.citation``.

    :param iter[Citation] citations: Citations that are already stored in the database
    :rtype: list[Edge]
    """
    citation_ids = [citation.id for citation in citations]

    if not citation_ids:
        # Nothing can match, so skip the query (and an empty IN clause)
        return []

    return self.session.query(Edge).join(Evidence).filter(Evidence.citation_id.in_(citation_ids)).all()
def _resolve_edge_endpoint(self, endpoint):
    """Turn a BEL term or a :class:`Node` into a :class:`Node`.

    :param endpoint: BEL term of a node or a :class:`Node` object
    :type endpoint: str or Node
    :return: The node, or None if the BEL term does not match any node
    :rtype: Optional[Node]
    :raises TypeError: if the end point is neither a string nor a :class:`Node`
    """
    if isinstance(endpoint, string_types):
        matches = self.query_nodes(bel=endpoint)
        return matches[0] if matches else None  # FIXME what if this matches multiple?

    if isinstance(endpoint, Node):
        return endpoint

    raise TypeError('Invalid type of {}: {}'.format(endpoint, endpoint.__class__.__name__))


def query_edges(self, bel=None, source_function=None, source=None, target_function=None, target=None,
                relation=None):
    """Query edges in the database.

    If ``bel`` is given, all other arguments are ignored. If ``source`` or ``target`` is a BEL term
    that matches no node, an empty list is returned.

    :param str bel: BEL statement that represents the desired edge.
    :param str source_function: Filter source nodes with the given BEL function
    :param source: BEL term of source node e.g. ``p(HGNC:APP)`` or :class:`Node` object.
    :type source: str or Node
    :param str target_function: Filter target nodes with the given BEL function
    :param target: BEL term of target node e.g. ``p(HGNC:APP)`` or :class:`Node` object.
    :type target: str or Node
    :param str relation: The relation that should be present between source and target node.
    :rtype: list[Edge]
    :raises TypeError: if ``source`` or ``target`` is neither a string nor a :class:`Node`
    """
    if bel:
        return self.search_edges_with_bel(bel)

    query = self.session.query(Edge)

    if relation:
        query = query.filter(Edge.relation.like(relation))

    # NOTE(review): giving both function filters joins the node table twice without an alias - confirm it works
    if source_function:
        query = self._add_edge_function_filter(query, Edge.source_id, source_function)

    if target_function:
        query = self._add_edge_function_filter(query, Edge.target_id, target_function)

    if source:
        source_node = self._resolve_edge_endpoint(source)
        if source_node is None:
            return []
        query = query.filter(Edge.source == source_node)

    if target:
        # An unmatched target used to raise IndexError; it now behaves like an unmatched source
        target_node = self._resolve_edge_endpoint(target)
        if target_node is None:
            return []
        query = query.filter(Edge.target == target_node)

    return query.all()
def query_citations(self, type=None, reference=None, name=None, author=None, date=None, evidence_text=None):
    """Query citations in the database.

    :param str type: Type of the citation. e.g. PubMed. Matched with ``LIKE``.
    :param str reference: The identifier used for the citation. e.g. PubMed_ID. Requires ``type``.
    :param str name: Journal name of the citation. Matched with ``LIKE``.
    :param str or list[str] author: The name or a list of names of authors participated in the citation.
    :param date: Publishing date of the citation.
    :type date: str or datetime.date
    :param str evidence_text: Text of an evidence that cites the citation. Matched with ``LIKE``.
    :rtype: list[Citation]
    :raises TypeError: if ``author`` is neither a string nor an iterable
    :raises ValueError: if ``reference`` is given without ``type``
    """
    if reference and not type:
        raise ValueError('reference specified without type')

    query = self.session.query(Citation)

    if author is not None:
        query = query.join(Author, Citation.authors)
        if isinstance(author, string_types):
            query = query.filter(Author.name.like(author))
        # NOTE(review): the module imports ``Iterable`` from ``collections``, which no longer
        # provides it on Python 3.10+ - the import should fall back to ``collections.abc``
        elif isinstance(author, Iterable):
            query = query.filter(Author.has_name_in(set(author)))
        else:
            raise TypeError('author must be a string or an iterable of strings, not {}'.format(
                author.__class__.__name__
            ))

    # The type filter used to be dropped whenever a reference was given too
    if type:
        query = query.filter(Citation.type.like(type))

    if reference:
        query = query.filter(Citation.reference == reference)

    if name:
        query = query.filter(Citation.name.like(name))

    if date:
        # Values that are neither a date nor a string are ignored, as before
        if isinstance(date, datetime.date):
            query = query.filter(Citation.date == date)
        elif isinstance(date, string_types):
            query = query.filter(Citation.date == parse_datetime(date))

    if evidence_text:
        query = query.join(Evidence).filter(Evidence.text.like(evidence_text))

    return query.all()
def extract_shared_required(config, definition_header='Namespace'):
    """Pull out the entries that both BEL namespace and annotation resource documents must have.

    :param dict config: The configuration dictionary representing a BEL resource
    :param str definition_header: ``Namespace`` or ``AnnotationDefinition``
    :return: A dictionary with the ``keyword`` and the parsed ``created`` time of the resource
    :rtype: dict
    """
    keyword = config[definition_header]['Keyword']
    created = parse_datetime(config[definition_header]['CreatedDateTime'])
    return dict(keyword=keyword, created=created)
def update_insert_values(bel_resource, mapping, values):
    """Copy the mapped entries that exist in a BEL resource dictionary into the value dictionary.

    Entries whose section or key is missing from the resource are skipped; ``values`` is modified
    in place.

    :param dict bel_resource: A two-level dictionary from section to key to value
    :param dict[str,tuple[str,str]] mapping: A mapping from database column to (section, key) pair
    :param dict[str,str] values: The dictionary to update
    """
    for column, location in mapping.items():
        section_name, option = location

        if section_name not in bel_resource:
            continue

        section = bel_resource[section_name]
        if option in section:
            values[column] = section[option]
class BaseParser(object):
    """A thin wrapper around a PyParsing language that remembers the line being parsed.

    Parsers that all inherit from this class can easily be chained together.
    """

    def __init__(self, language, streamline=False):
        """Wrap a PyParsing language.

        :param language: The PyParsing language to use
        :param bool streamline: Should the language be streamlined on instantiation?
        """
        self.language = language

        #: The line number that was given to the most recent call of :meth:`parseString`
        self.line_number = 0

        if streamline:
            self.streamline()

    def parse_lines(self, lines):
        """Parse several lines one after another, numbering them from zero.

        :return: A list of the resulting parsed lines' tokens
        """
        results = []
        for index, text in enumerate(lines):
            results.append(self.parseString(text, index))
        return results

    def parseString(self, line, line_number=0):
        """Remember the line number, then parse the line with the wrapped language.

        :param str line: A string representing an instance of this parser's language
        :param int line_number: The current line number of the parser
        """
        self.line_number = line_number
        tokens = self.language.parseString(line)
        return tokens

    def streamline(self):
        """Streamline the wrapped language and log how long that took."""
        started = time.time()
        self.language.streamline()
        elapsed = time.time() - started
        log.info('streamlined %s in %.02f seconds', self.__class__.__name__, elapsed)
{}'.format(self.definition, self.line) class RedefinedNamespaceError(InconsistentDefinitionError): """Raised when a namespace is redefined""" class RedefinedAnnotationError(InconsistentDefinitionError): """Raised when an annotation is redefined""" # Naming Warnings class NameWarning(PyBelParserWarning): """The base class for errors related to nomenclature.""" def __init__(self, line_number, line, position, name, *args): """Build a NameWarning. :param int line_number: The line number on which the warning occurred :param str line: The line on which the warning occurred :param int position: The position in the line that caused the warning :param str name: The name that caused the warning """ super(NameWarning, self).__init__(line_number, line, position, name, *args) self.name = name class NakedNameWarning(NameWarning): """Raised when there is an identifier without a namespace. Enable lenient mode to suppress""" def __str__(self): return '[pos:{}] "{}" should be qualified with a valid namespace'.format(self.position, self.name) class MissingDefaultNameWarning(NameWarning): """Raised if reference to value not in default namespace""" def __str__(self): return '"{}" is not in the default namespace'.format(self.name) class NamespaceIdentifierWarning(NameWarning): """The base class for warnings related to namespace:name identifiers""" def __init__(self, line_number, line, position, namespace, name): """ :param int line_number: The line number of the line that caused the exception :param str line: The line that caused the exception :param int position: The line's position of the exception :param str namespace: The namespace of the identifier :param str name: The name of the identifier """ super(NamespaceIdentifierWarning, self).__init__(line_number, line, position, name, namespace) self.namespace = namespace class UndefinedNamespaceWarning(NamespaceIdentifierWarning): """Raised if reference made to undefined namespace""" def __str__(self): return '"{}" is not a defined 
namespace'.format(self.namespace) class MissingNamespaceNameWarning(NamespaceIdentifierWarning): """Raised if reference to value not in namespace""" def __str__(self): return '"{}" is not in the {} namespace'.format(self.name, self.namespace) class MissingNamespaceRegexWarning(NamespaceIdentifierWarning): """Raised if reference not matching regex""" def __str__(self): return '''"{}" doesn't match the regex for {} namespace'''.format(self.name, self.namespace) class AnnotationWarning(PyBelParserWarning): """Base exception for annotation warnings""" def __init__(self, line_number, line, position, annotation, *args): """Build an AnnotationWarning. :param int line_number: The line number on which the warning occurred :param str line: The line on which the warning occurred :param int position: The position in the line that caused the warning :param str annotation: The annotation name that caused the warning """ super(AnnotationWarning, self).__init__(line_number, line, position, annotation, *args) self.annotation = annotation class UndefinedAnnotationWarning(AnnotationWarning): """Raised when an undefined annotation is used""" def __str__(self): return '''"{}" is not defined'''.format(self.annotation) class MissingAnnotationKeyWarning(AnnotationWarning): """Raised when trying to unset an annotation that is not set""" def __str__(self): return '''"{}" is not set, so it can't be unset'''.format(self.annotation) class AnnotationIdentifierWarning(AnnotationWarning): """Base exception for annotation:value pairs""" def __init__(self, line_number, line, position, annotation, value): super(AnnotationIdentifierWarning, self).__init__(line_number, line, position, annotation, value) self.value = value class IllegalAnnotationValueWarning(AnnotationIdentifierWarning): """Raised when an annotation has a value that does not belong to the original set of valid annotation values.""" def __str__(self): return '"{}" is not defined in the {} annotation'.format(self.value, self.annotation) 
class MissingAnnotationRegexWarning(AnnotationIdentifierWarning): """Raised if annotation doesn't match regex""" def __str__(self): return '''"{}" doesn't match the regex for {} annotation'''.format(self.value, self.annotation) # Provenance Warnings class VersionFormatWarning(PyBelParserWarning): """Raised if the version string doesn't adhere to semantic versioning or ``YYYYMMDD`` format""" def __init__(self, line_number, line, position, version_string): super(VersionFormatWarning, self).__init__(line_number, line, position, version_string) self.version_string = version_string def __str__(self): return ( 'Version string "{}" neither is a date like YYYYMMDD nor adheres to semantic versioning.' ' See http://semver.org/'.format(self.version_string) ) class MetadataException(PyBELWarning): """Base exception for issues with document metadata""" def __init__(self, line_number, line, *args): super(MetadataException, self).__init__(line_number, line, *args) self.line = line self.line_number = line_number def __str__(self): return '[line:{}] Invalid metadata - "{}"'.format(self.line_number, self.line) class MalformedMetadataException(MetadataException): """Raised when an invalid metadata line is encountered""" class InvalidMetadataException(PyBelParserWarning): """Raised when an incorrect document metadata key is used. Valid document metadata keys are: - ``Authors`` - ``ContactInfo`` - ``Copyright`` - ``Description`` - ``Disclaimer`` - ``Licenses`` - ``Name`` - ``Version`` .. 
seealso:: BEL specification on the `properties section `_ """ def __init__(self, line_number, line, position, key, value): super(InvalidMetadataException, self).__init__(line_number, line, position, key, value) self.key = key self.value = value def __str__(self): return 'Invalid document metadata key: {}'.format(self.key) class MissingMetadataException(PyBELWarning): """Raised when a BEL Script is missing critical metadata.""" def __init__(self, key): super(MissingMetadataException, self).__init__(key) self.key = key def __str__(self): return 'Missing required document metadata: {}'.format(self.key) class InvalidCitationLengthException(PyBelParserWarning): """Base exception raised when the format for a citation is wrong.""" class CitationTooShortException(InvalidCitationLengthException): """Raised when a citation does not have the minimum of {type, name, reference}.""" def __str__(self): return "[pos:{}] Citation is missing required fields: {}".format(self.position, self.line) class CitationTooLongException(InvalidCitationLengthException): """Raised when a citation has more than the allowed entries, {type, name, reference, date, authors, comments}.""" def __str__(self): return "[pos:{}] Citation contains too many entries: {}".format(self.position, self.line) class MissingCitationException(PyBelParserWarning): """Raised when trying to parse a BEL statement, but no citation is currently set. This might be due to a previous error in the formatting of a citation. Though it's not a best practice, some BEL curators set other annotations before the citation. If this is the case in your BEL document, and you're absolutly sure that all UNSET statements are correctly written, you can use ``citation_clearing=True`` as a keyword argument in any of the IO functions in :func:`pybel.from_lines`, :func:`pybel.from_url`, or :func:`pybel.from_path`. 
""" def __str__(self): return "Missing citation; can't add: {}".format(self.line) class MissingSupportWarning(PyBelParserWarning): """Raised when trying to parse a BEL statement, but no evidence is currently set. All BEL statements must be qualified with evidence. If your data is serialized from a database and provenance information is not readily accessible, consider referencing the publication for the database, or a url pointing to the data from either a programmatically or human-readable endpoint. """ def __str__(self): return "Missing evidence; can't add: {}".format(self.line) class MissingAnnotationWarning(PyBelParserWarning): """Raised when trying to parse a BEL statement and a required annotation is not present.""" def __init__(self, line_number, line, position, required_annotations): super(MissingAnnotationWarning, self).__init__(line_number, line, position, required_annotations) self.required_annotations = required_annotations def __str__(self): return 'Missing annotations: {}'.format(', '.join(sorted(self.required_annotations))) class InvalidCitationType(PyBelParserWarning): """Raise when a citation is set with an incorrect type. Valid citation types include: - ``Book`` - ``PubMed`` - ``Journal`` - ``Online Resource`` - ``URL`` - ``DOI`` - ``Other`` .. 
seealso:: OpenBEL wiki on `citations `_ """ def __init__(self, line_number, line, position, citation_type): super(InvalidCitationType, self).__init__(line_number, line, position, citation_type) self.citation_type = citation_type def __str__(self): return '[pos:{}] "{}" is not a valid citation type'.format(self.position, self.citation_type) class InvalidPubMedIdentifierWarning(PyBelParserWarning): """Raised when a citation is set whose type is ``PubMed`` but whose database identifier is not a valid integer.""" def __init__(self, line_number, line, position, reference): super(InvalidPubMedIdentifierWarning, self).__init__(line_number, line, position, reference) self.reference = reference def __str__(self): return '[pos:{}] "{}" is not a valid PubMed identifier'.format(self.position, self.reference) # BEL Syntax Warnings class MalformedTranslocationWarning(PyBelParserWarning): """Raised when there is a translocation statement without location information.""" def __init__(self, line_number, line, position, tokens): super(MalformedTranslocationWarning, self).__init__(line_number, line, position, tokens) self.tokens = tokens def __str__(self): return '[pos:{}] Unqualified translocation: {} {}'.format(self.position, self.line, self.tokens) class PlaceholderAminoAcidWarning(PyBelParserWarning): """Raised when an invalid amino acid code is given. One example might be the usage of X, which is a colloquial signifier for a truncation in a given position. Text mining efforts for knowledge extraction make this mistake often. X might also signify a placeholder amino acid. """ def __init__(self, line_number, line, position, code): super(PlaceholderAminoAcidWarning, self).__init__(line_number, line, position, code) self.code = code def __str__(self): return '[pos:{}] Placeholder amino acid found: {}'.format(self.position, self.code) class NestedRelationWarning(PyBelParserWarning): """Raised when encountering a nested statement. 
See our the docs for an explanation of why we explicitly do not support nested statements. """ def __str__(self): return 'Nesting is not supported. Split this statement: {}'.format(self.line) class LexicographyWarning(PyBELWarning): """Raised when encountering improper capitalization of namespace/annotation names.""" # Semantic Warnings class InvalidFunctionSemantic(PyBelParserWarning): """Raised when an invalid function is used for a given node. For example, an HGNC symbol for a protein-coding gene YFG cannot be referenced as an miRNA with ``m(HGNC:YFG)`` """ def __init__(self, line_number, line, position, function, namespace, name, allowed_functions): super(InvalidFunctionSemantic, self).__init__(line_number, line, position, function, namespace, name, allowed_functions) self.function = function self.namespace = namespace self.name = name self.allowed_functions = allowed_functions def __str__(self): return "{} {}:{} should be encoded as one of: {}".format( self.function, self.namespace, self.name, ', '.join(self.allowed_functions) ) class RelabelWarning(PyBelParserWarning): """Raised when a node is relabeled""" def __init__(self, line_number, line, position, node, old_label, new_label): super(RelabelWarning, self).__init__(line_number, line, position, node, old_label, new_label) self.node = node self.old_label = old_label self.new_label = new_label def __str__(self): return 'Tried to relabel {} from {} to {}'.format(self.node, self.old_label, self.new_label) pybel-0.12.1/src/pybel/parser/modifiers/000077500000000000000000000000001334645200200200645ustar00rootroot00000000000000pybel-0.12.1/src/pybel/parser/modifiers/__init__.py000066400000000000000000000011001334645200200221650ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Parsers for modifications to abundances.""" from .fragment import get_fragment_language from .fusion import get_fusion_language, get_legacy_fusion_langauge from .gene_modification import get_gene_modification_language from 
.gene_substitution import get_gene_substitution_language from .location import get_location_language from .protein_modification import get_protein_modification_language from .protein_substitution import get_protein_substitution_language from .truncation import get_truncation_language from .variant import get_hgvs_language pybel-0.12.1/src/pybel/parser/modifiers/constants.py000066400000000000000000000016161334645200200224560ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Contains constants""" from pyparsing import Keyword, MatchFirst, oneOf from ..exc import PlaceholderAminoAcidWarning from ... import language aa_single = oneOf(list(language.amino_acid_dict.keys())) aa_single.setParseAction(lambda s, l, t: [language.amino_acid_dict[t[0]]]) aa_triple = oneOf(list(language.amino_acid_dict.values())) #: In biological literature, the X is used to denote a truncation. Text mining efforts often encode X as an amino #: acid, for which we will throw an error using :func:`handle_aa_placeholder` aa_placeholder = Keyword('X') def handle_aa_placeholder(line, position, tokens): """Raises an exception when encountering a placeholder amino acid, ``X``""" raise PlaceholderAminoAcidWarning(-1, line, position, tokens[0]) aa_placeholder.setParseAction(handle_aa_placeholder) amino_acid = MatchFirst([aa_triple, aa_single, aa_placeholder]) pybel-0.12.1/src/pybel/parser/modifiers/fragment.py000066400000000000000000000052431334645200200222450ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Fragments ~~~~~~~~~ The addition of a fragment results in an entry called :data:`pybel.constants.VARIANTS` in the data dictionary associated with a given node. This entry is a list with dictionaries describing each of the variants. All variants have the entry :data:`pybel.constants.KIND` to identify whether it is a PTM, gene modification, fragment, or HGVS variant. The :data:`pybel.constants.KIND` value for a fragment is :data:`pybel.constants.FRAGMENT`. 
Each fragment contains an identifier, which is a dictionary with the namespace and name, and can optionally include the position ('pos') and/or amino acid code ('code'). For example, the node :code:`p(HGNC:GSK3B, frag(45_129))` is represented with the following: .. code:: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'GSK3B', VARIANTS: [ { KIND: FRAGMENT, FRAGMENT_START: 45, FRAGMENT_STOP: 129 } ] } Additionally, nodes can have an asterick (*) or question mark (?) representing unbound or unknown fragments, respectively. A fragment may also be unknown, such as in the node :code:`p(HGNC:GSK3B, frag(?))`. This is represented with the key :data:`pybel.constants.FRAGMENT_MISSING` and the value of '?' like: .. code:: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'GSK3B', VARIANTS: [ { KIND: FRAGMENT, FRAGMENT_MISSING: '?', } ] } .. seealso:: - BEL 2.0 specification on `proteolytic fragments (2.2.3) `_ - PyBEL module :py:class:`pybel.parser.modifiers.get_fragment_language` """ from pyparsing import And, Keyword, Optional, Suppress, pyparsing_common as ppc from ..utils import WCW, nest, one_of_tags, quote from ...constants import FRAGMENT, FRAGMENT_DESCRIPTION, FRAGMENT_MISSING, FRAGMENT_START, FRAGMENT_STOP, KIND __all__ = [ 'get_fragment_language', ] fragment_tag = one_of_tags(tags=['frag', 'fragment'], canonical_tag=FRAGMENT, name=KIND) fragment_range = (ppc.integer | '?')(FRAGMENT_START) + '_' + (ppc.integer | '?' 
| '*')(FRAGMENT_STOP) missing_fragment = Keyword('?')(FRAGMENT_MISSING) def get_fragment_language(): _fragment_value_inner = fragment_range | missing_fragment(FRAGMENT_MISSING) _fragment_value = ( _fragment_value_inner | And([Suppress('"'), _fragment_value_inner, Suppress('"')]) ) language = fragment_tag + nest( _fragment_value + Optional(WCW + quote(FRAGMENT_DESCRIPTION))) return language pybel-0.12.1/src/pybel/parser/modifiers/fusion.py000066400000000000000000000073521334645200200217500ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Fusions ~~~~~~~ Gene, RNA, protein, and miRNA fusions are all represented with the same underlying data structure. Below it is shown with uppercase letters referring to constants from :code:`pybel.constants` and. For example, :code:`g(HGNC:BCR, fus(HGNC:JAK2, 1875, 2626))` is represented as: .. code:: { FUNCTION: GENE, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'BCR'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'JAK2'}, RANGE_5P: { FUSION_REFERENCE: 'c', FUSION_START: '?', FUSION_STOP: 1875 }, RANGE_3P: { FUSION_REFERENCE: 'c', FUSION_START: 2626, FUSION_STOP: '?' } } } .. seealso:: - BEL 2.0 specification on `fusions (2.6.1) `_ - PyBEL module :py:class:`pybel.parser.modifiers.get_fusion_language` - PyBEL module :py:class:`pybel.parser.modifiers.get_legacy_fusion_language` """ from pyparsing import Group, Keyword, Optional, Suppress, oneOf, pyparsing_common, pyparsing_common as ppc, replaceWith from ..utils import WCW, nest from ...constants import ( FUSION, FUSION_MISSING, FUSION_REFERENCE, FUSION_START, FUSION_STOP, PARTNER_3P, PARTNER_5P, RANGE_3P, RANGE_5P, ) __all__ = [ 'fusion_tags', 'get_fusion_language', 'get_legacy_fusion_langauge', ] fusion_tags = oneOf(['fus', 'fusion']).setParseAction(replaceWith(FUSION)) reference_seq = oneOf(['r', 'p', 'c']) coordinate = pyparsing_common.integer | '?' 
missing = Keyword('?') range_coordinate_unquoted = ( missing(FUSION_MISSING) | ( reference_seq(FUSION_REFERENCE) + Suppress('.') + coordinate(FUSION_START) + Suppress('_') + coordinate(FUSION_STOP) ) ) def get_fusion_language(identifier, permissive=True): range_coordinate = Suppress('"') + range_coordinate_unquoted + Suppress('"') if permissive: # permissive to wrong quoting range_coordinate = range_coordinate | range_coordinate_unquoted return fusion_tags + nest( Group(identifier)(PARTNER_5P), Group(range_coordinate)(RANGE_5P), Group(identifier)(PARTNER_3P), Group(range_coordinate)(RANGE_3P), ) def get_legacy_fusion_langauge(identifier, reference): break_start = (ppc.integer | '?').setParseAction(fusion_break_handler_wrapper(reference, start=True)) break_end = (ppc.integer | '?').setParseAction(fusion_break_handler_wrapper(reference, start=False)) res = ( identifier(PARTNER_5P) + WCW + fusion_tags + nest( identifier(PARTNER_3P) + Optional( WCW + Group(break_start)(RANGE_5P) + WCW + Group(break_end)(RANGE_3P) ) ) ) res.setParseAction(fusion_legacy_handler) return res def fusion_legacy_handler(line, position, tokens): if RANGE_5P not in tokens: tokens[RANGE_5P] = {FUSION_MISSING: '?'} if RANGE_3P not in tokens: tokens[RANGE_3P] = {FUSION_MISSING: '?'} return tokens def fusion_break_handler_wrapper(reference, start): def fusion_break_handler(line, position, tokens): if tokens[0] == '?': tokens[FUSION_MISSING] = '?' return tokens else: # The break point is specified as an integer tokens[FUSION_REFERENCE] = reference tokens[FUSION_START if start else FUSION_STOP] = '?' 
tokens[FUSION_STOP if start else FUSION_START] = int(tokens[0]) return tokens return fusion_break_handler pybel-0.12.1/src/pybel/parser/modifiers/gene_modification.py000066400000000000000000000040021334645200200240750ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Gene Modification ~~~~~~~~~~~~~~~~~ PyBEL introduces the gene modification tag, gmod(), to allow for the encoding of epigenetic modifications. Its syntax follows the same style s the pmod() tags for proteins, and can include the following values: - M - Me - methylation - A - Ac - acetylation For example, the node :code:`g(HGNC:GSK3B, gmod(M))` is represented with the following: .. code:: { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'GSK3B', VARIANTS: [ { KIND: GMOD, IDENTIFIER: { NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'Me' } } ] } The addition of this function does not preclude the use of all other standard functions in BEL; however, other compilers probably won't support these standards. If you agree that this is useful, please contribute to discussion in the OpenBEL community. .. seealso:: - PyBEL module :py:func:`pybel.parser.modifiers.get_gene_modification_language` """ from pyparsing import Group, MatchFirst, oneOf from ..utils import nest, one_of_tags from ... 
import language from ...constants import BEL_DEFAULT_NAMESPACE, GMOD, IDENTIFIER, KIND, NAME, NAMESPACE __all__ = [ 'get_gene_modification_language', ] def _handle_gmod_default(line, position, tokens): tokens[NAMESPACE] = BEL_DEFAULT_NAMESPACE tokens[NAME] = language.gmod_namespace[tokens[0]] return tokens gmod_tag = one_of_tags(tags=['gmod', 'geneModification'], canonical_tag=GMOD, name=KIND) gmod_default_ns = oneOf(list(language.gmod_namespace.keys())).setParseAction(_handle_gmod_default) def get_gene_modification_language(identifier_qualified): """ :param pyparsing.ParseElement identifier_qualified: :rtype: pyparsing.ParseElement """ gmod_identifier = MatchFirst([ Group(identifier_qualified), Group(gmod_default_ns), ]) return gmod_tag + nest( gmod_identifier(IDENTIFIER) ) pybel-0.12.1/src/pybel/parser/modifiers/gene_substitution.py000066400000000000000000000040501334645200200242070ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Gene Substitution ~~~~~~~~~~~~~~~~~ Gene substitutions are legacy statements defined in BEL 1.0. BEL 2.0 recommends using HGVS strings. Luckily, the information contained in a BEL 1.0 encoding, such as :code:`g(HGNC:APP,sub(G,275341,C))` can be automatically translated to the appropriate HGVS :code:`g(HGNC:APP, var(c.275341G>C))`, assuming that all substitutions are using the reference coding gene sequence for numbering and not the genomic reference. The previous statements both produce the underlying data: .. code:: { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'APP', VARIANTS: [ { KIND: HGVS, IDENTIFIER: 'c.275341G>C' } ] } .. seealso:: - BEL 2.0 specification on `gene substitutions `_ - PyBEL module :py:class:`pybel.parser.modifiers.get_gene_substitution_language` """ import logging from pyparsing import oneOf, pyparsing_common as ppc from ..utils import nest, one_of_tags from ... 
import language from ...constants import GSUB_POSITION, GSUB_REFERENCE, GSUB_VARIANT, HGVS, IDENTIFIER, KIND __all__ = [ 'get_gene_substitution_language', ] log = logging.getLogger(__name__) dna_nucleotide = oneOf(list(language.dna_nucleotide_labels.keys())) gsub_tag = one_of_tags(tags=['sub', 'substitution'], canonical_tag=HGVS, name=KIND) def _handle_gsub(line, position, tokens): upgraded = 'c.{}{}>{}'.format(tokens[GSUB_POSITION], tokens[GSUB_REFERENCE], tokens[GSUB_VARIANT]) log.debug('legacy sub() %s upgraded to %s', line, upgraded) tokens[IDENTIFIER] = upgraded del tokens[GSUB_POSITION] del tokens[GSUB_REFERENCE] del tokens[GSUB_VARIANT] return tokens def get_gene_substitution_language(): language = gsub_tag + nest( dna_nucleotide(GSUB_REFERENCE), ppc.integer(GSUB_POSITION), dna_nucleotide(GSUB_VARIANT), ) language.setParseAction(_handle_gsub) return language pybel-0.12.1/src/pybel/parser/modifiers/location.py000066400000000000000000000036021334645200200222470ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Location data also is added into the information in the edge for the node (subject or object) for which it was annotated. :code:`p(HGNC:GSK3B, pmod(P, S, 9), loc(GOCC:lysozome)) pos act(p(HGNC:GSK3B), ma(kin))` becomes: .. code:: { SUBJECT: { LOCATION: { NAMESPACE: 'GOCC', NAME: 'lysozome' } }, RELATION: POSITIVE_CORRELATION, OBJECT: { MODIFIER: ACTIVITY, EFFECT: { NAMESPACE: BEL_DEFAULT_NAMESPACE NAME: 'kin', } }, EVIDENCE: '...', CITATION: { ... } } The addition of the :code:`location()` element in BEL 2.0 allows for the unambiguous expression of the differences between the process of hypothetical :code:`HGNC:A` moving from one place to another and the existence of hypothetical :code:`HGNC:A` in a specific location having different effects. In BEL 1.0, this action had its own node, but this introduced unnecessary complexity to the network and made querying more difficult. 
This calls for thoughtful consideration of the following two statements: - :code:`tloc(p(HGNC:A), fromLoc(GOCC:intracellular), toLoc(GOCC:"cell membrane")) -> p(HGNC:B)` - :code:`p(HGNC:A, location(GOCC:"cell membrane")) -> p(HGNC:B)` .. seealso:: - BEL 2.0 specification on `cellular location (2.2.4) `_ - PyBEL module :py:class:`pybel.parser.modifiers.get_location_language` """ from pyparsing import Group, Suppress, oneOf from ..utils import nest from ...constants import LOCATION __all__ = [ 'get_location_language', ] location_tag = Suppress(oneOf(['loc', 'location'])) def get_location_language(identifier): return Group( location_tag + nest(identifier) )(LOCATION) pybel-0.12.1/src/pybel/parser/modifiers/protein_modification.py000066400000000000000000000073731334645200200246550ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Protein Modification ~~~~~~~~~~~~~~~~~~~~ The addition of a post-translational modification (PTM) tag results in an entry called 'variants' in the data dictionary associated with a given node. This entry is a list with dictionaries describing each of the variants. All variants have the entry 'kind' to identify whether it is a PTM, gene modification, fragment, or HGVS variant. The 'kind' value for PTM is 'pmod'. Each PMOD contains an identifier, which is a dictionary with the namespace and name, and can optionally include the position ('pos') and/or amino acid code ('code'). For example, the node :code:`p(HGNC:GSK3B, pmod(P, S, 9))` is represented with the following: .. code:: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'GSK3B', VARIANTS: [ { KIND: PMOD, IDENTIFIER: { NAMESPACE: BEL_DEFAULT_NAMESPACE NAME: 'Ph', }, PMOD_CODE: 'Ser', PMOD_POSITION: 9 } ] } As an additional example, in :code:`p(HGNC:MAPK1, pmod(Ph, Thr, 202), pmod(Ph, Tyr, 204))`, MAPK is phosphorylated twice to become active. This results in the following: .. 
code:: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'MAPK1', VARIANTS: [ { KIND: PMOD, IDENTIFIER: { NAMESPACE: BEL_DEFAULT_NAMESPACE NAME: 'Ph', }, PMOD_CODE: 'Thr', PMOD_POSITION: 202 }, { KIND: PMOD, IDENTIFIER: { NAMESPACE: BEL_DEFAULT_NAMESPACE NAME: 'Ph', }, PMOD_CODE: 'Tyr', PMOD_POSITION: 204 } ] } .. seealso:: - BEL 2.0 specification on `protein modifications `_ - PyBEL module :py:class:`pybel.parser.modifiers.get_protein_modification_language` """ import logging from pyparsing import Group, MatchFirst, Optional, oneOf, pyparsing_common as ppc from .constants import amino_acid from ..utils import WCW, nest, one_of_tags from ...constants import BEL_DEFAULT_NAMESPACE, IDENTIFIER, KIND, NAME, NAMESPACE, PMOD, PMOD_CODE, PMOD_POSITION from ...language import pmod_legacy_labels, pmod_namespace __all__ = [ 'get_protein_modification_language', ] log = logging.getLogger(__name__) def _handle_pmod_default_ns(line, position, tokens): tokens[NAMESPACE] = BEL_DEFAULT_NAMESPACE tokens['name'] = pmod_namespace[tokens[0]] return tokens def _handle_pmod_legacy_ns(line, position, tokens): upgraded = pmod_legacy_labels[tokens[0]] log.log(5, 'legacy pmod() value %s upgraded to %s', line, upgraded) tokens[NAMESPACE] = BEL_DEFAULT_NAMESPACE tokens[NAME] = upgraded return tokens pmod_tag = one_of_tags(tags=['pmod', 'proteinModification'], canonical_tag=PMOD, name=KIND) pmod_default_ns = oneOf(list(pmod_namespace)).setParseAction(_handle_pmod_default_ns) pmod_legacy_ns = oneOf(list(pmod_legacy_labels)).setParseAction(_handle_pmod_legacy_ns) def get_protein_modification_language(identifier_qualified): """ :param pyparsing.ParseElement identifier_qualified: :rtype: pyparsing.ParseElement """ pmod_identifier = MatchFirst([ identifier_qualified, pmod_default_ns, pmod_legacy_ns ]) return pmod_tag + nest( Group(pmod_identifier)(IDENTIFIER) + Optional( WCW + amino_acid(PMOD_CODE) + Optional( WCW + ppc.integer(PMOD_POSITION) ) ) ) 
pybel-0.12.1/src/pybel/parser/modifiers/protein_substitution.py000066400000000000000000000040051334645200200247510ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Protein Substitution ~~~~~~~~~~~~~~~~~~~~ Protein substitutions are legacy statements defined in BEL 1.0. BEL 2.0 recommends using HGVS strings. Luckily, the information contained in a BEL 1.0 encoding, such as :code:`p(HGNC:APP,sub(R,275,H))` can be automatically translated to the appropriate HGVS :code:`p(HGNC:APP, var(p.Arg275His))`, assuming that all substitutions are using the reference protein sequence for numbering and not the genomic reference. The previous statements both produce the underlying data: .. code:: { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'APP', VARIANTS: [ { KIND: HGVS, IDENTIFIER: 'p.Arg275His' } ] } .. seealso:: - BEL 2.0 specification on `protein substitutions `_ - PyBEL module :py:class:`pybel.parser.modifiers.get_protein_substitution_language` """ import logging from pyparsing import pyparsing_common as ppc from .constants import amino_acid from ..utils import nest, one_of_tags from ...constants import ( HGVS, IDENTIFIER, KIND, PSUB_POSITION, PSUB_REFERENCE, PSUB_VARIANT, ) __all__ = [ 'get_protein_substitution_language', ] log = logging.getLogger(__name__) psub_tag = one_of_tags(tags=['sub', 'substitution'], canonical_tag=HGVS, name=KIND) def _handle_psub(line, position, tokens): upgraded = 'p.{}{}{}'.format(tokens[PSUB_REFERENCE], tokens[PSUB_POSITION], tokens[PSUB_VARIANT]) log.log(5, 'sub() in p() is deprecated: %s. 
Upgraded to %s', line, upgraded) tokens[IDENTIFIER] = upgraded del tokens[PSUB_REFERENCE] del tokens[PSUB_POSITION] del tokens[PSUB_VARIANT] return tokens def get_protein_substitution_language(): language = psub_tag + nest( amino_acid(PSUB_REFERENCE), ppc.integer(PSUB_POSITION), amino_acid(PSUB_VARIANT), ) language.setParseAction(_handle_psub) return language pybel-0.12.1/src/pybel/parser/modifiers/truncation.py000066400000000000000000000042741334645200200226330ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Truncations ~~~~~~~~~~~ Truncations in the legacy BEL 1.0 specification are automatically translated to BEL 2.0 with HGVS nomenclature. :code:`p(HGNC:AKT1, trunc(40))` becomes :code:`p(HGNC:AKT1, var(p.40*))` and is represented with the following dictionary: .. code:: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1', VARIANTS: [ { KIND: HGVS, IDENTIFIER: 'p.40*' } ] } Unfortunately, the HGVS nomenclature requires the encoding of the terminal amino acid which is exchanged for a stop codon, and this information is not required by BEL 1.0. For this example, the proper encoding of the truncation at position also includes the information that the 40th amino acid in the AKT1 is Cys. Its BEL encoding should be :code:`p(HGNC:AKT1, var(p.Cys40*))`. Temporary support has been added to compile these statements, but it's recommended they are upgraded by reexamining the supporting text, or looking up the amino acid sequence. .. 
seealso:: - BEL 2.0 specification on `truncations `_ - PyBEL module :py:class:`pybel.parser.modifiers.get_truncation_language` """ import logging from pyparsing import pyparsing_common as ppc from ..utils import nest, one_of_tags from ...constants import HGVS, IDENTIFIER, KIND, TRUNCATION_POSITION __all__ = [ 'get_truncation_language', ] log = logging.getLogger(__name__) truncation_tag = one_of_tags(tags=['trunc', 'truncation'], canonical_tag=HGVS, name=KIND) def _handle_trunc_legacy(line, position, tokens): # FIXME this isn't correct HGVS nomenclature, but truncation isn't forward compatible without more information upgraded = 'p.{}*'.format(tokens[TRUNCATION_POSITION]) log.warning('trunc() is deprecated. Re-encode with reference terminal amino acid in HGVS: %s', line) tokens[IDENTIFIER] = upgraded del tokens[TRUNCATION_POSITION] return tokens def get_truncation_language(): language = truncation_tag + nest(ppc.integer(TRUNCATION_POSITION)) language.setParseAction(_handle_trunc_legacy) return language pybel-0.12.1/src/pybel/parser/modifiers/variant.py000066400000000000000000000022201334645200200220760ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ HGVS Variants ~~~~~~~~~~~~~ For example, the node :code:`p(HGNC:GSK3B, var(p.Gly123Arg))` is represented with the following: .. code:: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'GSK3B', VARIANTS: [ { KIND: HGVS, IDENTIFIER: 'p.Gly123Arg' } ] } .. 
seealso:: - BEL 2.0 specification on `variants `_ - HVGS `conventions `_ - PyBEL module :py:class:`pybel.parser.modifiers.get_hgvs_language` """ from pyparsing import Word, alphanums from ..utils import nest, one_of_tags, quote from ...constants import HGVS, IDENTIFIER, KIND __all__ = [ 'get_hgvs_language', ] variant_tags = one_of_tags(tags=['var', 'variant'], canonical_tag=HGVS, name=KIND) variant_characters = Word(alphanums + '._*=?>') def get_hgvs_language(): """ :rtype: pyparsing.ParseElement """ hgvs = (variant_characters | quote)(IDENTIFIER) language = variant_tags + nest(hgvs) return language pybel-0.12.1/src/pybel/parser/parse_bel.py000066400000000000000000001173611334645200200204220ustar00rootroot00000000000000# -*- coding: utf-8 -*- """A parser for BEL. This module handles parsing BEL relations and validation of semantics. """ import itertools as itt import logging from pyparsing import And, Group, Keyword, MatchFirst, Optional, StringEnd, Suppress, delimitedList, oneOf, replaceWith from .baseparser import BaseParser from .exc import ( InvalidFunctionSemantic, MalformedTranslocationWarning, MissingAnnotationWarning, MissingCitationException, MissingSupportWarning, NestedRelationWarning, RelabelWarning, ) from .modifiers import ( get_fragment_language, get_fusion_language, get_gene_modification_language, get_gene_substitution_language, get_hgvs_language, get_legacy_fusion_langauge, get_location_language, get_protein_modification_language, get_protein_substitution_language, get_truncation_language, ) from .parse_control import ControlParser from .parse_identifier import IdentifierParser from .utils import WCW, nest, one_of_tags, quote, triple from .. 
__all__ = [
    'BELParser',
    'modifier_po_to_dict',
]

log = logging.getLogger('pybel.parser')

###########################
# 2.1 Abundance Functions #
###########################

#: 2.1.1 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xabundancea
general_abundance_tags = one_of_tags(['a', 'abundance'], ABUNDANCE, FUNCTION)

#: 2.1.2 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcomplexA
complex_tag = one_of_tags(['complex', 'complexAbundance'], COMPLEX, FUNCTION)

#: 2.1.3 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XcompositeA
composite_abundance_tag = one_of_tags(['composite', 'compositeAbundance'], COMPOSITE, FUNCTION)

#: 2.1.4 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XgeneA
gene_tag = one_of_tags(['g', 'geneAbundance'], GENE, FUNCTION)

#: 2.1.5 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmicroRNAA
mirna_tag = one_of_tags(['m', 'microRNAAbundance'], MIRNA, FUNCTION)

#: 2.1.6 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XproteinA
protein_tag = one_of_tags(['p', 'proteinAbundance'], PROTEIN, FUNCTION)

#: 2.1.7 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XrnaA
rna_tag = one_of_tags(['r', 'rnaAbundance'], RNA, FUNCTION)

######################
# Modifier Functions #
######################

# 2.2.1 (protein modification)
# See below (needs identifier)

#: 2.2.2 (HGVS variant)
variant = get_hgvs_language()

#: 2.2.3 (fragment)
fragment = get_fragment_language()

# 2.2.4 (location)
# See below (needs identifier)

#: DEPRECATED (protein substitution)
psub = get_protein_substitution_language()

#: DEPRECATED
#: http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_sequence_variations
gsub = get_gene_substitution_language()

#: DEPRECATED
#: http://openbel.org/language/version_1.0/bel_specification_version_1.0.html#_truncated_proteins
trunc = get_truncation_language()

###############################
# 2.3 & 2.4 Process Functions #
###############################

#: 2.3.1 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_biologicalprocess_bp
biological_process_tag = one_of_tags(['bp', 'biologicalProcess'], BIOPROCESS, FUNCTION)

#: 2.3.2 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_pathology_path
pathology_tag = one_of_tags(['o', 'path', 'pathology'], PATHOLOGY, FUNCTION)

#: 2.3.3 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#Xactivity
activity_tag = one_of_tags(['act', 'activity'], ACTIVITY, MODIFIER)

#: 2.4.1 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#XmolecularA
molecular_activity_tags = Suppress(oneOf(['ma', 'molecularActivity']))

################################
# 2.5 Transformation Functions #
################################

#: 2.5.1a http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_translocation_tloc
translocation_tag = one_of_tags(['translocation', 'tloc'], TRANSLOCATION, MODIFIER)

#: 2.5.1b http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellsecretion_sec
cell_secretion_tag = one_of_tags(['sec', 'cellSecretion'], CELL_SECRETION, MODIFIER)

#: 2.5.1c http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_cellsurfaceexpression_surf
cell_surface_expression_tag = one_of_tags(['surf', 'cellSurfaceExpression'], CELL_SURFACE_EXPRESSION, MODIFIER)

#: 2.5.2 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_degradation_deg
degradation_tags = one_of_tags(['deg', 'degradation'], DEGRADATION, MODIFIER)

#: 2.5.3 http://openbel.org/language/version_2.0/bel_specification_version_2.0.html#_reaction_rxn
reaction_tags = one_of_tags(['reaction', 'rxn'], REACTION, FUNCTION)

#####################
# BEL Relationships #
#####################

#: 3.1.1 (``increases``); every accepted spelling is replaced with the INCREASES constant
increases_tag = oneOf(['->', '→', 'increases']).setParseAction(replaceWith(INCREASES))

#: 3.1.2 (``directlyIncreases``)
directly_increases_tag = one_of_tags(['=>', '⇒', 'directlyIncreases'], DIRECTLY_INCREASES)

#: 3.1.3 (``decreases``)
decreases_tag = one_of_tags(['-|', 'decreases'], DECREASES)

#: 3.1.4 (``directlyDecreases``)
directly_decreases_tag = one_of_tags(['=|', 'directlyDecreases'], DIRECTLY_DECREASES)

#: 3.1.5 (``rateLimitingStepOf``)
rate_limit_tag = Keyword('rateLimitingStepOf')

#: 3.1.6 (``causesNoChange``)
causes_no_change_tag = one_of_tags(['cnc', 'causesNoChange'], CAUSES_NO_CHANGE)

#: 3.1.7 (``regulates``)
regulates_tag = one_of_tags(['reg', 'regulates'], REGULATES)

#: 3.2.1 (``negativeCorrelation``)
negative_correlation_tag = one_of_tags(['neg', 'negativeCorrelation'], NEGATIVE_CORRELATION)

#: 3.2.2 (``positiveCorrelation``)
positive_correlation_tag = one_of_tags(['pos', 'positiveCorrelation'], POSITIVE_CORRELATION)

#: 3.2.3 (``association``)
association_tag = one_of_tags(['--', 'association'], ASSOCIATION)

#: 3.3.1 (``orthologous``)
orthologous_tag = Keyword('orthologous')

#: 3.3.2 (``transcribedTo``); every accepted spelling is replaced with the TRANSCRIBED_TO constant
transcribed_tag = oneOf([':>', 'transcribedTo']).setParseAction(replaceWith(TRANSCRIBED_TO))

#: 3.3.3 (``translatedTo``); every accepted spelling is replaced with the TRANSLATED_TO constant
translated_tag = oneOf(['>>', 'translatedTo']).setParseAction(replaceWith(TRANSLATED_TO))

#: 3.4.1 (``hasMember``)
has_member_tag = Keyword('hasMember')

#: 3.4.2 (``hasMembers``)
has_members_tag = Keyword('hasMembers')

#: 3.4.3 (``hasComponent``)
has_component_tag = Keyword('hasComponent')

#: 3.4.4 (``hasComponents``)
class BELParser(BaseParser):
    """Build a parser backed by a given dictionary of namespaces."""

    def __init__(
            self,
            graph,
            namespace_dict=None,
            annotation_dict=None,
            namespace_regex=None,
            annotation_regex=None,
            allow_naked_names=False,
            allow_nested=False,
            disallow_unqualified_translocations=False,
            citation_clearing=True,
            skip_validation=False,
            autostreamline=True,
            required_annotations=None
    ):
        """Build a BEL parser.

        :param pybel.BELGraph graph: The BEL Graph to use to store the network
        :param namespace_dict: A dictionary of {namespace: {name: encoding}}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :type namespace_dict: Optional[dict[str,dict[str,str]]]
        :param annotation_dict: A dictionary of {annotation: set of values}. Delegated to
         :class:`pybel.parser.ControlParser`
        :type annotation_dict: Optional[dict[str,set[str]]]
        :param namespace_regex: A dictionary of {namespace: regular expression strings}. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :type namespace_regex: Optional[dict[str,str]]
        :param annotation_regex: A dictionary of {annotation: regular expression strings}. Delegated to
         :class:`pybel.parser.ControlParser`
        :type annotation_regex: Optional[dict[str,str]]
        :param bool allow_naked_names: If true, turn off naked namespace failures. Delegated to
         :class:`pybel.parser.parse_identifier.IdentifierParser`
        :param bool allow_nested: If true, turn off nested statement failures. Checked by
         :meth:`handle_nested_relation`.
        :param bool disallow_unqualified_translocations: If true, translocations given without ``fromLoc`` and
         ``toLoc`` clauses are routed to :meth:`handle_translocation_illegal`, which raises.
        :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations?
         Delegated to :class:`pybel.parser.ControlParser`
        :param bool skip_validation: If true, the namespace and annotation dictionaries and regular expressions
         are not passed on to the identifier and control parsers.
        :param bool autostreamline: Should the parser be streamlined on instantiation?
        :param Optional[list[str]] required_annotations: Optional list of required annotations
        """
        self.graph = graph
        self.allow_nested = allow_nested
        self.disallow_unqualified_translocations = disallow_unqualified_translocations

        if skip_validation:
            self.control_parser = ControlParser(
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
            )
        else:
            self.control_parser = ControlParser(
                annotation_dict=annotation_dict,
                annotation_regex=annotation_regex,
                citation_clearing=citation_clearing,
                required_annotations=required_annotations,
            )

            self.identifier_parser = IdentifierParser(
                allow_naked_names=allow_naked_names,
                namespace_dict=namespace_dict,
                namespace_regex=namespace_regex,
            )

        identifier = Group(self.identifier_parser.language)(IDENTIFIER)
        ungrouped_identifier = self.identifier_parser.language

        # 2.2 Abundance Modifier Functions

        #: 2.2.1 protein modification
        self.pmod = get_protein_modification_language(self.identifier_parser.identifier_qualified)

        #: 2.2.4 location
        self.location = get_location_language(self.identifier_parser.language)
        opt_location = Optional(WCW + self.location)

        #: PyBEL BEL Specification variant
        self.gmod = get_gene_modification_language(self.identifier_parser.identifier_qualified)

        # 2.6 Other Functions

        #: 2.6.1 fusion
        self.fusion = get_fusion_language(self.identifier_parser.language)

        # 2.1 Abundance Functions

        #: 2.1.1 general abundance
        self.general_abundance = general_abundance_tags + nest(ungrouped_identifier + opt_location)

        self.gene_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(variant | gsub | self.gmod))(VARIANTS))

        self.gene_fusion = Group(self.fusion)(FUSION)
        self.gene_fusion_legacy = Group(get_legacy_fusion_langauge(identifier, 'c'))(FUSION)

        #: 2.1.4 gene abundance
        self.gene = gene_tag + nest(MatchFirst([
            self.gene_fusion,
            self.gene_fusion_legacy,
            self.gene_modified
        ]) + opt_location)

        self.mirna_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(variant))(VARIANTS)) + opt_location

        #: 2.1.5 microRNA abundance
        self.mirna = mirna_tag + nest(self.mirna_modified)

        self.protein_modified = ungrouped_identifier + Optional(
            WCW + delimitedList(Group(MatchFirst([self.pmod, variant, fragment, psub, trunc])))(
                VARIANTS))

        self.protein_fusion = Group(self.fusion)(FUSION)
        self.protein_fusion_legacy = Group(get_legacy_fusion_langauge(identifier, 'p'))(FUSION)

        #: 2.1.6 protein abundance
        self.protein = protein_tag + nest(MatchFirst([
            self.protein_fusion,
            self.protein_fusion_legacy,
            self.protein_modified,
        ]) + opt_location)

        self.rna_modified = ungrouped_identifier + Optional(WCW + delimitedList(Group(variant))(VARIANTS))

        self.rna_fusion = Group(self.fusion)(FUSION)
        self.rna_fusion_legacy = Group(get_legacy_fusion_langauge(identifier, 'r'))(FUSION)

        #: 2.1.7 RNA abundance
        self.rna = rna_tag + nest(MatchFirst([
            self.rna_fusion,
            self.rna_fusion_legacy,
            self.rna_modified,
        ]) + opt_location)

        self.single_abundance = MatchFirst([
            self.general_abundance,
            self.gene,
            self.mirna,
            self.protein,
            self.rna
        ])

        #: 2.1.2 complex abundance
        self.complex_singleton = complex_tag + nest(ungrouped_identifier + opt_location)

        self.complex_list = complex_tag + nest(
            delimitedList(Group(self.single_abundance | self.complex_singleton))(MEMBERS) + opt_location)

        self.complex_abundances = self.complex_list | self.complex_singleton

        # Definition of all simple abundances that can be used in a composite abundance
        self.simple_abundance = self.complex_abundances | self.single_abundance
        self.simple_abundance.setParseAction(self.check_function_semantics)

        #: 2.1.3 composite abundance
        self.composite_abundance = composite_abundance_tag + nest(
            delimitedList(Group(self.simple_abundance))(MEMBERS) + opt_location
        )

        self.abundance = self.simple_abundance | self.composite_abundance

        # 2.4 Process Modifier Function
        # backwards compatibility with BEL v1.0

        molecular_activity_default = oneOf(list(language.activity_labels)).setParseAction(
            handle_molecular_activity_default)

        #: 2.4.1 molecular activity
        self.molecular_activity = molecular_activity_tags + nest(
            molecular_activity_default | self.identifier_parser.language
        )

        # 2.3 Process Functions

        #: 2.3.1 biological process
        self.biological_process = biological_process_tag + nest(ungrouped_identifier)

        #: 2.3.2 pathology
        self.pathology = pathology_tag + nest(ungrouped_identifier)

        self.bp_path = self.biological_process | self.pathology
        self.bp_path.setParseAction(self.check_function_semantics)

        self.activity_standard = activity_tag + nest(
            Group(self.simple_abundance)(TARGET) +
            Optional(WCW + Group(self.molecular_activity)(EFFECT))
        )

        activity_legacy_tags = oneOf(language.activities)(MODIFIER)
        self.activity_legacy = activity_legacy_tags + nest(Group(self.simple_abundance)(TARGET))
        self.activity_legacy.setParseAction(handle_activity_legacy)

        #: 2.3.3 activity
        self.activity = self.activity_standard | self.activity_legacy

        self.process = self.bp_path | self.activity

        # 2.5 Transformation Functions

        from_loc = Suppress(FROM_LOC) + nest(identifier(FROM_LOC))
        to_loc = Suppress(TO_LOC) + nest(identifier(TO_LOC))

        self.cell_secretion = cell_secretion_tag + nest(Group(self.simple_abundance)(TARGET))

        self.cell_surface_expression = cell_surface_expression_tag + nest(Group(self.simple_abundance)(TARGET))

        self.translocation_standard = nest(
            Group(self.simple_abundance)(TARGET) +
            WCW +
            Group(from_loc + WCW + to_loc)(EFFECT)
        )

        self.translocation_legacy = nest(
            Group(self.simple_abundance)(TARGET) +
            WCW +
            Group(identifier(FROM_LOC) + WCW + identifier(TO_LOC))(EFFECT)
        )

        self.translocation_legacy.addParseAction(handle_legacy_tloc)
        self.translocation_unqualified = nest(Group(self.simple_abundance)(TARGET))

        if self.disallow_unqualified_translocations:
            self.translocation_unqualified.setParseAction(self.handle_translocation_illegal)

        #: 2.5.1 translocation
        self.translocation = translocation_tag + MatchFirst([
            self.translocation_unqualified,
            self.translocation_standard,
            self.translocation_legacy
        ])

        #: 2.5.2 degradation
        self.degradation = degradation_tags + nest(Group(self.simple_abundance)(TARGET))

        #: 2.5.3 reaction
        self.reactants = Suppress(REACTANTS) + nest(delimitedList(Group(self.simple_abundance)))
        self.products = Suppress(PRODUCTS) + nest(delimitedList(Group(self.simple_abundance)))

        self.reaction = reaction_tags + nest(Group(self.reactants)(REACTANTS), Group(self.products)(PRODUCTS))

        self.transformation = MatchFirst([
            self.cell_secretion,
            self.cell_surface_expression,
            self.translocation,
            self.degradation,
            self.reaction
        ])

        # 3 BEL Relationships

        self.bel_term = MatchFirst([self.transformation, self.process, self.abundance]).streamline()

        self.bel_to_bel_relations = [
            association_tag,
            increases_tag,
            decreases_tag,
            positive_correlation_tag,
            negative_correlation_tag,
            causes_no_change_tag,
            orthologous_tag,
            is_a_tag,
            equivalent_tag,
            partof_tag,
            directly_increases_tag,
            directly_decreases_tag,
            analogous_tag,
            regulates_tag,
        ]
        self.bel_to_bel = triple(self.bel_term, MatchFirst(self.bel_to_bel_relations), self.bel_term)

        # Mixed Relationships

        #: 3.1.5 rateLimitingStepOf
        self.rate_limit = triple(
            MatchFirst([self.biological_process, self.activity, self.transformation]),
            rate_limit_tag,
            self.biological_process
        )

        #: 3.4.6 subProcessOf
        self.subprocess_of = triple(
            MatchFirst([self.process, self.activity, self.transformation]),
            subprocess_of_tag,
            self.process
        )

        #: 3.3.2 transcribedTo
        self.transcribed = triple(self.gene, transcribed_tag, self.rna)

        #: 3.3.3 translatedTo
        self.translated = triple(self.rna, translated_tag, self.protein)

        #: 3.4.1 hasMember
        self.has_member = triple(self.abundance, has_member_tag, self.abundance)

        #: 3.4.2 hasMembers
        self.abundance_list = Suppress('list') + nest(delimitedList(Group(self.abundance)))

        self.has_members = triple(self.abundance, has_members_tag, self.abundance_list)
        self.has_members.setParseAction(self.handle_has_members)

        self.has_components = triple(self.abundance, has_components_tag, self.abundance_list)
        self.has_components.setParseAction(self.handle_has_components)

        self.has_list = self.has_members | self.has_components

        # 3.4.3 hasComponent
        self.has_component = triple(
            self.complex_abundances | self.composite_abundance,
            has_component_tag,
            self.abundance
        )

        self.biomarker = triple(self.bel_term, biomarker_tags, self.process)

        self.has_variant_relation = triple(self.abundance, has_variant_tags, self.abundance)
        self.part_of_reaction = triple(self.reaction, part_of_reaction_tags, self.abundance)

        # Qualified relations only. has_member, has_component, has_variant_relation and
        # part_of_reaction are deliberately absent: they are matched by
        # ``unqualified_relation`` below and stored without citation/evidence.
        self.relation = MatchFirst([
            self.bel_to_bel,
            self.subprocess_of,
            self.rate_limit,
            self.biomarker,
            self.transcribed,
            self.translated,
        ])

        self.relation.setParseAction(self._handle_relation_harness)

        self.unqualified_relation = MatchFirst([
            self.has_member,
            self.has_component,
            self.has_variant_relation,
            self.part_of_reaction
        ])

        self.unqualified_relation.setParseAction(self.handle_unqualified_relation)

        #: 3.1 Causal Relationships - nested. Not enabled by default.
        causal_relation_tags = MatchFirst([
            increases_tag,
            decreases_tag,
            directly_decreases_tag,
            directly_increases_tag
        ])

        self.nested_causal_relationship = triple(
            self.bel_term,
            causal_relation_tags,
            nest(triple(self.bel_term, causal_relation_tags, self.bel_term))
        )

        self.nested_causal_relationship.setParseAction(self.handle_nested_relation)

        self.label_relationship = And([Group(self.bel_term)(SUBJECT), Suppress('labeled'), quote(OBJECT)])
        self.label_relationship.setParseAction(self.handle_label_relation)

        # has_members is handled differently from all other relations because it gets distributed
        # The qualified-relation element built above is wrapped here, so ``self.relation``
        # ends up naming the full alternation.
        self.relation = MatchFirst([
            self.has_list,
            self.nested_causal_relationship,
            self.relation,
            self.unqualified_relation,
            self.label_relationship,
        ])

        self.singleton_term = (self.bel_term + StringEnd()).setParseAction(self.handle_term)

        self.statement = self.relation | self.singleton_term
        self.language = self.control_parser.language | self.statement
        self.language.setName('BEL')

        super(BELParser, self).__init__(self.language, streamline=autostreamline)

    @property
    def namespace_dict(self):
        """The dictionary of {namespace: {name: encoding}} stored in the internal identifier parser

        :rtype: dict[str,dict[str,str]]
        """
        return self.identifier_parser.namespace_dict

    @property
    def namespace_regex(self):
        """The dictionary of {namespace keyword: compiled regular expression} stored in the internal identifier
        parser

        :rtype: dict[str,re]
        """
        return self.identifier_parser.namespace_regex_compiled

    @property
    def annotation_dict(self):
        """A dictionary of annotations to their set of values

        :rtype: dict[str,set[str]]
        """
        return self.control_parser.annotation_dict

    @property
    def annotation_regex(self):
        """A dictionary of annotations defined by regular expressions {annotation keyword: string regular expression}

        :rtype: dict[str,str]
        """
        return self.control_parser.annotation_regex

    @property
    def allow_naked_names(self):
        """Should naked names be parsed, or should errors be thrown?

        :rtype: bool
        """
        return self.identifier_parser.allow_naked_names

    def get_annotations(self):
        """Get current annotations in this parser

        :rtype: dict
        """
        return self.control_parser.get_annotations()

    def clear(self):
        """Clears the graph and all control parser data (current citation, annotations, and statement group)"""
        self.graph.clear()
        self.control_parser.clear()

    def handle_nested_relation(self, line, position, tokens):
        """Handles nested statements. If :code:`allow_nested` is False, raises a warning.

        When nesting is allowed, ``A rel1 (B rel2 C)`` is recorded as two relations:
        ``A rel1 B`` and ``B rel2 C``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: NestedRelationWarning
        """
        if not self.allow_nested:
            raise NestedRelationWarning(self.line_number, line, position)

        self._handle_relation_harness(line, position, {
            SUBJECT: tokens[SUBJECT],
            RELATION: tokens[RELATION],
            OBJECT: tokens[OBJECT][SUBJECT]
        })

        self._handle_relation_harness(line, position, {
            SUBJECT: tokens[OBJECT][SUBJECT],
            RELATION: tokens[OBJECT][RELATION],
            OBJECT: tokens[OBJECT][OBJECT]
        })
        return tokens

    def check_function_semantics(self, line, position, tokens):
        """Raises an exception if the function used on the tokens is wrong

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: InvalidFunctionSemantic
        """
        if self.namespace_dict is None or NAMESPACE not in tokens:
            return tokens

        namespace, name = tokens[NAMESPACE], tokens[NAME]

        # Regex-defined namespaces carry no per-name encodings to check against
        if namespace in self.namespace_regex:
            return tokens

        if self.allow_naked_names and namespace == DIRTY:  # Don't check dirty names in lenient mode
            return tokens

        valid_functions = set(itt.chain.from_iterable(
            belns_encodings[k]
            for k in self.namespace_dict[namespace][name]
        ))

        if tokens[FUNCTION] not in valid_functions:
            raise InvalidFunctionSemantic(self.line_number, line, position, tokens[FUNCTION], namespace, name,
                                          valid_functions)

        return tokens

    def handle_term(self, line, position, tokens):
        """Handle BEL terms (the subject and object of BEL relations).

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        self.ensure_node(tokens)
        return tokens

    def _handle_list_helper(self, tokens, relation):
        """Provide the functionality for :meth:`handle_has_members` and :meth:`handle_has_components`."""
        # tokens[0] is the parent term and tokens[2] the list of children (tokens[1] is the relation)
        parent_node_dsl = self.ensure_node(tokens[0])

        for child_tokens in tokens[2]:
            child_node_dsl = self.ensure_node(child_tokens)
            self.graph.add_unqualified_edge(parent_node_dsl, child_node_dsl, relation)

        return tokens

    def handle_has_members(self, line, position, tokens):
        """Handle list relations like ``p(X) hasMembers list(p(Y), p(Z), ...).``

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        return self._handle_list_helper(tokens, HAS_MEMBER)

    def handle_has_components(self, line, position, tokens):
        """Handle list relations like ``p(X) hasComponents list(p(Y), p(Z), ...)``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        return self._handle_list_helper(tokens, HAS_COMPONENT)

    def _add_qualified_edge_helper(self, u, v, relation, annotations, subject_modifier, object_modifier):
        """Add a qualified edge from the internal aspects of the parser."""
        self.graph.add_qualified_edge(
            u,
            v,
            relation=relation,
            evidence=self.control_parser.evidence,
            citation=self.control_parser.citation.copy(),
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
            **{LINE: self.line_number}
        )

    def _add_qualified_edge(self, u, v, relation, annotations, subject_modifier, object_modifier):
        """Add an edge, then adds the opposite direction edge if it should."""
        self._add_qualified_edge_helper(
            u,
            v,
            relation=relation,
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

        if relation in TWO_WAY_RELATIONS:
            # The reverse edge swaps the modifiers along with the endpoints
            self._add_qualified_edge_helper(
                v,
                u,
                relation=relation,
                annotations=annotations,
                object_modifier=subject_modifier,
                subject_modifier=object_modifier,
            )

    def _handle_relation(self, tokens):
        """A policy in which all annotations are stored as sets, including single annotations.

        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])

        subject_modifier = modifier_po_to_dict(tokens[SUBJECT])
        object_modifier = modifier_po_to_dict(tokens[OBJECT])

        # Each annotation becomes {value: True}, whether it was set as a single value or a set
        annotations = {
            annotation_name: (
                {
                    ae: True
                    for ae in annotation_entry
                } if isinstance(annotation_entry, set) else {
                    annotation_entry: True
                }
            )
            for annotation_name, annotation_entry in self.control_parser.annotations.items()
        }

        self._add_qualified_edge(
            subject_node_dsl,
            object_node_dsl,
            relation=tokens[RELATION],
            annotations=annotations,
            subject_modifier=subject_modifier,
            object_modifier=object_modifier,
        )

    def _handle_relation_harness(self, line, position, tokens):
        """Handle BEL relations based on the policy specified on instantiation.

        Note: this can't be changed after instantiation!

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: MissingCitationException, MissingSupportWarning, or MissingAnnotationWarning
        """
        if not self.control_parser.citation:
            raise MissingCitationException(self.line_number, line, position)

        if not self.control_parser.evidence:
            raise MissingSupportWarning(self.line_number, line, position)

        missing_required_annotations = self.control_parser.get_missing_required_annotations()
        if missing_required_annotations:
            raise MissingAnnotationWarning(self.line_number, line, position, missing_required_annotations)

        self._handle_relation(tokens)
        return tokens

    def handle_unqualified_relation(self, line, position, tokens):
        """Handle unqualified relations.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        object_node_dsl = self.ensure_node(tokens[OBJECT])
        rel = tokens[RELATION]
        self.graph.add_unqualified_edge(subject_node_dsl, object_node_dsl, rel)
        # Returned for consistency with the other handlers
        return tokens

    def handle_label_relation(self, line, position, tokens):
        """Handle statements like ``p(X) labeled "Label for X"``.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: RelabelWarning
        """
        subject_node_dsl = self.ensure_node(tokens[SUBJECT])
        description = tokens[OBJECT]

        if self.graph.has_node_description(subject_node_dsl):
            raise RelabelWarning(
                line_number=self.line_number,
                line=line,
                position=position,
                # Report the node being relabelled, not the graph's whole node view
                node=subject_node_dsl,
                old_label=self.graph.get_node_description(subject_node_dsl),
                new_label=description
            )

        self.graph.set_node_description(subject_node_dsl, description)
        # Returned for consistency with the other handlers
        return tokens

    def ensure_node(self, tokens):
        """Turn parsed tokens into a node object and make sure it is in the graph.

        If the tokens carry a modifier, the node is built from the modifier's target.

        :param pyparsing.ParseResult tokens: Tokens from PyParsing
        :return: The node object that was added to the graph
        :rtype: BaseEntity
        """
        if MODIFIER in tokens:
            return self.ensure_node(tokens[TARGET])

        node_dsl = parse_result_to_dsl(tokens)
        self.graph.add_node_from_data(node_dsl)
        return node_dsl

    def handle_translocation_illegal(self, line, position, tokens):
        """Handle a malformed translocation.

        :param str line: The line being parsed
        :param int position: The position in the line being parsed
        :param pyparsing.ParseResult tokens: The tokens from PyParsing
        :raises: MalformedTranslocationWarning
        """
        raise MalformedTranslocationWarning(self.line_number, line, position, tokens)
def handle_molecular_activity_default(line, position, tokens):
    """Attach the BEL default namespace to a molecular activity given by its default name.

    The first token is looked up in ``language.activity_labels``; the result is stored
    under ``NAME`` and ``BEL_DEFAULT_NAMESPACE`` is stored under ``NAMESPACE``.

    :param str line: The line being parsed
    :param int position: The position in the line being parsed
    :param pyparsing.ParseResult tokens: The tokens from PyParsing
    :return: The same ``tokens`` object, modified in place
    """
    activity_name = language.activity_labels[tokens[0]]
    tokens[NAMESPACE], tokens[NAME] = BEL_DEFAULT_NAMESPACE, activity_name
    return tokens


def handle_activity_legacy(line, position, tokens):
    """Rewrite a BEL 1.0 activity function as a generic activity with an effect.

    The legacy function name found under ``MODIFIER`` is looked up in
    ``language.activity_labels``. ``MODIFIER`` is then replaced by ``ACTIVITY`` and the
    looked-up name is stored, with the BEL default namespace, under ``EFFECT``.

    :param str line: The line being parsed
    :param int position: The position in the line being parsed
    :param pyparsing.ParseResult tokens: The tokens from PyParsing
    :return: The same ``tokens`` object, modified in place
    """
    upgraded_name = language.activity_labels[tokens[MODIFIER]]

    tokens[MODIFIER] = ACTIVITY
    effect = {}
    effect[NAME] = upgraded_name
    effect[NAMESPACE] = BEL_DEFAULT_NAMESPACE
    tokens[EFFECT] = effect

    log.log(5, 'upgraded legacy activity to %s', upgraded_name)
    return tokens


def handle_legacy_tloc(line, position, tokens):
    """Log (at level 5) a translocation written without ``fromLoc`` and ``toLoc`` keywords.

    :param str line: The line being parsed
    :param int position: The position in the line being parsed
    :param pyparsing.ParseResult tokens: The tokens from PyParsing
    :return: ``tokens``, unchanged
    """
    log.log(5, 'legacy translocation statement: %s', line)
    return tokens


def modifier_po_to_dict(tokens):
    """Get location, activity, and/or transformation information as a dictionary.

    :param tokens: The parse result for one term (subject or object) of a relation
    :return: a dictionary describing the modifier
    :rtype: dict
    :raises ValueError: if ``tokens[MODIFIER]`` is not one of the handled modifiers
    """
    result = {}

    if LOCATION in tokens:
        result[LOCATION] = dict(tokens[LOCATION])

    if MODIFIER not in tokens:
        return result

    # A location on the modified target takes precedence over one on the outer term
    target = tokens[TARGET]
    if LOCATION in target:
        result[LOCATION] = target[LOCATION].asDict()

    modifier = tokens[MODIFIER]

    if modifier == DEGRADATION:
        result[MODIFIER] = modifier
        return result

    if modifier == ACTIVITY:
        result[MODIFIER] = modifier
        if EFFECT in tokens:
            # dict() rather than asDict(): the legacy handler stores a plain dict here
            result[EFFECT] = dict(tokens[EFFECT])
        return result

    if modifier == TRANSLOCATION:
        result[MODIFIER] = modifier
        if EFFECT in tokens:
            result[EFFECT] = tokens[EFFECT].asDict()
        return result

    if modifier == CELL_SECRETION:
        result.update(secretion())
        return result

    if modifier == CELL_SURFACE_EXPRESSION:
        result.update(cell_surface_expression())
        return result

    raise ValueError('Invalid value for tokens[MODIFIER]: {}'.format(tokens[MODIFIER]))
see also:: https://wiki.openbel.org/display/BLD/Control+Records """ import logging import re from pyparsing import And, MatchFirst, Suppress, oneOf, pyparsing_common as ppc from .baseparser import BaseParser from .exc import ( CitationTooLongException, CitationTooShortException, IllegalAnnotationValueWarning, InvalidCitationType, InvalidPubMedIdentifierWarning, MissingAnnotationKeyWarning, MissingAnnotationRegexWarning, MissingCitationException, UndefinedAnnotationWarning, ) from .utils import delimited_quoted_list, delimited_unquoted_list, is_int, qid, quote from ..constants import ( ANNOTATIONS, BEL_KEYWORD_ALL, BEL_KEYWORD_CITATION, BEL_KEYWORD_EVIDENCE, BEL_KEYWORD_SET, BEL_KEYWORD_STATEMENT_GROUP, BEL_KEYWORD_SUPPORT, BEL_KEYWORD_UNSET, CITATION, CITATION_ENTRIES, CITATION_REFERENCE, CITATION_TYPE, CITATION_TYPES, CITATION_TYPE_PUBMED, EVIDENCE, ) from ..utils import valid_date __all__ = ['ControlParser'] log = logging.getLogger(__name__) set_tag = Suppress(BEL_KEYWORD_SET) unset_tag = Suppress(BEL_KEYWORD_UNSET) unset_all = Suppress(BEL_KEYWORD_ALL) supporting_text_tags = oneOf([BEL_KEYWORD_EVIDENCE, BEL_KEYWORD_SUPPORT]) set_statement_group_stub = And([Suppress(BEL_KEYWORD_STATEMENT_GROUP), Suppress('='), qid('group')]) set_citation_stub = And([Suppress(BEL_KEYWORD_CITATION), Suppress('='), delimited_quoted_list('values')]) set_evidence_stub = And([Suppress(supporting_text_tags), Suppress('='), quote('value')]) class ControlParser(BaseParser): """A parser for BEL control statements. .. 
seealso:: BEL 1.0 specification on `control records `_ """ def __init__(self, annotation_dict=None, annotation_regex=None, citation_clearing=True, required_annotations=None): """ :param annotation_dict: A dictionary of {annotation: set of valid values} for parsing :type annotation_dict: Optional[dict[str,set[str]]] :param annotation_regex: A dictionary of {annotation: regular expression string} :type annotation_regex: Optional[dict[str,str]] :param bool citation_clearing: Should :code:`SET Citation` statements clear evidence and all annotations? :param Optional[list[str]] required_annotations: Annotations that are required """ self.citation_clearing = citation_clearing self._annotation_dict = {} if annotation_dict is None else annotation_dict self._annotation_regex = {} if annotation_regex is None else annotation_regex self._annotation_regex_compiled = { keyword: re.compile(value) for keyword, value in self.annotation_regex.items() } self.statement_group = None self.citation = {} self.evidence = None self.annotations = {} self.required_annotations = required_annotations or [] annotation_key = ppc.identifier('key').setParseAction(self.handle_annotation_key) self.set_statement_group = set_statement_group_stub().setParseAction(self.handle_set_statement_group) self.set_citation = set_citation_stub().setParseAction(self.handle_set_citation) self.set_evidence = set_evidence_stub().setParseAction(self.handle_set_evidence) set_command_prefix = And([annotation_key('key'), Suppress('=')]) self.set_command = set_command_prefix + qid('value') self.set_command.setParseAction(self.handle_set_command) self.set_command_list = set_command_prefix + delimited_quoted_list('values') self.set_command_list.setParseAction(self.handle_set_command_list) self.unset_command = annotation_key('key') self.unset_command.addParseAction(self.handle_unset_command) self.unset_evidence = supporting_text_tags(EVIDENCE) self.unset_evidence.setParseAction(self.handle_unset_evidence) self.unset_citation = 
Suppress(BEL_KEYWORD_CITATION) self.unset_citation.setParseAction(self.handle_unset_citation) self.unset_statement_group = Suppress(BEL_KEYWORD_STATEMENT_GROUP) self.unset_statement_group.setParseAction(self.handle_unset_statement_group) self.unset_list = delimited_unquoted_list('values') self.unset_list.setParseAction(self.handle_unset_list) self.unset_all = unset_all.setParseAction(self.handle_unset_all) self.set_statements = set_tag + MatchFirst([ self.set_statement_group, self.set_citation, self.set_evidence, self.set_command, self.set_command_list, ]) self.unset_statements = unset_tag + MatchFirst([ self.unset_all, self.unset_citation, self.unset_evidence, self.unset_statement_group, self.unset_command, self.unset_list ]) self.language = self.set_statements | self.unset_statements super(ControlParser, self).__init__(self.language) @property def annotation_dict(self): """A dictionary of annotaions to their set of values :rtype: dict[str,set[str]] """ return self._annotation_dict @property def annotation_regex(self): """A dictioary of annotations defined by regular expressions {annotation keyword: string regular expression} :return: dict[str,str] """ return self._annotation_regex @property def annotation_regex_compiled(self): """A dictionary of annotations defined by regular expressions {annotation keyword: compiled regular expression} :rtype: dict[str,re] """ return self._annotation_regex_compiled @property def _in_debug_mode(self): return not self.annotation_dict and not self.annotation_regex def has_enumerated_annotation(self, annotation): return annotation in self.annotation_dict def has_regex_annotation(self, annotation): return annotation in self.annotation_regex def raise_for_undefined_annotation(self, line, position, annotation): """Raises is an annotation is not defined :param str line: The line being parsed :param int position: The position in the line being parsed :param str annotation: The annotation to check :raises: UndefinedAnnotationWarning """ 
if self._in_debug_mode: return if not self.has_enumerated_annotation(annotation) and not self.has_regex_annotation(annotation): raise UndefinedAnnotationWarning(self.line_number, line, position, annotation) def raise_for_invalid_annotation_value(self, line, position, key, value): """Raises is an annotation is not defined :param str line: The line being parsed :param int position: The position in the line being parsed :param str key: The annotation to check :param str value: The entry in the annotation to check :raises: IllegalAnnotationValueWarning or MissingAnnotationRegexWarning """ if self._in_debug_mode: return if self.has_enumerated_annotation(key) and value not in self.annotation_dict[key]: raise IllegalAnnotationValueWarning(self.line_number, line, position, key, value) elif self.has_regex_annotation(key) and not self.annotation_regex_compiled[key].match(value): raise MissingAnnotationRegexWarning(self.line_number, line, position, key, value) def raise_for_missing_citation(self, line, position): """Raises if there is no citation present in the parser :param str line: The line being parsed :param int position: The position in the line being parsed :raises: MissingCitationException """ if self.citation_clearing and not self.citation: raise MissingCitationException(self.line_number, line, position) def handle_annotation_key(self, line, position, tokens): """Called on all annotation keys before parsing to validate that it's either enumerated or as a regex :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raise: MissingCitationException or UndefinedAnnotationWarning """ key = tokens['key'] self.raise_for_missing_citation(line, position) self.raise_for_undefined_annotation(line, position, key) return tokens def handle_set_statement_group(self, line, position, tokens): self.statement_group = tokens['group'] return tokens def handle_set_citation(self, 
line, position, tokens): self.clear_citation() values = tokens['values'] if len(values) < 2: raise CitationTooShortException(self.line_number, line, position) citation_type = values[0] if citation_type not in CITATION_TYPES: raise InvalidCitationType(self.line_number, line, position, citation_type) if 2 == len(values): return self.handle_set_citation_double(line, position, tokens) citation_reference = values[2] if citation_type == CITATION_TYPE_PUBMED and not is_int(citation_reference): raise InvalidPubMedIdentifierWarning(self.line_number, line, position, citation_reference) if 4 <= len(values) and not valid_date(values[3]): log.debug('Invalid date: %s. Truncating entry.', values[3]) self.citation = dict(zip(CITATION_ENTRIES, values[:3])) return tokens # TODO consider parsing up authors list if 6 < len(values): raise CitationTooLongException(self.line_number, line, position) self.citation = dict(zip(CITATION_ENTRIES, values)) return tokens def handle_set_citation_double(self, line, position, tokens): values = tokens['values'] if values[0] == CITATION_TYPE_PUBMED and not is_int(values[1]): raise InvalidPubMedIdentifierWarning(self.line_number, line, position, values[1]) self.citation = dict(zip((CITATION_TYPE, CITATION_REFERENCE), values)) return tokens def handle_set_evidence(self, line, position, tokens): self.evidence = tokens['value'] return tokens def handle_set_command(self, line, position, tokens): key = tokens['key'] value = tokens['value'] self.raise_for_invalid_annotation_value(line, position, key, value) self.annotations[key] = value return tokens def handle_set_command_list(self, line, position, tokens): key = tokens['key'] values = tokens['values'] for value in values: self.raise_for_invalid_annotation_value(line, position, key, value) self.annotations[key] = set(values) return tokens def handle_unset_statement_group(self, line, position, tokens): """Unsets the statement group, or raises an exception if it is not set. 
:param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: MissingAnnotationKeyWarning """ if self.statement_group is None: raise MissingAnnotationKeyWarning(self.line_number, line, position, BEL_KEYWORD_STATEMENT_GROUP) self.statement_group = None return tokens def handle_unset_citation(self, line, position, tokens): """Unsets the citation, or raises an exception if it is not set :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: MissingAnnotationKeyWarning """ if not self.citation: raise MissingAnnotationKeyWarning(self.line_number, line, position, BEL_KEYWORD_CITATION) self.clear_citation() return tokens def handle_unset_evidence(self, line, position, tokens): """Unsets the evidence, or throws an exception if it is not already set. The value for ``tokens[EVIDENCE]`` corresponds to which alternate of SupportingText or Evidence was used in the BEL script. :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: MissingAnnotationKeyWarning """ if self.evidence is None: raise MissingAnnotationKeyWarning(self.line_number, line, position, tokens[EVIDENCE]) self.evidence = None return tokens def validate_unset_command(self, line, position, key): """Raises an exception when trying to ``UNSET X`` if ``X`` is not already set. :param str line: The line being parsed :param int position: The position in the line being parsed :param str key: The annotation to check :raises: MissingAnnotationKeyWarning """ if key not in self.annotations: raise MissingAnnotationKeyWarning(self.line_number, line, position, key) def handle_unset_command(self, line, position, tokens): """Handles ``UNSET X`` or raises an exception if it is not already set. 
:param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: MissingAnnotationKeyWarning """ key = tokens['key'] self.validate_unset_command(line, position, key) del self.annotations[key] return tokens def handle_unset_list(self, line, position, tokens): """Handles ``UNSET {A, B, ...}`` or raises an exception of any of them are not present. Consider that all unsets are in peril if just one of them is wrong! :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: MissingAnnotationKeyWarning """ for key in tokens['values']: if key in {BEL_KEYWORD_EVIDENCE, BEL_KEYWORD_SUPPORT}: self.evidence = None else: self.validate_unset_command(line, position, key) del self.annotations[key] return tokens def handle_unset_all(self, line, position, tokens): """Handles ``UNSET_ALL``""" self.clear() return tokens def get_annotations(self): """Gets the current annotations :return: The currently stored BEL annotations :rtype: dict """ return { EVIDENCE: self.evidence, CITATION: self.citation.copy(), ANNOTATIONS: self.annotations.copy() } def get_missing_required_annotations(self): """Return missing required annotations. :rtype: list[str] """ return [ required_annotation for required_annotation in self.required_annotations if required_annotation not in self.annotations ] def clear_citation(self): """Clears the citation. 
Additionally, if citation clearing is enabled, clears the evidence and annotations.""" self.citation.clear() if self.citation_clearing: self.evidence = None self.annotations.clear() def clear(self): """Clears the statement_group, citation, evidence, and annotations""" self.statement_group = None self.citation.clear() self.evidence = None self.annotations.clear() pybel-0.12.1/src/pybel/parser/parse_identifier.py000066400000000000000000000137751334645200200220060ustar00rootroot00000000000000# -*- coding: utf-8 -*- import logging import re from pyparsing import Suppress from .baseparser import BaseParser from .exc import ( MissingDefaultNameWarning, MissingNamespaceNameWarning, MissingNamespaceRegexWarning, NakedNameWarning, UndefinedNamespaceWarning, ) from .utils import quote, word from ..constants import DIRTY, NAME, NAMESPACE __all__ = ['IdentifierParser'] log = logging.getLogger(__name__) class IdentifierParser(BaseParser): """A parser for identifiers in the form of namespace:name. Can be made more lenient when given a default namespace or enabling the use of naked names""" def __init__(self, namespace_dict=None, namespace_regex=None, default_namespace=None, allow_naked_names=False): """ :param Optional[dict[str,dict[str,str]]] namespace_dict: A dictionary of {namespace: {name: encoding}} :param Optional[dict[str,str]] namespace_regex: A dictionary of {namespace: regular expression string} to compile :param Optional[set[str]] default_namespace: A set of strings that can be used without a namespace :param bool allow_naked_names: If true, turn off naked namespace failures """ self._namespace_dict = namespace_dict self._namespace_regex = {} if namespace_regex is None else namespace_regex self._namespace_regex_compiled = { keyword: re.compile(pattern) for keyword, pattern in self.namespace_regex.items() } self.default_namespace = set(default_namespace) if default_namespace is not None else None self.allow_naked_names = allow_naked_names self.identifier_qualified = 
word(NAMESPACE) + Suppress(':') + (word | quote)(NAME) if self.namespace_dict is not None: self.identifier_qualified.setParseAction(self.handle_identifier_qualified) self.identifier_bare = (word | quote)(NAME) self.identifier_bare.setParseAction( self.handle_namespace_default if self.default_namespace else self.handle_namespace_lenient if self.allow_naked_names else self.handle_namespace_invalid ) super(IdentifierParser, self).__init__(self.identifier_qualified | self.identifier_bare) @property def namespace_dict(self): """A dictionary of {namespace: {name: encodings}} :rtype: dict[str,dict[str,str]] """ return self._namespace_dict @property def namespace_regex(self): """A dictionary of {namespace keyword: regular expression string} :rtype: dict[str,str] """ return self._namespace_regex @property def namespace_regex_compiled(self): """A dictionary of {namespace keyword: compiled regular expression} :rtype: dict[str,re] """ return self._namespace_regex_compiled def has_enumerated_namespace(self, namespace): """Checks that the namespace has been defined by an enumeration""" return namespace in self.namespace_dict def has_regex_namespace(self, namespace): """Checks that the namespace has been defined by a regular expression""" return namespace in self.namespace_regex def has_namespace(self, namespace): """Checks that the namespace has either been defined by an enumeration or a regular expression""" return self.has_enumerated_namespace(namespace) or self.has_regex_namespace(namespace) def has_enumerated_namespace_name(self, namespace, name): """Checks that the namespace is defined by an enumeration and that the name is a member""" return self.has_enumerated_namespace(namespace) and name in self.namespace_dict[namespace] def has_regex_namespace_name(self, namespace, name): """Checks that the namespace is defined as a regular expression and the name matches it""" return self.has_regex_namespace(namespace) and self.namespace_regex_compiled[namespace].match(name) def 
has_namespace_name(self, line, position, namespace, name): self.raise_for_missing_namespace(line, position, namespace, name) return self.has_enumerated_namespace_name(namespace, name) or self.has_regex_namespace_name(namespace, name) def raise_for_missing_namespace(self, line, position, namespace, name): if not self.has_namespace(namespace): raise UndefinedNamespaceWarning(self.line_number, line, position, namespace, name) def raise_for_missing_name(self, line, position, namespace, name): self.raise_for_missing_namespace(line, position, namespace, name) if self.has_enumerated_namespace(namespace) and not self.has_enumerated_namespace_name(namespace, name): raise MissingNamespaceNameWarning(self.line_number, line, position, namespace, name) if self.has_regex_namespace(namespace) and not self.has_regex_namespace_name(namespace, name): raise MissingNamespaceRegexWarning(self.line_number, line, position, namespace, name) def raise_for_missing_default(self, line, position, name): if not self.default_namespace: raise ValueError('Default namespace is not set') if name not in self.default_namespace: raise MissingDefaultNameWarning(self.line_number, line, position, name) def handle_identifier_qualified(self, line, position, tokens): namespace = tokens[NAMESPACE] name = tokens[NAME] self.raise_for_missing_namespace(line, position, namespace, name) self.raise_for_missing_name(line, position, namespace, name) return tokens def handle_namespace_default(self, line, position, tokens): name = tokens[NAME] self.raise_for_missing_default(line, position, name) return tokens @staticmethod def handle_namespace_lenient(line, position, tokens): tokens[NAMESPACE] = DIRTY log.debug('Naked namespace: [%d] %s', position, line) return tokens def handle_namespace_invalid(self, line, position, tokens): name = tokens[NAME] raise NakedNameWarning(self.line_number, line, position, name) 
pybel-0.12.1/src/pybel/parser/parse_metadata.py000066400000000000000000000324321334645200200214330ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module supports the relation parser by handling statements.""" import logging import re from pyparsing import And, MatchFirst, Suppress, Word, pyparsing_common as ppc from .baseparser import BaseParser from .exc import InvalidMetadataException, RedefinedAnnotationError, RedefinedNamespaceError, VersionFormatWarning from .utils import delimited_quoted_list, qid, quote, word from ..constants import ( BEL_KEYWORD_ANNOTATION, BEL_KEYWORD_AS, BEL_KEYWORD_DEFINE, BEL_KEYWORD_DOCUMENT, BEL_KEYWORD_LIST, BEL_KEYWORD_NAMESPACE, BEL_KEYWORD_PATTERN, BEL_KEYWORD_SET, BEL_KEYWORD_URL, DOCUMENT_KEYS, METADATA_VERSION, belns_encodings, ) from ..utils import valid_date_version __all__ = ['MetadataParser'] log = logging.getLogger(__name__) as_tag = Suppress(BEL_KEYWORD_AS) url_tag = Suppress(BEL_KEYWORD_URL) list_tag = Suppress(BEL_KEYWORD_LIST) set_tag = Suppress(BEL_KEYWORD_SET) define_tag = Suppress(BEL_KEYWORD_DEFINE) function_tags = Word(''.join(belns_encodings)) SEMANTIC_VERSION_STRING_RE = re.compile( '(?P\d+)\.(?P\d+)\.(?P\d+)(?:-(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?(?:\+(?P[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*))?') MALFORMED_VERSION_STRING_RE = re.compile('(?P\d+)(\.(?P\d+)(\.(?P\d+))?)?') class MetadataParser(BaseParser): """A parser for the document and definitions section of a BEL document. .. seealso:: BEL 1.0 Specification for the `DEFINE `_ keyword """ def __init__(self, manager, namespace_dict=None, annotation_dict=None, namespace_regex=None, annotation_regex=None, default_namespace=None, allow_redefinition=False, skip_validation=False): """Build a metadata parser. 
:param pybel.manager.Manager manager: A cache manager :param dict[str,dict[str,str]] namespace_dict: A dictionary of pre-loaded, enumerated namespaces from {namespace keyword: {name: encoding}} :param dict[str,set[str] annotation_dict: A dictionary of pre-loaded, enumerated annotations from {annotation keyword: set of valid values} :param dict[str,str] namespace_regex: A dictionary of pre-loaded, regular expression namespaces from {namespace keyword: regex string} :param dict[str,str] annotation_regex: A dictionary of pre-loaded, regular expression annotations from {annotation keyword: regex string} :param set[str] default_namespace: A set of strings that can be used without a namespace :param bool skip_validation: If true, don't download and cache namespaces/annotations """ #: This metadata parser's internal definition cache manager self.manager = manager self.disallow_redefinition = not allow_redefinition self.skip_validation = skip_validation #: A dictionary of cached {namespace keyword: {name: encoding}} self.namespace_dict = {} if namespace_dict is None else namespace_dict #: A dictionary of cached {annotation keyword: set of values} self.annotation_dict = {} if annotation_dict is None else annotation_dict #: A dictionary of {namespace keyword: regular expression string} self.namespace_regex = {} if namespace_regex is None else namespace_regex #: A set of names that can be used without a namespace self.default_namespace = set(default_namespace) if default_namespace is not None else None #: A dictionary of {annotation keyword: regular expression string} self.annotation_regex = {} if annotation_regex is None else annotation_regex #: A set of namespaces's URLs that can't be cached self.uncachable_namespaces = set() #: A dictionary containing the document metadata self.document_metadata = {} #: A dictionary from {namespace keyword: BEL namespace URL} self.namespace_url_dict = {} #: A dictionary from {annotation keyword: BEL annotation URL} self.annotation_url_dict 
= {} #: A set of annotation keywords that are defined ad-hoc in the BEL script self.annotation_lists = set() self.document = And([ set_tag, Suppress(BEL_KEYWORD_DOCUMENT), word('key'), Suppress('='), qid('value') ]) namespace_tag = And([define_tag, Suppress(BEL_KEYWORD_NAMESPACE), ppc.identifier('name'), as_tag]) self.namespace_url = And([namespace_tag, url_tag, quote('url')]) self.namespace_pattern = And([namespace_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')]) annotation_tag = And([define_tag, Suppress(BEL_KEYWORD_ANNOTATION), ppc.identifier('name'), as_tag]) self.annotation_url = And([annotation_tag, url_tag, quote('url')]) self.annotation_list = And([annotation_tag, list_tag, delimited_quoted_list('values')]) self.annotation_pattern = And([annotation_tag, Suppress(BEL_KEYWORD_PATTERN), quote('value')]) self.document.setParseAction(self.handle_document) self.namespace_url.setParseAction(self.handle_namespace_url) self.namespace_pattern.setParseAction(self.handle_namespace_pattern) self.annotation_url.setParseAction(self.handle_annotations_url) self.annotation_list.setParseAction(self.handle_annotation_list) self.annotation_pattern.setParseAction(self.handle_annotation_pattern) self.language = MatchFirst([ self.document, self.namespace_url, self.annotation_url, self.annotation_list, self.annotation_pattern, self.namespace_pattern ]).setName('BEL Metadata') super(MetadataParser, self).__init__(self.language) def handle_document(self, line, position, tokens): """Handle statements like ``SET DOCUMENT X = "Y"``. 
:param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing """ key = tokens['key'] value = tokens['value'] if key not in DOCUMENT_KEYS: raise InvalidMetadataException(self.line_number, line, position, key, value) norm_key = DOCUMENT_KEYS[key] if norm_key in self.document_metadata: log.warning('Tried to overwrite metadata: %s', key) return tokens self.document_metadata[norm_key] = value if norm_key == METADATA_VERSION: self.raise_for_version(line, position, value) return tokens def raise_for_redefined_namespace(self, line, position, namespace): """Raise an exception if a namespace is already defined. :param str line: The line being parsed :param int position: The position in the line being parsed :param str namespace: The namespace being parsed :raises: RedefinedNamespaceError """ if self.disallow_redefinition and self.has_namespace(namespace): raise RedefinedNamespaceError(self.line_number, line, position, namespace) def handle_namespace_url(self, line, position, tokens): """Handle statements like ``DEFINE NAMESPACE X AS URL "Y"``. :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: RedefinedNamespaceError :raises: pybel.resources.exc.ResourceError """ namespace = tokens['name'] self.raise_for_redefined_namespace(line, position, namespace) url = tokens['url'] self.namespace_url_dict[namespace] = url if self.skip_validation: return tokens namespace_result = self.manager.get_or_create_namespace(url) if isinstance(namespace_result, dict): self.namespace_dict[namespace] = namespace_result self.uncachable_namespaces.add(url) else: self.namespace_dict[namespace] = namespace_result.to_values() return tokens def handle_namespace_pattern(self, line, position, tokens): """Handle statements like ``DEFINE NAMESPACE X AS PATTERN "Y"``. 
:param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: RedefinedNamespaceError """ namespace = tokens['name'] self.raise_for_redefined_namespace(line, position, namespace) self.namespace_regex[namespace] = tokens['value'] return tokens def raise_for_redefined_annotation(self, line, position, annotation): """Raise an exception if the given annotation is already defined. :param str line: The line being parsed :param int position: The position in the line being parsed :param str annotation: The annotation being parsed :raises: RedefinedAnnotationError """ if self.disallow_redefinition and self.has_annotation(annotation): raise RedefinedAnnotationError(self.line_number, line, position, annotation) def handle_annotations_url(self, line, position, tokens): """Handle statements like ``DEFINE ANNOTATION X AS URL "Y"``. :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: RedefinedAnnotationError """ keyword = tokens['name'] self.raise_for_redefined_annotation(line, position, keyword) url = tokens['url'] self.annotation_url_dict[keyword] = url if self.skip_validation: return tokens self.annotation_dict[keyword] = self.manager.get_annotation_entry_names(url) return tokens def handle_annotation_list(self, line, position, tokens): """Handle statements like ``DEFINE ANNOTATION X AS LIST {"Y","Z", ...}``. 
:param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: RedefinedAnnotationError """ annotation = tokens['name'] self.raise_for_redefined_annotation(line, position, annotation) values = set(tokens['values']) self.annotation_dict[annotation] = values self.annotation_lists.add(annotation) return tokens def handle_annotation_pattern(self, line, position, tokens): """Handle statements like ``DEFINE ANNOTATION X AS PATTERN "Y"``. :param str line: The line being parsed :param int position: The position in the line being parsed :param pyparsing.ParseResult tokens: The tokens from PyParsing :raises: RedefinedAnnotationError """ annotation = tokens['name'] self.raise_for_redefined_annotation(line, position, annotation) self.annotation_regex[annotation] = tokens['value'] return tokens def has_enumerated_annotation(self, annotation): """Check if this annotation is defined by an enumeration. :param str annotation: The keyword of a annotation :rtype: bool """ return annotation in self.annotation_dict def has_regex_annotation(self, annotation): """Check if this annotation is defined by a regular expression. :param str annotation: The keyword of a annotation :rtype: bool """ return annotation in self.annotation_regex def has_annotation(self, annotation): """Check if this annotation is defined. :param str annotation: The keyword of a annotation :rtype: bool """ return self.has_enumerated_annotation(annotation) or self.has_regex_annotation(annotation) def has_enumerated_namespace(self, namespace): """Check if this namespace is defined by an enumeration. :param str namespace: The keyword of a namespace :rtype: bool """ return namespace in self.namespace_dict def has_regex_namespace(self, namespace): """Check if this namespace is defined by a regular expression. 
:param str namespace: The keyword of a namespace :rtype: bool """ return namespace in self.namespace_regex def has_namespace(self, namespace): """Check if this namespace is defined. :param str namespace: The keyword of a namespace :rtype: bool """ return self.has_enumerated_namespace(namespace) or self.has_regex_namespace(namespace) def raise_for_version(self, line, position, version): """Check that a version string is valid for BEL documents. This means it's either in the YYYYMMDD or semantic version format. :param str line: The line being parsed :param int position: The position in the line being parsed :param str version: A version string :raises: VersionFormatWarning """ if valid_date_version(version): return if not SEMANTIC_VERSION_STRING_RE.match(version): raise VersionFormatWarning(self.line_number, line, position, version) pybel-0.12.1/src/pybel/parser/utils.py000066400000000000000000000051161334645200200176200ustar00rootroot00000000000000# -*- coding: utf-8 -*- import itertools as itt import logging import re from pyparsing import ( And, Group, Suppress, White, Word, ZeroOrMore, alphanums, dblQuotedString, delimitedList, oneOf, removeQuotes, replaceWith, ) from ..constants import OBJECT, RELATION, SUBJECT log = logging.getLogger('pybel') re_match_bel_header = re.compile("(SET\s+DOCUMENT|DEFINE\s+NAMESPACE|DEFINE\s+ANNOTATION)") def is_int(s): """Determines if an object can be cast to an int :param s: any object :return: true if argument can be cast to an int: :rtype: bool """ try: int(s) return True except ValueError: return False W = Suppress(ZeroOrMore(White())) C = Suppress(',') WCW = W + C + W LPF, RPF = map(Suppress, '()') LP = Suppress('(') + W RP = W + Suppress(')') word = Word(alphanums) identifier = Word(alphanums + '_') quote = dblQuotedString().setParseAction(removeQuotes) qid = quote | identifier delimited_quoted_list = And([Suppress('{'), delimitedList(quote), Suppress('}')]) delimited_unquoted_list = And([Suppress('{'), 
delimitedList(identifier), Suppress('}')]) def nest(*content): """Defines a delimited list by enumerating each element of the list""" if len(content) == 0: raise ValueError('no arguments supplied') return And([LPF, content[0]] + list(itt.chain.from_iterable(zip(itt.repeat(C), content[1:]))) + [RPF]) def one_of_tags(tags, canonical_tag, name=None): """This is a convenience method for defining the tags usable in the :class:`BelParser`. For example, statements like g(HGNC:SNCA) can be expressed also as geneAbundance(HGNC:SNCA). The language must define multiple different tags that get normalized to the same thing. :param list[str] tags: a list of strings that are the tags for a function. For example, ['g', 'geneAbundance'] for the abundance of a gene :param str canonical_tag: the preferred tag name. Does not have to be one of the tags. For example, 'GeneAbundance' (note capitalization) is used for the abundance of a gene :param str name: this is the key under which the value for this tag is put in the PyParsing framework. :rtype: :class:`pyparsing.ParseElement` """ element = oneOf(tags).setParseAction(replaceWith(canonical_tag)) if name is None: return element return element.setResultsName(name) def triple(subject, relation, obj): """Builds a simple triple in PyParsing that has a ``subject relation object`` format""" return And([Group(subject)(SUBJECT), relation(RELATION), Group(obj)(OBJECT)]) pybel-0.12.1/src/pybel/resources/000077500000000000000000000000001334645200200166215ustar00rootroot00000000000000pybel-0.12.1/src/pybel/resources/__init__.py000066400000000000000000000004661334645200200207400ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains utilities for download, parsing and writing BEL resource files as well as utilities for writing BEL Script.""" from . 
import constants, definitions, document, exc from .definitions import * from .definitions import * from .document import * from .exc import * pybel-0.12.1/src/pybel/resources/constants.py000066400000000000000000000011601334645200200212050ustar00rootroot00000000000000# -*- coding: utf-8 -*- import re METADATA_LINE_RE = re.compile("(SET\s+DOCUMENT|DEFINE\s+NAMESPACE|DEFINE\s+ANNOTATION)") citation_format = 'SET Citation = {{"PubMed","{}","{}"}}' evidence_format = 'SET Evidence = "{}"' NAMESPACE_URL_FMT = 'DEFINE NAMESPACE {} AS URL "{}"' NAMESPACE_PATTERN_FMT = 'DEFINE NAMESPACE {} AS PATTERN "{}"' ANNOTATION_URL_FMT = 'DEFINE ANNOTATION {} AS URL "{}"' ANNOTATION_PATTERN_FMT = 'DEFINE ANNOTATION {} AS PATTERN "{}"' def format_annotation_list(annotation, values): return 'DEFINE ANNOTATION {} AS LIST {{{}}}'.format(annotation, ', '.join('"{}"'.format(e) for e in values)) pybel-0.12.1/src/pybel/resources/definitions/000077500000000000000000000000001334645200200211345ustar00rootroot00000000000000pybel-0.12.1/src/pybel/resources/definitions/__init__.py000066400000000000000000000003751334645200200232520ustar00rootroot00000000000000# -*- coding: utf-8 -*- from . 
def make_annotation_header(keyword, description=None, usage=None, version=None, created=None):
    """Make the ``[AnnotationDefinition]`` section of a BELANNO file.

    :param str keyword: Preferred BEL Keyword, maximum length of 8
    :param str description: A description of this annotation. Surrounding whitespace and
     newlines are removed.
    :param str usage: How to use this annotation. Surrounding whitespace and newlines are removed.
    :param str version: Annotation version. Falls back to :func:`get_iso_8601_date` when empty.
    :param str created: Annotation public timestamp, ISO 8601 datetime. Falls back to the current
     time formatted with ``DATETIME_FMT`` when empty.
    :return: An iterator over the lines for the ``[AnnotationDefinition]`` section
    :rtype: iter[str]
    """
    yield '[AnnotationDefinition]'
    yield 'Keyword={}'.format(keyword)
    yield 'TypeString={}'.format('list')
    yield 'VersionString={}'.format(version if version else get_iso_8601_date())
    yield 'CreatedDateTime={}'.format(created if created else time.strftime(DATETIME_FMT))

    if description is not None:
        yield 'DescriptionString={}'.format(description.strip().replace('\n', ''))

    if usage is not None:
        yield 'UsageString={}'.format(usage.strip().replace('\n', ''))


def write_annotation(keyword, values, citation_name, description, usage=None, version=None, created=None,
                     author_name=None, author_copyright=None, author_contact=None, case_sensitive=True, delimiter='|',
                     cacheable=True, file=None, value_prefix=''):
    """Write a BEL annotation (BELANNO) to a file.

    The sections are written in the order ``[AnnotationDefinition]``, ``[Author]``,
    ``[Citation]``, ``[Processing]``, ``[Values]``, each followed by a blank line except the last.

    :param str keyword: The annotation keyword
    :param dict[str, str] values: A dictionary of {name: label}. Entries are written sorted;
     entries whose name is blank are skipped.
    :param str citation_name: The citation name
    :param str description: A description of this annotation
    :param str usage: How to use this annotation
    :param str version: The version of this annotation
    :param str created: The annotation's public timestamp, ISO 8601 datetime
    :param str author_name: The author's name
    :param str author_copyright: The copyright information for this annotation
    :param str author_contact: The contact information for the author of this annotation
    :param bool case_sensitive: Should this config file be interpreted as case-sensitive?
    :param str delimiter: The delimiter between names and labels in this config file. It is
     written to the ``[Processing]`` section and used for every line of ``[Values]``.
    :param bool cacheable: Should this config file be cached?
    :param file file: A writable file or file-like. ``None`` means standard output.
    :param str value_prefix: An optional prefix for all values
    """
    for line in make_annotation_header(keyword, description=description, usage=usage, version=version,
                                       created=created):
        print(line, file=file)
    print(file=file)

    for line in make_author_header(name=author_name, contact=author_contact, copyright_str=author_copyright):
        print(line, file=file)
    print(file=file)

    print('[Citation]', file=file)
    print('NameString={}'.format(citation_name), file=file)
    print(file=file)

    for line in make_properties_header(case_sensitive=case_sensitive, delimiter=delimiter, cacheable=cacheable):
        print(line, file=file)
    print(file=file)

    print('[Values]', file=file)
    for key, value in sorted(values.items()):
        name = key.strip()
        if not name:
            continue
        label = value.strip().replace('\n', '')
        # Use the same delimiter that was declared in [Processing]; a hard-coded '|' would make
        # the file unparseable whenever a different delimiter is requested.
        print('{}{}{}{}'.format(value_prefix, name, delimiter, label), file=file)
import download, is_url __all__ = [ 'parse_bel_resource', 'get_lines', 'get_bel_resource', ] log = logging.getLogger(__name__) def _get_bel_resource_kvp(line, delimiter): """ :param str line: :param str delimiter: :rtype: tuple[str,str] """ split_line = line.rsplit(delimiter, 1) key = split_line[0].strip() value = split_line[1].strip() if 2 == len(split_line) else None return key, value def parse_bel_resource(lines): """Parses a BEL config (BELNS, BELANNO, or BELEQ) file from the given line iterator over the file :param iter[str] lines: An iterable over the lines in a BEL config file :return: A config-style dictionary representing the BEL config file :rtype: dict """ lines = list(lines) value_line = 1 + max( index for index, line in enumerate(lines) if '[Values]' == line.strip() ) metadata_config = ConfigParser(strict=False) metadata_config.optionxform = lambda option: option metadata_config.read_file(lines[:value_line]) delimiter = metadata_config['Processing']['DelimiterString'] value_dict = dict( _get_bel_resource_kvp(line, delimiter) for line in lines[value_line:] ) res = {} res.update({k: dict(v) for k, v in metadata_config.items()}) res['Values'] = value_dict return res def get_lines(location): """Gets the lines from a location :param str location: The URL location to download or a file path to open. File path expands user. 
:return: list[str] :raises: requests.exceptions.HTTPError """ if is_url(location): res = download(location) return list(line.decode('utf-8', errors='ignore').strip() for line in res.iter_lines()) else: with open(os.path.expanduser(location)) as f: return list(f) def get_bel_resource(location): """Loads/downloads and parses a config file from the given url or file path :param str location: The URL or file path to a BELNS, BELANNO, or BELEQ file to download and parse :return: A config-style dictionary representing the BEL config file :rtype: dict :raises: pybel.resources.exc.ResourceError """ log.debug('getting resource: %s', location) try: lines = get_lines(location) except requests.exceptions.HTTPError as e: six.raise_from(MissingResourceError(location), e) try: result = parse_bel_resource(lines) except ValueError as e: six.raise_from(InvalidResourceError(location), e) if not result['Values']: raise EmptyResourceError(location) return result pybel-0.12.1/src/pybel/resources/definitions/namespace.py000066400000000000000000000255501334645200200234510ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import print_function from collections import Iterable, Mapping import time from pybel.constants import NAMESPACE_DOMAIN_TYPES, belns_encodings from pybel.resources.utils import get_iso_8601_date from .write_utils import DATETIME_FMT, make_author_header, make_citation_header, make_properties_header __all__ = [ 'write_namespace', ] def write_namespace(values, namespace_name, namespace_keyword, namespace_domain=None, author_name=None, citation_name=None, namespace_description=None, namespace_species=None, namespace_version=None, namespace_query_url=None, namespace_created=None, author_contact=None, author_copyright=None, citation_description=None, citation_url=None, citation_version=None, citation_date=None, case_sensitive=True, delimiter='|', cacheable=True, functions=None, file=None, value_prefix='', sort_key=None): """Write a BEL namespace (BELNS) to a 
file. :param values: An iterable of values (strings) or dictionary of values to their encodings :type values: iter[str] or dict[str,str] :param str namespace_name: The namespace nam :param str namespace_keyword: Preferred BEL Keyword, maximum length of 8 (corresponds to MIRIAM namespace) :param Optional[str] namespace_domain: One of: :data:`pybel.constants.NAMESPACE_DOMAIN_BIOPROCESS`, :data:`pybel.constants.NAMESPACE_DOMAIN_CHEMICAL`, :data:`pybel.constants.NAMESPACE_DOMAIN_GENE`, or :data:`pybel.constants.NAMESPACE_DOMAIN_OTHER` :param Optional[str] author_name: The namespace's authors :param Optional[str] citation_name: The name of the citation :param Optional[str] namespace_query_url: HTTP URL to query for details on namespace values (must be valid URL) :param Optional[str] namespace_description: Namespace description :param Optional[str] namespace_species: Comma-separated list of species taxonomy id's :param Optional[str] namespace_version: Namespace version :param Optional[str] namespace_created: Namespace public timestamp, ISO 8601 datetime :param Optional[str] author_contact: Namespace author's contact info/email address :param Optional[str] author_copyright: Namespace's copyright/license information :param Optional[str] citation_description: Citation description :param Optional[str] citation_url: URL to more citation information :param Optional[str] citation_version: Citation version :param Optional[str] citation_date: Citation publish timestamp, ISO 8601 Date :param bool case_sensitive: Should this config file be interpreted as case-sensitive? :param str delimiter: The delimiter between names and labels in this config file :param bool cacheable: Should this config file be cached? :param Optional[str] functions: The encoding for the elements in this namespace. 
def write_namespace_body(values, delimiter='|', functions=None, file=None, value_prefix='', sort_key=None):
    """Write the [Values] section of a BEL namespace file.

    Labels that are empty, or blank after stripping, are skipped.

    :param values: An iterable of values (strings) or dictionary of {label:encodings}. For an
     iterable, duplicates are written once. For a dictionary, an encoding of ``None`` means all of
     :data:`pybel.constants.belns_encodings`.
    :type values: iter[str] or dict[str,str]
    :param str delimiter: The delimiter between names and labels in this config file
    :param Optional[str] functions: The encoding for the elements in this namespace. Only used when
     ``values`` is not a dictionary. See :data:`pybel.constants.belns_encodings`
    :param Optional[file] file: A writable file or file-like. ``None`` means standard output.
    :param str value_prefix: a prefix for each name
    :param sort_key: A function applied to each label to sort the values with :func:`sorted`.
     ``None`` uses the default ordering. Give ``False`` to not sort and keep the given order.
    :raises TypeError: if ``values`` is neither a mapping nor an iterable
    """
    # ``collections.Iterable``/``Mapping`` were removed in Python 3.10; prefer ``collections.abc``
    try:
        from collections.abc import Iterable, Mapping
    except ImportError:  # Python 2
        from collections import Iterable, Mapping

    if isinstance(values, Mapping):
        pairs = [
            (label, ''.join(sorted(encoding if encoding is not None else belns_encodings)))
            for label, encoding in values.items()
        ]
        if sort_key is False:
            entries = pairs
        elif sort_key is None:
            entries = sorted(pairs)
        else:
            entries = sorted(pairs, key=lambda pair: sort_key(pair[0]))

    elif isinstance(values, Iterable):
        function_values = ''.join(sorted(functions if functions is not None else belns_encodings.keys()))

        if sort_key is False:
            # keep first-seen order while still dropping duplicates
            seen = set()
            labels = []
            for label in values:
                if label not in seen:
                    seen.add(label)
                    labels.append(label)
        else:
            labels = sorted(set(values), key=sort_key)

        entries = [(label, function_values) for label in labels]

    else:
        raise TypeError('values must be a mapping or an iterable, not {}'.format(type(values).__name__))

    print('[Values]', file=file)

    for label, encodings in entries:
        if not label:
            continue

        label = str(label).strip()

        if not label:
            continue

        print('{}{}{}{}'.format(value_prefix, label, delimiter, encodings), file=file)
:param str namespace_name: The namespace name :param str namespace_keyword: Preferred BEL Keyword, maximum length of 8 :param Optional[str] namespace_domain: One of: :data:`pybel.constants.NAMESPACE_DOMAIN_BIOPROCESS`, :data:`pybel.constants.NAMESPACE_DOMAIN_CHEMICAL`, :data:`pybel.constants.NAMESPACE_DOMAIN_GENE`, or :data:`pybel.constants.NAMESPACE_DOMAIN_OTHER` :param Optional[str] author_name: The namespace's authors :param Optional[str] citation_name: The name of the citation :param Optional[str] namespace_query_url: HTTP URL to query for details on namespace values (must be valid URL) :param Optional[str] namespace_description: Namespace description :param Optional[str] namespace_species: Comma-separated list of species taxonomy id's :param Optional[str] namespace_version: Namespace version :param Optional[str] namespace_created: Namespace public timestamp, ISO 8601 datetime :param Optional[str] author_contact: Namespace author's contact info/email address :param Optional[str] author_copyright: Namespace's copyright/license information :param Optional[str] citation_description: Citation description :param Optional[str] citation_url: URL to more citation information :param Optional[str] citation_version: Citation version :param Optional[str] citation_date: Citation publish timestamp, ISO 8601 Date :param bool case_sensitive: Should this config file be interpreted as case-sensitive? :param str delimiter: The delimiter between names and labels in this config file :param bool cacheable: Should this config file be cached? 
:param Optional[file] file: A writable file or file-like """ namespace_header_lines = make_namespace_header( namespace_name, namespace_keyword, namespace_domain, query_url=namespace_query_url, description=namespace_description, species=namespace_species, version=namespace_version, created=namespace_created ) for line in namespace_header_lines: print(line, file=file) print(file=file) author_header_lines = make_author_header( author_name, contact=author_contact, copyright_str=author_copyright ) for line in author_header_lines: print(line, file=file) print(file=file) citation_header_lines = make_citation_header( citation_name, description=citation_description, url=citation_url, version=citation_version, date=citation_date ) for line in citation_header_lines: print(line, file=file) print(file=file) properties_header_lines = make_properties_header( case_sensitive=case_sensitive, delimiter=delimiter, cacheable=cacheable ) for line in properties_header_lines: print(line, file=file) print(file=file) def make_namespace_header(name, keyword, domain=None, query_url=None, description=None, species=None, version=None, created=None): """Make the ``[Namespace]`` section of a BELNS file. :param str name: The namespace name :param str keyword: Preferred BEL Keyword, maximum length of 8 :param Optional[str] domain: One of: :data:`pybel.constants.NAMESPACE_DOMAIN_BIOPROCESS`, :data:`pybel.constants.NAMESPACE_DOMAIN_CHEMICAL`, :data:`pybel.constants.NAMESPACE_DOMAIN_GENE`, or :data:`pybel.constants.NAMESPACE_DOMAIN_OTHER` :param Optional[str] query_url: HTTP URL to query for details on namespace values (must be valid URL) :param Optional[str] description: Namespace description :param Optional[str] species: Comma-separated list of species taxonomy id's :param Optional[str] version: Namespace version. Defaults to current date in ``YYYYMMDD`` format. 
import getpass

DATETIME_FMT = '%Y-%m-%dT%H:%M:%S'


def make_author_header(name=None, contact=None, copyright_str=None):
    """Generate the lines of the ``[Author]`` section of a BELNS file.

    :param str name: Namespace's authors. Defaults to the result of :func:`getpass.getuser`.
    :param str contact: Namespace author's contact info/email address. Omitted when ``None``.
    :param str copyright_str: Namespace's copyright/license information. Defaults to ``Other/Proprietary``
    :return: An iterable over the lines of the ``[Author]`` section of a BELNS file
    :rtype: iter[str]
    """
    yield '[Author]'

    author = getpass.getuser() if name is None else name
    yield 'NameString={}'.format(author)

    license_text = copyright_str if copyright_str is not None else 'Other/Proprietary'
    yield 'CopyrightString={}'.format(license_text)

    if contact is None:
        return
    yield 'ContactInfoString={}'.format(contact)


def make_citation_header(name, description=None, url=None, version=None, date=None):
    """Generate the lines of the ``[Citation]`` section of a BEL config file.

    Optional fields that are ``None`` are left out; the others follow the name in the fixed order
    date, version, description, URL.

    :param str name: Citation name
    :param str description: Citation description
    :param str url: URL to more citation information
    :param str version: Citation version
    :param str date: Citation publish timestamp, ISO 8601 Date
    :return: An iterable over the lines of the ``[Citation]`` section of a BEL config file
    :rtype: iter[str]
    """
    yield '[Citation]'
    yield 'NameString={}'.format(name)

    optional_fields = (
        ('PublishedDate={}', date),
        ('PublishedVersionString={}', version),
        ('DescriptionString={}', description),
        ('ReferenceURL={}', url),
    )
    for template, value in optional_fields:
        if value is not None:
            yield template.format(value)
log = logging.getLogger(__name__)


def sanitize_file_line_iter(file, note_char=':'):
    """Clean a line iterator by removing extra whitespace, blank lines, comment lines, and log notes.

    A comment line is one whose first non-whitespace character is ``#``. If the character right
    after the ``#`` is ``note_char``, the line is logged at INFO level before being dropped.

    :param iter[str] file: An iterable over the lines in a BEL Script
    :param str note_char: The character denoting a special note
    :returns: An iterator over the line number (starting at 1) and the lines that should be processed
    :rtype: iter[tuple[int,str]]
    """
    for line_number, line in enumerate(file, start=1):
        line = line.strip()

        if not line:
            continue

        if line[0] == '#':
            if len(line) > 1 and line[1] == note_char:
                log.info('NOTE: Line %d: %s', line_number, line)
            continue

        yield line_number, line


def sanitize_file_lines(file):
    """Enumerate a line iterator and return the pairs of (line number, line) that are cleaned.

    On top of :func:`sanitize_file_line_iter`, this joins continued lines with single spaces:

    - a line ending in a backslash is joined with the following lines up to and including the
      first one that does not end in a backslash
    - a line containing exactly one double quote is joined with the following lines up to and
      including the first one that ends in a double quote

    Afterwards, everything from the last occurrence of `` //`` is cut off as a trailing comment.
    The reported line number is that of the first line of a joined group.

    If the input ends in the middle of a continuation, the part collected so far is yielded.
    Calling ``next()`` directly inside the generator would instead raise ``RuntimeError`` on
    Python 3.7+ (PEP 479).

    :param iter[str] file: An iterable over the lines in a BEL Script
    :rtype: iter[tuple[int,str]]
    """
    line_iterator = sanitize_file_line_iter(file)

    for line_number, line in line_iterator:
        if line.endswith('\\'):
            log.log(4, 'Multiline quote starting on line: %d', line_number)
            line = line.strip('\\').strip()

            # consume the continuation from the same underlying iterator
            for _, next_line in line_iterator:
                if not next_line.endswith('\\'):
                    line += " " + next_line.strip()
                    break
                log.log(3, 'Extending line: %s', next_line)
                line += " " + next_line.strip('\\').strip()
            else:
                log.warning('Unterminated line continuation starting on line: %d', line_number)

            log.log(3, 'Final line: %s', line)

        elif 1 == line.count('"'):
            log.log(4, 'PyBEL013 Missing new line escapes [line: %d]', line_number)

            for _, next_line in line_iterator:
                next_line = next_line.strip()
                if next_line.endswith('"'):
                    line = '{} {}'.format(line, next_line)
                    break
                log.log(3, 'Extending line: %s', next_line)
                line = '{} {}'.format(line.strip(), next_line)
            else:
                log.warning('Unterminated quote starting on line: %d', line_number)

            log.log(3, 'Final line: %s', line)

        comment_loc = line.rfind(' //')
        if 0 <= comment_loc:
            line = line[:comment_loc]

        yield line_number, line
:param iter[str] file: An iterable over lines in a BEL document :rtype: tuple[iter[str],iter[str],iter[str]] """ line_number_line_pairs = iter(sanitize_file_lines(file)) last_value = {0: (None, None)} # just do this because python2 doesn't allow nonlocal variables def document_metadata(): """Iterate over the document metadata lines.""" for i, line in line_number_line_pairs: if not line.startswith('SET DOCUMENT'): last_value[0] = i, line return yield i, line def definitions(): """Iterate over the resource definition lines.""" yield last_value[0] for i, line in line_number_line_pairs: if not METADATA_LINE_RE.match(line): last_value[0] = i, line return yield i, line def statements(): """Iterate over the statement lines.""" yield last_value[0] for line_number_line_pari in line_number_line_pairs: yield line_number_line_pari return document_metadata(), definitions(), statements() def make_document_metadata(name, version=None, contact=None, description=None, authors=None, copyright=None, licenses=None, disclaimer=None): """Build a list of lines for the document metadata section of a BEL document. :param str name: The unique name for this BEL document :param Optional[str] version: The version. Defaults to the current date in ``YYYYMMDD`` format. 
def make_document_namespaces(namespace_url=None, namespace_patterns=None):
    """Iterate over lines for the namespace definitions.

    The section banner and the ``# Enumerated Namespaces`` heading are always produced; the
    regular expression heading only appears when ``namespace_patterns`` is non-empty. Definitions
    are sorted by name.

    :param Optional[dict[str,str]] namespace_url: dictionary of {str name: str URL} of namespaces
    :param Optional[dict[str,str]] namespace_patterns: A dictionary of {str name: str regex}
    :return: An iterator over the lines for the namespace definitions
    :rtype: iter[str]
    """
    yield '#' * 80
    yield '#| Namespaces'
    yield '#' * 80 + '\n'
    yield '# Enumerated Namespaces\n'

    # namespace_url defaults to None, so guard before calling .items()
    for name, url in sorted((namespace_url or {}).items()):
        yield NAMESPACE_URL_FMT.format(name, url)

    if namespace_patterns:
        yield '\n# Regular Expression Namespaces\n'

        for name, pattern in sorted(namespace_patterns.items()):
            yield NAMESPACE_PATTERN_FMT.format(name, pattern)

    yield ''


def make_document_annotations(annotation_url=None, annotation_patterns=None, annotation_list=None):
    """Iterate over lines for the annotation definitions.

    The section banner is only produced if at least one kind of annotation is given. URL
    annotations come first, then pattern annotations, then list annotations, each sorted by name;
    the values of a list annotation are sorted as well. The final line is always empty.

    :param Optional[dict[str,str]] annotation_url: A dictionary of {str name: str URL} of annotations
    :param Optional[dict[str,str]] annotation_patterns: A dictionary of {str name: str regex}
    :param Optional[dict[str,set[str]]] annotation_list: A dictionary of {str name: set of name str}
    :return: An iterator over the lines for the annotation definitions
    :rtype: iter[str]
    """
    if annotation_url or annotation_patterns or annotation_list:
        yield '#' * 80
        yield '#| Annotations'
        yield '#' * 80 + '\n'

    if annotation_url:
        for name, url in sorted(annotation_url.items()):
            yield ANNOTATION_URL_FMT.format(name, url)

    if annotation_patterns:
        for name, pattern in sorted(annotation_patterns.items()):
            yield ANNOTATION_PATTERN_FMT.format(name, pattern)

    if annotation_list:
        for annotation, values in sorted(annotation_list.items()):
            yield format_annotation_list(annotation, sorted(values))

    yield ''
class ResourceError(ValueError):
    """Base class for resource errors.

    The offending location is available both as ``args[0]`` and as the ``location`` attribute.
    """

    def __init__(self, location):
        # Name this class, not ValueError: super(ValueError, self) starts the lookup *after*
        # ValueError in the MRO and silently skips it.
        super(ResourceError, self).__init__(location)
        self.location = location


class MissingResourceError(ResourceError):
    """Raised when trying to download a file that doesn't exist anymore"""

    def __str__(self):
        return "Can't locate resource: {}".format(self.location)


class InvalidResourceError(ResourceError):
    """Raised when downloading a file that is not actually a BEL resource file"""

    def __str__(self):
        return 'URL does not point to a BEL resource: {}'.format(self.location)


class EmptyResourceError(ResourceError):
    """Raised when downloading an empty file"""

    def __str__(self):
        return 'Downloaded empty resource at {}'.format(self.location)
The graph contains metadata for the PyBEL version, the BEL script metadata, the namespace definitions, the annotation definitions, and the warnings produced in analysis. Like any :mod:`networkx` graph, all attributes of a given object can be accessed through the :code:`graph` property, like in: :code:`my_graph.graph['my key']`. Convenient property definitions are given for these attributes. """ from . import filters, graph, grouping, mutation, operations, summary from .filters import * from .graph import * from .grouping import * from .mutation import * from .operations import * from .pipeline import Pipeline from .summary import * __all__ = ( graph.__all__ + grouping.__all__ + operations.__all__ + filters.__all__ + summary.__all__ + mutation.__all__ + ['Pipeline'] ) pybel-0.12.1/src/pybel/struct/filters/000077500000000000000000000000001334645200200176035ustar00rootroot00000000000000pybel-0.12.1/src/pybel/struct/filters/__init__.py000066400000000000000000000017631334645200200217230ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains functions for filtering node and edge iterables. It relies heavily on the concepts of `functional programming `_ and the concept of `predicates `_. """ from . 
import ( edge_filters, edge_predicate_builders, edge_predicates, node_filters, node_predicate_builders, node_predicates, node_selection, utils, ) from .edge_filters import * from .edge_predicate_builders import * from .edge_predicates import * from .node_filters import * from .node_predicate_builders import * from .node_predicates import * from .node_selection import * from .utils import * __all__ = ( edge_filters.__all__ + edge_predicates.__all__ + edge_predicate_builders.__all__ + node_filters.__all__ + node_predicate_builders.__all__ + node_predicates.__all__ + node_selection.__all__ + utils.__all__ ) pybel-0.12.1/src/pybel/struct/filters/edge_filters.py000066400000000000000000000102071334645200200226110ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Filter functions for edges in BEL graphs. A edge predicate is a function that takes five arguments: a :class:`BELGraph`, a source node tuple, a target node tuple, a key, and a data dictionary. It returns a boolean representing whether the edge passed the given test. This module contains a set of default functions for filtering lists of edges and building edge predicate functions. A general use for an edge predicate is to use the built-in :func:`filter` in code like :code:`filter(your_edge_predicate, graph.edges(keys=True, data=True))` """ from collections import Iterable from .edge_predicates import keep_edge_permissive __all__ = [ 'invert_edge_predicate', 'and_edge_predicates', 'filter_edges', 'count_passed_edge_filter', ] def invert_edge_predicate(edge_predicate): """Build an edge predicate that is the inverse of the given edge predicate. 
:param edge_predicate: An edge predicate :type edge_predicate: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool :rtype: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool """ def _inverse_filter(graph, u, v, k): return not edge_predicate(graph, u, v, k) return _inverse_filter def and_edge_predicates(edge_predicates=None): """Concatenate multiple edge predicates to a new predicate that requires all predicates to be met. :param edge_predicates: a list of predicates (graph, node, node, key, data) -> bool :type edge_predicates: Optional[(pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool or iter[(pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool]] :return: A combine filter :rtype: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool """ # If no filters are given, then return the trivially permissive filter if not edge_predicates: return keep_edge_permissive # If something that isn't a list or tuple is given, assume it's a function and return it if not isinstance(edge_predicates, Iterable): return edge_predicates edge_predicates = tuple(edge_predicates) # If only one predicate is given, don't bother wrapping it if 1 == len(edge_predicates): return edge_predicates[0] def concatenated_edge_predicate(graph, u, v, k): """Pass only for an edge that pass all enclosed predicates. :param BELGraph graph: A BEL Graph :param BaseEntity u: A BEL node :param BaseEntity v: A BEL node :param str k: The edge key between the given nodes :return: If the edge passes all enclosed predicates :rtype: bool """ return all( edge_predicate(graph, u, v, k) for edge_predicate in edge_predicates ) return concatenated_edge_predicate def filter_edges(graph, edge_predicates=None): """Apply a set of filters to the edges iterator of a BEL graph. 
:param BELGraph graph: A BEL graph :param edge_predicates: A predicate or list of predicates :type edge_predicates: None or ((pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool) or iter[(pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool] :return: An iterable of edges that pass all predicates :rtype: iter[BaseEntity, BaseEntity, str] """ # If no predicates are given, return the standard edge iterator if not edge_predicates: for u, v, k in graph.edges(keys=True): yield u, v, k else: compound_edge_predicate = and_edge_predicates(edge_predicates=edge_predicates) for u, v, k in graph.edges(keys=True): if compound_edge_predicate(graph, u, v, k): yield u, v, k def count_passed_edge_filter(graph, edge_predicates=None): """Return the number of edges passing a given set of predicates. :param pybel.BELGraph graph: A BEL graph :param edge_predicates: A predicate or list of predicates :type edge_predicates: Optional[(pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool or iter[(pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool]] :return: The number of edges passing a given set of predicates :rtype: int """ return sum(1 for _ in filter_edges(graph, edge_predicates=edge_predicates)) pybel-0.12.1/src/pybel/struct/filters/edge_predicate_builders.py000066400000000000000000000205761334645200200250040ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for predicates for edge data from BEL graphs.""" from collections import Iterable from six import string_types from .edge_predicates import edge_predicate, has_authors, has_pubmed, keep_edge_permissive from ...constants import ANNOTATIONS, CAUSAL_RELATIONS, CITATION, CITATION_AUTHORS, CITATION_REFERENCE, RELATION __all__ = [ 'build_annotation_dict_all_filter', 'build_annotation_dict_any_filter', 'build_upstream_edge_predicate', 'build_downstream_edge_predicate', 'build_relation_predicate', 'build_pmid_inclusion_filter', 'build_author_inclusion_filter', ] def _annotation_dict_all_filter(data, query): """Match edges 
with the given dictionary as a sub-dictionary. :param dict data: A PyBEL edge data dictionary :param dict query: The annotation query dict to match :rtype: bool """ annotations = data.get(ANNOTATIONS) if annotations is None: return False for key, values in query.items(): ak = annotations.get(key) if ak is None: return False for value in values: if value not in ak: return False return True def build_annotation_dict_all_filter(annotations): """Build an edge predicate that passes for edges whose data dictionaries's annotations entry are super-dictionaries to the given dictionary. If no annotations are given, will always evaluate to true. :param dict[str,iter[str]] annotations: The annotation query dict to match :rtype: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool """ if not annotations: return keep_edge_permissive @edge_predicate def annotation_dict_all_filter(data): """Check if the all of the annotations in the enclosed query match. :param dict data: A PyBEL edge data dictionary :rtype: bool """ return _annotation_dict_all_filter(data, query=annotations) return annotation_dict_all_filter def _annotation_dict_any_filter(data, query): """Match edges with the given dictionary as a sub-dictionary. :param dict data: A PyBEL edge data dictionary :param dict[str,iter[str]] query: The annotation query dict to match :rtype: bool """ annotations = data.get(ANNOTATIONS) if annotations is None: return False return any( key in annotations and value in annotations[key] for key, values in query.items() for value in values ) def build_annotation_dict_any_filter(annotations): """Build an edge predicate that passes for edges whose data dictionaries match the given dictionary. If the given dictionary is empty, will always evaluate to true. 
:param dict[str,iter[str]] annotations: The annotation query dict to match :rtype: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool """ if not annotations: return keep_edge_permissive @edge_predicate def annotation_dict_any_filter(data): """Checks if the any of the annotations in the enclosed query match :param dict data: A PyBEL edge data dictionary :rtype: bool """ return _annotation_dict_any_filter(data, query=annotations) return annotation_dict_any_filter def build_upstream_edge_predicate(nodes): """Build an edge predicate that pass for relations for which one of the given nodes is the object. :param iter[tuple] nodes: An iterable of PyBEL node tuples :rtype: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool """ nodes = set(nodes) def upstream_filter(graph, u, v, k): """Pass for relations for which one of the given nodes is the object. :type graph: pybel.BELGraph :type u: tuple :type v: tuple :type k: int :rtype: bool """ return v in nodes and graph[u][v][k][RELATION] in CAUSAL_RELATIONS return upstream_filter def build_downstream_edge_predicate(nodes): """Build an edge predicate that passes for edges for which one of the given nodes is the subject. :param iter[tuple] nodes: An iterable of PyBEL node tuples :rtype: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool """ nodes = set(nodes) def downstream_filter(graph, u, v, k): """Pass for relations for which one of the given nodes is the subject. :type graph: pybel.BELGraph :type u: tuple :type v: tuple :type k: int :rtype: bool """ return u in nodes and graph[u][v][k][RELATION] in CAUSAL_RELATIONS return downstream_filter def build_relation_predicate(relations): """Build an edge predicate that passes for edges with the given relation. :param relations: A relation string :type relations: str or iter[str] :rtype: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool """ if isinstance(relations, str): @edge_predicate def relation_predicate(data): """Pass for relations matching the enclosed value. 
:param dict data: A PyBEL edge data dictionary :return: If the edge has the contained relation :rtype: bool """ return data[RELATION] == relations return relation_predicate elif isinstance(relations, Iterable): relation_set = set(relations) @edge_predicate def relation_predicate(data): """Pass for relations matching the enclosed values. :param dict data: A PyBEL edge data dictionary :return: If the edge has one of the contained relations :rtype: bool """ return data[RELATION] in relation_set else: raise TypeError return relation_predicate def build_pmid_inclusion_filter(pmids): """Build an edge predicate that passes for edges with citations from the given PubMed identifier(s). :param pmids: A PubMed identifier or list of PubMed identifiers to filter for :type pmids: str or iter[str] :return: An edge predicate :rtype: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool """ if isinstance(pmids, string_types): @edge_predicate def pmid_inclusion_filter(data): """Pass for edges with PubMed citations matching the contained PubMed identifier. :param dict data: The edge data dictionary :return: If the edge has a PubMed citation with the contained PubMed identifier :rtype: bool """ return has_pubmed(data) and data[CITATION][CITATION_REFERENCE] == pmids else: pmids = set(pmids) @edge_predicate def pmid_inclusion_filter(data): """Pass for edges with PubMed citations matching one of the contained PubMed identifiers. :param dict data: The edge data dictionary :return: If the edge has a PubMed citation with one of the contained PubMed identifiers :rtype: bool """ return has_pubmed(data) and data[CITATION][CITATION_REFERENCE] in pmids return pmid_inclusion_filter def build_author_inclusion_filter(authors): """Build an edge predicate that passes for edges with citations written by the given author(s). 
:param authors: An author or list of authors :type authors: str or iter[str] :return: An edge predicate :rtype: (pybel.BELGraph, BaseEntity, BaseEntity, str) -> bool """ if isinstance(authors, string_types): @edge_predicate def author_filter(data): """Pass for edges with citations with an author that matches the contained author. :param dict data: The edge data dictionary :return: If the edge has a citation with an author that matches the the contained author :rtype: bool """ return has_authors(data) and authors in data[CITATION][CITATION_AUTHORS] else: authors = set(authors) @edge_predicate def author_filter(data): """Pass for edges with citations with an author that matches one or more of the contained authors. :param dict data: The edge data dictionary :return: If the edge has a citation with an author that matches the the contained author :rtype: bool """ return has_authors(data) and any( author in data[CITATION][CITATION_AUTHORS] for author in authors ) return author_filter pybel-0.12.1/src/pybel/struct/filters/edge_predicates.py000066400000000000000000000147231334645200200232730ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Predicates for edge data from BEL graphs.""" from functools import wraps from .utils import part_has_modifier from ..graph import BELGraph from ...constants import ( ACTIVITY, ANNOTATIONS, ASSOCIATION, CAUSAL_RELATIONS, CITATION, CITATION_AUTHORS, CITATION_TYPE, CITATION_TYPE_PUBMED, DEGRADATION, DIRECT_CAUSAL_RELATIONS, EVIDENCE, OBJECT, POLAR_RELATIONS, RELATION, SUBJECT, TRANSLOCATION, ) from ...dsl import BiologicalProcess, Pathology __all__ = [ 'edge_predicate', 'keep_edge_permissive', 'has_provenance', 'has_pubmed', 'has_authors', 'is_causal_relation', 'is_direct_causal_relation', 'is_associative_relation', 'has_polarity', 'edge_has_activity', 'edge_has_degradation', 'edge_has_translocation', 'edge_has_annotation', 'has_pathology_causal', ] def edge_predicate(func): # noqa: D202 """Decorate an edge predicate function that 
only takes a dictionary as its singular argument. Apply this as a decorator to a function that takes a single argument, a PyBEL node data dictionary, to make sure that it can also accept a pair of arguments, a BELGraph and a PyBEL node tuple as well. :type func: (dict) -> bool :rtype: (pybel.BELGraph, tuple, tuple, int) -> bool """ @wraps(func) def _wrapped(*args): x = args[0] if isinstance(x, BELGraph): u, v, k = args[1:4] return func(x[u][v][k]) return func(*args) return _wrapped def keep_edge_permissive(*args, **kwargs): """Return true for all edges. :param dict data: A PyBEL edge data dictionary from a :class:`pybel.BELGraph` :return: Always returns :code:`True` :rtype: bool """ return True @edge_predicate def has_provenance(data): """Check if the edge has provenance information (i.e. citation and evidence). :param dict data: The edge data dictionary :return: If the edge has both a citation and and evidence entry :rtype: bool """ return CITATION in data and EVIDENCE in data @edge_predicate def has_pubmed(data): """Check if the edge has a PubMed citation. :param dict data: A PyBEL edge data dictionary from a :class:`pybel.BELGraph` :return: Does the edge data dictionary has a PubMed citation? :rtype: bool """ return CITATION in data and CITATION_TYPE_PUBMED == data[CITATION][CITATION_TYPE] @edge_predicate def has_authors(data): """Check if the edge contains author information for its citation. :param dict data: A PyBEL edge data dictionary from a :class:`pybel.BELGraph` :return: Does the edge's citation data dictionary have authors included? :rtype: bool """ return CITATION in data and CITATION_AUTHORS in data[CITATION] and data[CITATION][CITATION_AUTHORS] @edge_predicate def is_causal_relation(data): """Check if the given relation is causal. :param dict data: The PyBEL edge data dictionary :rtype: bool """ return data[RELATION] in CAUSAL_RELATIONS @edge_predicate def is_direct_causal_relation(data): """Check if the edge is a direct causal relation. 
:param dict data: The PyBEL edge data dictionary :rtype: bool """ return data[RELATION] in DIRECT_CAUSAL_RELATIONS @edge_predicate def is_associative_relation(data): """Check if the edge has an association relation. :param dict data: The PyBEL edge data dictionary :return: If the edge is a causal edge :rtype: bool """ return data[RELATION] == ASSOCIATION @edge_predicate def has_polarity(data): """Check if the edge has polarity. :param dict data: The edge data dictionary :return: If the edge is a polar edge :rtype: bool """ return data[RELATION] in POLAR_RELATIONS def _has_modifier(data, modifier): """Check if the edge has the given modifier. :param dict data: The edge data dictionary :param str modifier: The modifier to check. One of :data:`pybel.constants.ACTIVITY`, :data:`pybel.constants.DEGRADATION`, or :data:`pybel.constants.TRANSLOCATION`. :return: Does either the subject or object have the given modifier :rtype: bool """ return part_has_modifier(data, SUBJECT, modifier) or part_has_modifier(data, OBJECT, modifier) @edge_predicate def edge_has_activity(data): """Check if the edge contains an activity in either the subject or object. :param dict data: The edge data dictionary :return: If the edge contains an activity in either the subject or object :rtype: bool """ return _has_modifier(data, ACTIVITY) @edge_predicate def edge_has_translocation(data): """Check if the edge has a translocation in either the subject or object. :param dict data: The edge data dictionary :return: If the edge has a translocation in either the subject or object :rtype: bool """ return _has_modifier(data, TRANSLOCATION) @edge_predicate def edge_has_degradation(data): """Check if the edge contains a degradation in either the subject or object. 
:param dict data: The edge data dictionary :return: If the edge contains a degradation in either the subject or object :rtype: bool """ return _has_modifier(data, DEGRADATION) def edge_has_annotation(data, key): """Check if an edge has the given annotation. :param dict data: The data dictionary from a BELGraph's edge :param str key: An annotation key :return: If the annotation key is present in the current data dictionary :rtype: Optional[Any] For example, it might be useful to print all edges that are annotated with 'Subgraph': >>> from pybel.examples import sialic_acid_graph >>> for u, v, data in sialic_acid_graph.edges(data=True): >>> if edge_has_annotation(data, 'Species') >>> print(u, v, data) """ annotations = data.get(ANNOTATIONS) if annotations is None: return return annotations.get(key) def has_pathology_causal(graph, u, v, k): """Check if the subject is a pathology and has a causal relationship with a non bioprocess/pathology. :param pybel.BELGraph graph: A BEL Graph :param BaseEntity u: A BEL node :param BaseEntity v: A BEL node :param str k: The edge key between the given nodes :return: If the subject of this edge is a pathology and it participates in a causal reaction. :rtype: bool """ return ( isinstance(u, Pathology) and is_causal_relation(graph, u, v, k) and not isinstance(v, (Pathology, BiologicalProcess)) ) pybel-0.12.1/src/pybel/struct/filters/node_filters.py000066400000000000000000000115631334645200200226400ustar00rootroot00000000000000# -*- coding: utf-8 -*- """ Node Filters ------------ A node predicate is a function that takes two arguments: a :class:`BELGraph` and a node tuple. It returns a boolean representing whether the node passed the given test. This module contains a set of default functions for filtering lists of nodes and building node predicates. 
A general use for a node predicate is to use the built-in :func:`filter` in code like :code:`filter(your_node_predicate, graph)` """ from collections import Iterable from .node_predicates import keep_node_permissive __all__ = [ 'invert_node_predicate', 'concatenate_node_predicates', 'filter_nodes', 'get_nodes', 'count_passed_node_filter', ] def invert_node_predicate(node_predicate): """Build a node predicate that is the inverse of the given node predicate. :param node_predicate: An edge predicate :type node_predicate: (pybel.BELGraph, BaseEntity) -> bool :rtype: (pybel.BELGraph, BaseEntity) -> bool """ def inverse_predicate(graph, node): """Return the inverse of the enclosed node predicate applied to the graph and node. :type graph: pybel.BELGraph :type node: tuple :return: bool """ return not node_predicate(graph, node) return inverse_predicate def concatenate_node_predicates(node_predicates=None): """Concatenate multiple node predicates to a new predicate that requires all predicates to be met. 
:param node_predicates: A predicate or list of predicates (graph, node) -> bool :type node_predicates: None or (pybel.BELGraph, BaseEntity) -> bool or iter[(pybel.BELGraph, BaseEntity) -> bool] :return: A combine predicate (graph, node) -> bool :rtype: (pybel.BELGraph, BaseEntity) -> bool Example usage: >>> from pybel.dsl import protein, gene >>> from pybel.struct.filters.node_predicates import not_pathology, node_exclusion_predicate_builder >>> app_protein = protein(name='APP', namespace='HGNC') >>> app_gene = gene(name='APP', namespace='HGNC') >>> app_predicate = node_exclusion_predicate_builder([app_protein, app_gene]) >>> my_predicate = concatenate_node_predicates([not_pathology, app_predicate]) """ # If no predicates are given, then return the trivially permissive predicate if not node_predicates: return keep_node_permissive # If a predicate outside a list is given, just return it if not isinstance(node_predicates, Iterable): return node_predicates node_predicates = list(node_predicates) # If only one predicate is given, don't bother wrapping it if 1 == len(node_predicates): return node_predicates[0] def concatenated_node_predicate(graph, node): """Pass only for a nodes that pass all enclosed predicates. :param BELGraph graph: A BEL Graph :param tuple node: A BEL node :return: If the node passes all enclosed predicates :rtype: bool """ return all( node_predicate(graph, node) for node_predicate in node_predicates ) return concatenated_node_predicate def filter_nodes(graph, node_predicates=None): """Apply a set of predicates to the nodes iterator of a BEL graph. 
:param BELGraph graph: A BEL graph :param node_predicates: A node predicate or list/tuple of node predicates :type node_predicates: None or ((pybel.BELGraph, BaseEntity) -> bool) or iter[(pybel.BELGraph, BaseEntity) -> bool] :return: An iterable of nodes that pass all predicates :rtype: iter[tuple] """ if not node_predicates: # If no predicates are given, return the standard node iterator for node in graph: yield node else: concatenated_predicate = concatenate_node_predicates(node_predicates=node_predicates) for node in graph: if concatenated_predicate(graph, node): yield node def get_nodes(graph, node_predicates=None): """Get the set of all nodes that pass the predicates. :param BELGraph graph: A BEL graph :param node_predicates: A node predicate or list/tuple of node predicates :type node_predicates: None or ((pybel.BELGraph, BaseEntity) -> bool) or iter[(pybel.BELGraph, BaseEntity) -> bool] :return: The set of nodes passing the predicates :rtype: set[tuple] """ return set(filter_nodes(graph, node_predicates=node_predicates)) def count_passed_node_filter(graph, node_predicates=None): """Count how many nodes pass a given set of node predicates. 
:param pybel.BELGraph graph: A BEL graph :param node_predicates: A node predicate or list/tuple of node predicates :type node_predicates: None or ((pybel.BELGraph, BaseEntity) -> bool) or iter[(pybel.BELGraph, BaseEntity) -> bool] :return: The number of nodes passing the given set of predicates :rtype: int """ return sum(1 for _ in filter_nodes(graph, node_predicates=node_predicates)) pybel-0.12.1/src/pybel/struct/filters/node_predicate_builders.py000066400000000000000000000135541334645200200250230ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for building node predicates.""" from collections import Iterable from six import string_types from ...constants import NAME from ...dsl import BaseEntity __all__ = [ 'function_inclusion_filter_builder', 'data_missing_key_builder', 'build_node_data_search', 'build_node_graph_data_search', 'build_node_key_search', 'build_node_name_search', ] def function_inclusion_filter_builder(func): """Build a filter that only passes on nodes of the given function(s). 
:param func: A BEL Function or list/set/tuple of BEL functions :type func: str or iter[str] :return: A node filter (graph, node) -> bool :rtype: (pybel.BELGraph, BaseEntity) -> bool """ if isinstance(func, string_types): return _single_function_inclusion_filter_builder(func) elif isinstance(func, Iterable): return _collection_function_inclusion_builder(func) raise TypeError('Invalid type for argument: {}'.format(func)) def _single_function_inclusion_filter_builder(func): """ :param str func: A BEL function :type func: str :return: (pybel.BELGraph, BaseEntity) -> bool """ def function_inclusion_filter(graph, node): """Passes only for a node that has the enclosed function :param BELGraph graph: A BEL Graph :param BaseEntity node: A BEL node :return: If the node doesn't have the enclosed function :rtype: bool """ return node.function == func return function_inclusion_filter def _collection_function_inclusion_builder(funcs): """ :param funcs: A sequence of BEL functions :type funcs: iter[str] :return: (pybel.BELGraph, BaseEntity) -> bool """ funcs = set(funcs) if not funcs: raise ValueError('can not build function inclusion filter with empty list of functions') def functions_inclusion_filter(graph, node): """Passes only for a node that is one of the enclosed functions :param BELGraph graph: A BEL Graph :param BaseEntity node: A BEL node :return: If the node doesn't have the enclosed functions :rtype: bool """ return node.function in funcs return functions_inclusion_filter def data_missing_key_builder(key): """Build a filter that passes only on nodes that don't have the given key in their data dictionary. :param str key: A key for the node's data dictionary :return: A node filter (graph, node) -> bool :rtype: (pybel.BELGraph, BaseEntity) -> bool """ def data_does_not_contain_key(graph, node): """Pass only for a node that doesn't contain the enclosed key in its data dictionary. 
:param pybel.BELGraph graph: A BEL Graph :param tuple node: A BEL node :return: If the node doesn't contain the enclosed key in its data dictionary :rtype: bool """ return key not in graph.nodes[node] return data_does_not_contain_key def build_node_data_search(key, data_predicate): """Pass for nodes who have the given key in their data dictionaries and whose associated values pass the given filter function. :param str key: The node data dictionary key to check :param data_predicate: The filter to apply to the node data dictionary :type data_predicate: (Any) -> bool :return: A node predicate :rtype: (pybel.BELGraph, BaseEntity) -> bool """ def node_data_filter(graph, node): """Pass if the given node has a given data annotated and passes the contained filter. :type graph: pybel.BELGraph :type node: BaseEntity :return: If the node has the contained key in its data dictionary and passes the contained filter :rtype: bool """ value = node.get(key) return value is not None and data_predicate(value) return node_data_filter def build_node_graph_data_search(key, data_predicate): """Build a function for testing data associated with the node in the graph. :param str key: The node data dictionary key to check :param data_predicate: The filter to apply to the node data dictionary :type data_predicate: (Any) -> bool :return: A node predicate :rtype: (pybel.BELGraph, BaseEntity) -> bool """ def node_data_filter(graph, node): """Pass if the given node has a given data annotated and passes the contained filter. :type graph: pybel.BELGraph :type node: BaseEntity :return: If the node has the contained key in its data dictionary and passes the contained filter :rtype: bool """ value = graph.nodes[node].get(key) return value is not None and data_predicate(value) return node_data_filter def build_node_key_search(query, key): """Build a node filter that only passes for nodes whose values for the given key are superstrings of the query string(s). 
:param query: The query string or strings to check if they're in the node name :type query: str or iter[str] :param str key: The key for the node data dictionary. Should refer only to entries that have str values :return: A node predicate :rtype: (pybel.BELGraph, BaseEntity) -> bool """ if isinstance(query, string_types): return build_node_data_search(key, lambda s: query.lower() in s.lower()) if isinstance(query, Iterable): return build_node_data_search(key, lambda s: any(q.lower() in s.lower() for q in query)) raise TypeError('query is wrong type: %s', query) def build_node_name_search(query): """Search nodes' names. Is a thin wrapper around :func:`build_node_key_search` with :data:`pybel.constants.NAME` :param query: The query string or strings to check if they're in the node name :type query: str or iter[str] :return: A node predicate :rtype: (pybel.BELGraph, BaseEntity) -> bool """ return build_node_key_search(query=query, key=NAME) pybel-0.12.1/src/pybel/struct/filters/node_predicates.py000066400000000000000000000231161334645200200233100ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Pre-defined predicates for nodes.""" from functools import wraps from .node_predicate_builders import function_inclusion_filter_builder from .utils import part_has_modifier from ..graph import BELGraph from ...constants import ( ABUNDANCE, ACTIVITY, CAUSAL_RELATIONS, DEGRADATION, FRAGMENT, FUNCTION, GENE, GMOD, HGVS, KIND, MIRNA, OBJECT, PATHOLOGY, PMOD, PROTEIN, RELATION, RNA, SUBJECT, TRANSLOCATION, VARIANTS, ) from ...dsl import BaseEntity __all__ = [ 'node_predicate', 'keep_node_permissive', 'is_abundance', 'is_gene', 'is_protein', 'is_pathology', 'not_pathology', 'has_variant', 'has_protein_modification', 'has_gene_modification', 'has_hgvs', 'has_fragment', 'has_activity', 'is_degraded', 'is_translocated', 'has_causal_in_edges', 'has_causal_out_edges', 'node_inclusion_predicate_builder', 'node_exclusion_predicate_builder', 'is_causal_source', 'is_causal_sink', 
'is_causal_central', ] def node_predicate(f): """Tag a node predicate that takes a dictionary to also accept a pair of (BELGraph, tuple). Apply this as a decorator to a function that takes a single argument, a PyBEL node data dictionary, to make sure that it can also accept a pair of arguments, a BELGraph and a PyBEL node tuple as well. :type f: (dict) -> bool :rtype: (dict) or (pybel.BELGraph,tuple,*) -> bool """ @wraps(f) def wrapped(*args): x = args[0] if isinstance(x, BELGraph): return f(args[1], *args[2:]) # Assume: # if isinstance(x, dict): return f(*args) return wrapped @node_predicate def keep_node_permissive(data): """Return true for all nodes. Given BEL graph :code:`graph`, applying :func:`keep_node_permissive` with a predicate on the nodes iterable as in :code:`filter(keep_node_permissive, graph)` will result in the same iterable as iterating directly over a :class:`BELGraph` :param dict data: A PyBEL data dictionary :return: Always returns :data:`True` :rtype: bool """ return True @node_predicate def is_abundance(data): """Return true if the node is an abundance. :param dict data: A PyBEL data dictionary :rtype: bool """ return data[FUNCTION] == ABUNDANCE @node_predicate def is_gene(data): """Return true if the node is a gene. :param dict data: A PyBEL data dictionary :rtype: bool """ return data[FUNCTION] == GENE @node_predicate def is_protein(data): """Return true if the node is a protein. :param dict data: A PyBEL data dictionary :rtype: bool """ return data[FUNCTION] == PROTEIN is_central_dogma = function_inclusion_filter_builder([GENE, RNA, MIRNA, PROTEIN]) """Return true if the node is a gene, RNA, miRNA, or Protein. :param dict data: A PyBEL data dictionary :rtype: bool """ @node_predicate def is_pathology(data): """Return true if the node is a pathology. :param dict data: A PyBEL data dictionary :rtype: bool """ return data[FUNCTION] == PATHOLOGY @node_predicate def not_pathology(data): """Return false if the node is a pathology. 
:param dict data: A PyBEL data dictionary :rtype: bool """ return data[FUNCTION] != PATHOLOGY @node_predicate def has_variant(data): """Return true if the node has any variants. :param dict data: A PyBEL data dictionary :rtype: bool """ return VARIANTS in data def _node_has_variant(data, variant): """Return true if the node has at least one of the given variant. :param dict data: A PyBEL data dictionary :param str variant: :data:`PMOD`, :data:`HGVS`, :data:`GMOD`, or :data:`FRAGMENT` :rtype: bool """ return VARIANTS in data and any( variant_dict[KIND] == variant for variant_dict in data[VARIANTS] ) @node_predicate def has_protein_modification(data): """Return true if the node has a protein modification variant. :param dict data: A PyBEL data dictionary :rtype: bool """ return _node_has_variant(data, PMOD) @node_predicate def has_gene_modification(data): """Return true if the node has a gene modification. :param dict data: A PyBEL data dictionary :rtype: bool """ return _node_has_variant(data, GMOD) @node_predicate def has_hgvs(data): """Return true if the node has an HGVS variant. :param dict data: A PyBEL data dictionary :rtype: bool """ return _node_has_variant(data, HGVS) @node_predicate def has_fragment(data): """Return true if the node has a fragment. :param dict data: A PyBEL data dictionary :rtype: bool """ return _node_has_variant(data, FRAGMENT) def _node_has_modifier(graph, node, modifier): """Return true if over any of a nodes edges, it has a given modifier. Modifier can be one of: - :data:`pybel.constants.ACTIVITY`, - :data:`pybel.constants.DEGRADATION` - :data:`pybel.constants.TRANSLOCATION`. 
def has_causal_in_edges(graph, node):
    """Check whether at least one incoming edge of the node has a causal relation.

    :param pybel.BELGraph graph: A BEL graph
    :param tuple node: A BEL node
    :rtype: bool
    """
    for _, _, edge_data in graph.in_edges(node, data=True):
        if edge_data[RELATION] in CAUSAL_RELATIONS:
            return True

    return False
def node_exclusion_predicate_builder(nodes):
    """Build a node predicate that rejects exactly the given nodes.

    :param nodes: An iterable of the nodes to exclude
    :type nodes: iter[BaseEntity]
    :rtype: (BELGraph, BaseEntity) -> bool
    """
    # Materialize once so each membership test is a set lookup
    excluded = set(nodes)

    @node_predicate
    def node_exclusion_predicate(node):
        """Return true if the node is not one of the excluded nodes.

        :param BaseEntity node: A PyBEL data dictionary
        :rtype: bool
        """
        return node not in excluded

    return node_exclusion_predicate
def is_causal_central(graph, node):
    """Check whether the node sits in the middle of a causal chain.

    A causal-central node is neither a causal source nor a causal sink:

    - It has causal in edge(s)
    - It has causal out edge(s)

    :param pybel.BELGraph graph: A BEL graph
    :param tuple node: A BEL node
    :return: If the node is neither a causal sink nor a causal source
    :rtype: bool
    """
    if not has_causal_in_edges(graph, node):
        return False

    return has_causal_out_edges(graph, node)
def part_has_modifier(data, part, modifier):
    """Check whether the subject/object part of an edge carries the given modifier.

    Returns false when the edge has no entry for the part, or when that entry has no modifier.

    :param dict data: A PyBEL edge data dictionary
    :param str part: either :data:`pybel.constants.SUBJECT` or :data:`pybel.constants.OBJECT`
    :param modifier: The modifier to look for
    :rtype: bool
    """
    side = data.get(part)
    observed = None if side is None else side.get(MODIFIER)
    return observed is not None and observed == modifier
METADATA_LICENSES, METADATA_NAME, METADATA_VERSION, NAMESPACE, OBJECT, ORTHOLOGOUS, PART_OF, PRODUCTS, REACTANTS, RELATION, SUBJECT, TRANSCRIBED_TO, TRANSLATED_TO, VARIANTS, ) from ..dsl import BaseEntity, activity from ..utils import get_version, hash_edge __all__ = [ 'BELGraph', ] log = logging.getLogger(__name__) RESOURCE_DICTIONARY_NAMES = ( GRAPH_NAMESPACE_URL, GRAPH_NAMESPACE_PATTERN, GRAPH_ANNOTATION_URL, GRAPH_ANNOTATION_PATTERN, GRAPH_ANNOTATION_LIST, ) def _clean_annotations(annotations_dict): """Fix the formatting of annotation dict. :type annotations_dict: dict[str,str] or dict[str,set[str]] or dict[str,dict[str,bool]] :rtype: dict[str,dict[str,bool]] """ return { key: ( values if isinstance(values, dict) else {v: True for v in values} if isinstance(values, set) else {values: True} ) for key, values in annotations_dict.items() } class BELGraph(nx.MultiDiGraph): """An extension to :class:`networkx.MultiDiGraph` to represent BEL.""" def __init__(self, name=None, version=None, description=None, authors=None, contact=None, license=None, copyright=None, disclaimer=None, data=None, **kwargs): """The default constructor parses a BEL graph using the built-in :mod:`networkx` methods. :param str name: The graph's name :param str version: The graph's version. Recommended to use `semantic versioning `_ or ``YYYYMMDD`` format. :param str description: A description of the graph :param str authors: The authors of this graph :param str contact: The contact email for this graph :param str license: The license for this graph :param str copyright: The copyright for this graph :param str disclaimer: The disclaimer for this graph :param data: initial graph data to pass to :class:`networkx.MultiDiGraph` :param kwargs: keyword arguments to pass to :class:`networkx.MultiDiGraph` For IO, see the :mod:`pybel.io` module. """ # TODO check that kwargs doesn't use any special pybel ones! 
def fresh_copy(self):
    """Create an unfilled :class:`BELGraph` as a hook for other :mod:`networkx` functions.

    Is necessary for .copy() to work.

    The returned graph is built with the default constructor arguments, so it does not carry over any
    metadata, nodes, or edges from this graph.

    :return: A new, empty graph
    :rtype: BELGraph
    """
    return BELGraph()
@property
def authors(self):
    """The graph's authors, from the ``SET DOCUMENT Authors = "..."`` entry in the source BEL Script.

    Like the other document metadata properties, this is :data:`None` when no authors have been set rather
    than raising a :class:`KeyError`.

    :rtype: Optional[str]
    """
    return self.document.get(METADATA_AUTHORS)


@authors.setter
def authors(self, authors):
    """Store the authors in the graph's document metadata.

    :param str authors: The authors of this graph
    """
    self.document[METADATA_AUTHORS] = authors
@property
def defined_namespace_keywords(self):
    """The set of all keywords defined as namespaces in this graph.

    Combines the keywords of the pattern-defined and the URL-defined namespaces.

    :rtype: set[str]
    """
    keywords = set(self.namespace_pattern)
    keywords.update(self.namespace_url)
    return keywords
@property
def defined_annotation_keywords(self):
    """The set of all keywords defined as annotations in this graph.

    Combines the keywords of the pattern-defined, URL-defined, and locally listed annotations.

    :rtype: set[str]
    """
    keywords = set()

    for definitions in (self.annotation_pattern, self.annotation_url, self.annotation_list):
        keywords.update(definitions)

    return keywords
def skip_storing_namespace(self, namespace):
    """Check if the namespace should be skipped.

    A namespace is skipped only when it is given, it is defined by URL in this graph, and that URL is
    listed among the graph's uncached namespaces.

    :param Optional[str] namespace: The namespace keyword to check
    :rtype: bool
    """
    if namespace is None:
        return False

    if namespace not in self.namespace_url:
        return False

    url = self.namespace_url[namespace]
    return url in self.uncached_namespaces
:param BaseEntity u: The source node :param BaseEntity v: The target node """ return self.add_unqualified_edge(u, v, TRANSCRIBED_TO) def add_translation(self, u, v): """Add a translation relation from a RNA to a protein. :param BaseEntity u: The source node :param BaseEntity v: The target node """ return self.add_unqualified_edge(u, v, TRANSLATED_TO) def _add_two_way_unqualified_edge(self, u, v, relation): """Add an unqualified edge both ways.""" self.add_unqualified_edge(v, u, relation) return self.add_unqualified_edge(u, v, relation) def add_equivalence(self, u, v): """Add two equivalence relations for the nodes. :param BaseEntity u: The source node :param BaseEntity v: The target node """ return self._add_two_way_unqualified_edge(u, v, EQUIVALENT_TO) def add_orthology(self, u, v): """Add two orthology relations for the nodes. :param BaseEntity u: The source node :param BaseEntity v: The target node """ return self._add_two_way_unqualified_edge(u, v, ORTHOLOGOUS) def add_is_a(self, u, v): """Add an isA relationship such that ``u isA v``. :param BaseEntity u: The source node :param BaseEntity v: The target node """ return self.add_unqualified_edge(u, v, IS_A) def add_part_of(self, u, v): """Add an partOf relationship such that ``u partOf v``. :param BaseEntity u: The source node :param BaseEntity v: The target node """ return self.add_unqualified_edge(u, v, PART_OF) def add_has_member(self, u, v): """Add an hasMember relationship such that ``u hasMember v``. :param BaseEntity u: The source node :param BaseEntity v: The target node """ return self.add_unqualified_edge(u, v, HAS_MEMBER) def add_has_component(self, u, v): """Add an hasComponent relationship such that u hasComponent v. :param BaseEntity u: The source node :param BaseEntity v: The target node """ return self.add_unqualified_edge(u, v, HAS_COMPONENT) def add_has_variant(self, u, v): """Add an hasVariant relationship such that ``u hasVariant v``. 
def add_increases(self, u, v, evidence, citation, annotations=None, subject_modifier=None, object_modifier=None,
                  **attr):
    """Add an increases relationship with :meth:`add_qualified_edge` using :data:`pybel.constants.INCREASES`.

    All arguments are forwarded unchanged to :meth:`add_qualified_edge`; only the relation is filled in.

    :param BaseEntity u: The source node
    :param BaseEntity v: The target node
    :param str evidence: The evidence string from an article
    :param dict[str,str] or str citation: The citation data dictionary for this evidence. If a string is given,
     assumes it's a PubMed identifier and auto-fills the citation type.
    :param annotations: The annotations data dictionary
    :type annotations: Optional[dict[str,str] or dict[str,set] or dict[str,dict[str,bool]]]
    :param Optional[dict] subject_modifier: The modifiers (like activity) on the subject node. See data model
     documentation.
    :param Optional[dict] object_modifier: The modifiers (like activity) on the object node. See data model
     documentation.
    :param attr: Additional keyword arguments passed through to :meth:`add_qualified_edge`
    :return: The hash of the edge
    :rtype: str
    """
    return self.add_qualified_edge(u=u, v=v, relation=INCREASES, evidence=evidence, citation=citation,
                                   annotations=annotations, subject_modifier=subject_modifier,
                                   object_modifier=object_modifier, **attr)
def add_decreases(self, u, v, evidence, citation, annotations=None, subject_modifier=None, object_modifier=None,
                  **attr):
    """Add a :data:`pybel.constants.DECREASES` relationship with :meth:`add_qualified_edge`.

    All arguments are forwarded unchanged to :meth:`add_qualified_edge`; only the relation is filled in.

    :param BaseEntity u: The source node
    :param BaseEntity v: The target node
    :param str evidence: The evidence string from an article
    :param dict[str,str] or str citation: The citation data dictionary for this evidence. If a string is given,
     assumes it's a PubMed identifier and auto-fills the citation type.
    :param annotations: The annotations data dictionary
    :type annotations: Optional[dict[str,str] or dict[str,set] or dict[str,dict[str,bool]]]
    :param Optional[dict] subject_modifier: The modifiers (like activity) on the subject node. See data model
     documentation.
    :param Optional[dict] object_modifier: The modifiers (like activity) on the object node. See data model
     documentation.
    :param attr: Additional keyword arguments passed through to :meth:`add_qualified_edge`
    :return: The hash of the edge
    :rtype: str
    """
    return self.add_qualified_edge(u=u, v=v, relation=DECREASES, evidence=evidence, citation=citation,
                                   annotations=annotations, subject_modifier=subject_modifier,
                                   object_modifier=object_modifier, **attr)
def add_qualified_edge(self, u, v, relation, evidence, citation, annotations=None, subject_modifier=None,
                       object_modifier=None, **attr):
    """Add a qualified edge.

    Qualified edges have a relation, evidence, citation, and optional annotations, subject modifications,
    and object modifications.

    :param BaseEntity u: The source node
    :param BaseEntity v: The target node
    :param str relation: The type of relation this edge represents
    :param str evidence: The evidence string from an article
    :param dict[str,str] or str citation: The citation data dictionary for this evidence. If a string is given,
     assumes it's a PubMed identifier and auto-fills the citation type.
    :param annotations: The annotations data dictionary
    :type annotations: Optional[dict[str,str] or dict[str,set] or dict[str,dict[str,bool]]]
    :param Optional[dict] subject_modifier: The modifiers (like activity) on the subject node. See data model
     documentation.
    :param Optional[dict] object_modifier: The modifiers (like activity) on the object node. See data model
     documentation.
    :param attr: Additional entries for the edge data dictionary. The relation and evidence given above
     overwrite entries stored under the same keys.
    :return: The hash of the edge
    :rtype: str
    :raises TypeError: If the citation is neither a string nor a dictionary
    """
    attr.update({
        RELATION: relation,
        EVIDENCE: evidence,
    })

    if isinstance(citation, string_types):
        # A bare string is taken to be a PubMed identifier
        attr[CITATION] = {
            CITATION_TYPE: CITATION_TYPE_PUBMED,
            CITATION_REFERENCE: citation
        }
    elif isinstance(citation, dict):
        attr[CITATION] = citation
    else:
        raise TypeError('citation must be a string or a dict, not {}'.format(type(citation).__name__))

    if annotations:  # clean up annotations
        attr[ANNOTATIONS] = _clean_annotations(annotations)

    if subject_modifier:
        attr[SUBJECT] = subject_modifier

    if object_modifier:
        attr[OBJECT] = object_modifier

    return self._help_add_edge(u, v, attr)
:return: The hash of the edge :rtype: str """ return self.add_qualified_edge( u, v, relation=DECREASES, evidence=evidence, citation=citation, annotations=annotations, object_modifier=object_modifier or activity() ) def _has_edge_attr(self, u, v, key, attr): """ :type u: BaseEntity :type v: BaseEntity :type key: str :type attr: str :rtype: bool """ assert isinstance(u, BaseEntity) assert isinstance(v, BaseEntity) return attr in self[u][v][key] def has_edge_citation(self, u, v, key): """Check if the given edge has a citation. :rtype: bool """ return self._has_edge_attr(u, v, key, CITATION) def has_edge_evidence(self, u, v, key): """Check if the given edge has an evidence. :rtype: boolean """ return self._has_edge_attr(u, v, key, EVIDENCE) def _get_edge_attr(self, u, v, key, attr): return self[u][v][key].get(attr) def get_edge_citation(self, u, v, key): """Get the citation for a given edge. :rtype: Optional[dict] """ return self._get_edge_attr(u, v, key, CITATION) def get_edge_evidence(self, u, v, key): """Get the evidence for a given edge. :rtype: Optional[str] """ return self._get_edge_attr(u, v, key, EVIDENCE) def get_edge_annotations(self, u, v, key): """Get the annotations for a given edge. :rtype: Optional[dict] """ return self._get_edge_attr(u, v, key, ANNOTATIONS) def _get_node_attr(self, node, attr): assert isinstance(node, BaseEntity) return self.nodes[node].get(attr) def _has_node_attr(self, node, attr): assert isinstance(node, BaseEntity) return attr in self.nodes[node] def _set_node_attr(self, node, attr, value): assert isinstance(node, BaseEntity) self.nodes[node][attr] = value def get_node_description(self, node): """Get the description for a given node. :type node: BaseEntity :rtype: Optional[str] """ return self._get_node_attr(node, DESCRIPTION) def has_node_description(self, node): """Check if a node description is already present. 
def __add__(self, other):
    """Full join another graph onto a deep copy of this one using :func:`pybel.struct.left_full_join`.

    Neither operand is modified; the joined result is a new graph.

    :param BELGraph other: Another BEL graph
    :rtype: BELGraph
    :raises TypeError: If ``other`` is not a :class:`BELGraph`

    Example usage:

    >>> import pybel
    >>> g = pybel.from_path('...')
    >>> h = pybel.from_path('...')
    >>> k = g + h
    """
    if isinstance(other, BELGraph):
        merged = deepcopy(self)
        left_full_join(merged, other)
        return merged

    raise TypeError('{} is not a {}'.format(other, self.__class__.__name__))
def _has_no_equivalent_edge(self, u, v):
    """Check that none of the edges from ``u`` to ``v`` is an equivalence relation.

    :param u: The source node
    :param v: The target node
    :rtype: bool
    """
    for data in self[u][v].values():
        if EQUIVALENT_TO == data[RELATION]:
            return False

    return True
:param BaseEntity node: A PyBEL node :rtype: iter[BaseEntity] """ for v in self[node]: if v in visited: continue if self._has_no_equivalent_edge(node, v): continue yield v visited.add(v) for w in self._equivalent_node_iterator_helper(v, visited): yield w def iter_equivalent_nodes(self, node): """Iterate over nodes that are equivalent to the given node, including the original, :param BaseEntity node: A PyBEL node :rtype: iter[BaseEntity] """ yield node for n in self._equivalent_node_iterator_helper(node, {node}): yield n def get_equivalent_nodes(self, node): """Get a set of equivalent nodes to this node, excluding the given node. :param node: A PyBEL node :type node: BaseEntity :rtype: set[BaseEntity] """ if isinstance(node, BaseEntity): return set(self.iter_equivalent_nodes(node)) return set(self.iter_equivalent_nodes(node)) def _node_has_namespace_helper(self, node, namespace): """Check that the node has namespace information. Might have cross references in future. :param BaseEntity node: A PyBEL node :rtype: bool """ return namespace == node.get(NAMESPACE) def node_has_namespace(self, node, namespace): """Check if the node have the given namespace? This also should look in the equivalent nodes. :param BaseEntity node: A PyBEL node :param str namespace: A namespace :rtype: bool """ return any( self._node_has_namespace_helper(n, namespace) for n in self.iter_equivalent_nodes(node) ) def _describe_list(self): """Return useful information about the graph as a list of tuples. :rtype: list[tuple[str,float]] """ number_nodes = self.number_of_nodes() result = [ ('Number of Nodes', number_nodes), ('Number of Edges', self.number_of_edges()), ('Network Density', '{:.2E}'.format(nx.density(self))), ('Number of Components', nx.number_weakly_connected_components(self)), ] if self.warnings: result.append(('Number of Warnings', len(self.warnings))) return result def summary_dict(self): """Return a dictionary that summarizes the graph. 
:rtype: dict[str,float] """ return dict(self._describe_list()) def summary_str(self): """Return a string that summarizes the graph. :rtype: str """ return '{}\n'.format(self) + '\n'.join( '{}: {}'.format(label, value) for label, value in self._describe_list() ) def summarize(self, file=None): """Print a summary of the graph. :param Optional[file] file: A file or file-like to print to. Defaults to standard out. """ print(self.summary_str(), file=file) pybel-0.12.1/src/pybel/struct/grouping/000077500000000000000000000000001334645200200177655ustar00rootroot00000000000000pybel-0.12.1/src/pybel/struct/grouping/__init__.py000066400000000000000000000003661334645200200221030ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for grouping BEL graphs into sub-graphs.""" from . import annotations, provenance from .annotations import * from .provenance import * __all__ = ( annotations.__all__ + provenance.__all__ ) pybel-0.12.1/src/pybel/struct/grouping/annotations.py000066400000000000000000000037461334645200200227060ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for grouping sub-graphs.""" import logging from collections import defaultdict from .utils import cleanup from ...constants import ANNOTATIONS log = logging.getLogger(__name__) __all__ = [ 'get_subgraphs_by_annotation', ] def _get_subgraphs_by_annotation_disregard_undefined(graph, annotation): result = defaultdict(graph.fresh_copy) for source, target, key, data in graph.edges(keys=True, data=True): annotation_dict = data.get(ANNOTATIONS) if annotation_dict is None: continue if annotation not in annotation_dict: continue for value in annotation_dict[annotation]: result[value].add_edge(source, target, key=key, **data) return dict(result) def _get_subgraphs_by_annotation_keep_undefined(graph, annotation, sentinel): result = defaultdict(graph.fresh_copy) for source, target, key, data in graph.edges(keys=True, data=True): annotation_dict = data.get(ANNOTATIONS) if annotation_dict is None or 
annotation not in annotation_dict: result[sentinel].add_edge(source, target, key=key, **data) else: for value in annotation_dict[annotation]: result[value].add_edge(source, target, key=key, **data) return dict(result) def get_subgraphs_by_annotation(graph, annotation, sentinel=None): """Stratify the given graph into sub-graphs based on the values for edges' annotations. :param pybel.BELGraph graph: A BEL graph :param str annotation: The annotation to group by :param Optional[str] sentinel: The value to stick unannotated edges into. If none, does not keep undefined. :rtype: dict[str,pybel.BELGraph] """ if sentinel is not None: subgraphs = _get_subgraphs_by_annotation_keep_undefined(graph, annotation, sentinel) else: subgraphs = _get_subgraphs_by_annotation_disregard_undefined(graph, annotation) cleanup(graph, subgraphs) return subgraphs pybel-0.12.1/src/pybel/struct/grouping/provenance.py000066400000000000000000000014031334645200200224750ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Utility functions for grouping sub-graphs by citation.""" from collections import defaultdict from .utils import cleanup from ...constants import CITATION, CITATION_REFERENCE, CITATION_TYPE __all__ = [ 'get_subgraphs_by_citation', ] def get_subgraphs_by_citation(graph): """Stratify the graph based on citations. 
:type graph: pybel.BELGraph :rtype: dict[tuple[str,str],pybel.BELGraph] """ rv = defaultdict(graph.fresh_copy) for u, v, key, data in graph.edges(keys=True, data=True): if CITATION not in data: continue dk = data[CITATION][CITATION_TYPE], data[CITATION][CITATION_REFERENCE] rv[dk].add_edge(u, v, key=key, **data) cleanup(graph, rv) return dict(rv) pybel-0.12.1/src/pybel/struct/grouping/utils.py000066400000000000000000000007051334645200200215010ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Utility functions for grouping sub-graphs.""" from ..utils import update_metadata, update_node_helper __all__ = [ 'cleanup', ] def cleanup(graph, subgraphs): """Clean up the metadata in the subgraphs. :type graph: pybel.BELGraph :type subgraphs: dict[Any,pybel.BELGraph] """ for subgraph in subgraphs.values(): update_node_helper(graph, subgraph) update_metadata(graph, subgraph) pybel-0.12.1/src/pybel/struct/mutation/000077500000000000000000000000001334645200200177735ustar00rootroot00000000000000pybel-0.12.1/src/pybel/struct/mutation/__init__.py000066400000000000000000000013211334645200200221010ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module contains functions that mutate or make transformations on a network.""" from . 
import collapse, deletion, expansion, induction, induction_expansion, inference, metadata, transfer, utils from .collapse import * from .deletion import * from .expansion import * from .induction import * from .induction_expansion import * from .inference import * from .metadata import * from .transfer import * from .utils import * __all__ = ( collapse.__all__ + deletion.__all__ + expansion.__all__ + induction.__all__ + induction_expansion.__all__ + inference.__all__ + metadata.__all__ + transfer.__all__ + utils.__all__ ) pybel-0.12.1/src/pybel/struct/mutation/collapse/000077500000000000000000000000001334645200200215755ustar00rootroot00000000000000pybel-0.12.1/src/pybel/struct/mutation/collapse/__init__.py000066400000000000000000000003651334645200200237120ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for collapsing nodes.""" from . import protein_rna_origins, collapse from .protein_rna_origins import * from .collapse import * __all__ = ( protein_rna_origins.__all__ + collapse.__all__ ) pybel-0.12.1/src/pybel/struct/mutation/collapse/collapse.py000066400000000000000000000047241334645200200237600ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Utilities for functions for collapsing nodes.""" from ...filters import filter_edges from ...filters.edge_predicate_builders import build_relation_predicate from ...pipeline import in_place_transformation from ....constants import HAS_VARIANT __all__ = [ 'collapse_pair', 'collapse_nodes', 'collapse_all_variants', ] def _remove_self_edges(graph): self_edges = [ (u, u, k) for u in graph if u in graph[u] for k in graph[u][u] ] graph.remove_edges_from(self_edges) @in_place_transformation def collapse_pair(graph, survivor, victim): """Rewire all edges from the synonymous node to the survivor node, then deletes the synonymous node. Does not keep edges between the two nodes. 
:param pybel.BELGraph graph: A BEL graph :param tuple survivor: The BEL node to collapse all edges on the synonym to :param tuple victim: The BEL node to collapse into the surviving node """ graph.add_edges_from( (survivor, successor, key, data) for _, successor, key, data in graph.out_edges(victim, keys=True, data=True) if successor != survivor ) graph.add_edges_from( (predecessor, survivor, key, data) for predecessor, _, key, data in graph.in_edges(victim, keys=True, data=True) if predecessor != survivor ) graph.remove_node(victim) # TODO what happens when collapsing is not consistent? Need to build intermediate mappings and test their consistency. @in_place_transformation def collapse_nodes(graph, survivor_mapping): """Collapse all nodes in values to the key nodes, in place. :param pybel.BELGraph graph: A BEL graph :param survivor_mapping: A dictionary with survivors as their keys, and iterables of the corresponding victims as values. :type survivor_mapping: dict[tuple,set[tuple]] """ for survivor, victims in survivor_mapping.items(): for victim in victims: collapse_pair(graph, survivor=survivor, victim=victim) _remove_self_edges(graph) @in_place_transformation def collapse_all_variants(graph): """Collapse all genes', RNAs', miRNAs', and proteins' variants to their parents. 
:param pybel.BELGraph graph: A BEL Graph """ has_variant_predicate = build_relation_predicate(HAS_VARIANT) edges = list(filter_edges(graph, has_variant_predicate)) for u, v, _ in edges: collapse_pair(graph, survivor=u, victim=v) _remove_self_edges(graph) pybel-0.12.1/src/pybel/struct/mutation/collapse/protein_rna_origins.py000066400000000000000000000027371334645200200262320ustar00rootroot00000000000000# -*- coding: utf-8 -*- from collections import defaultdict from .collapse import collapse_nodes from ..inference import enrich_protein_and_rna_origins from ...pipeline.decorators import in_place_transformation, register_deprecated from ....constants import RELATION, TRANSCRIBED_TO, TRANSLATED_TO __all__ = [ 'collapse_to_genes', ] def _build_collapse_to_gene_dict(graph): """ :param pybel.BELGraph graph: A BEL graph :return: A dictionary of {node: set of PyBEL node tuples} :rtype: dict[tuple,set[tuple]] """ collapse_dict = defaultdict(set) r2g = {} for gene_node, rna_node, d in graph.edges(data=True): if d[RELATION] != TRANSCRIBED_TO: continue collapse_dict[gene_node].add(rna_node) r2g[rna_node] = gene_node for rna_node, protein_node, d in graph.edges(data=True): if d[RELATION] != TRANSLATED_TO: continue if rna_node not in r2g: raise ValueError('Should complete origin before running this function') collapse_dict[r2g[rna_node]].add(protein_node) return collapse_dict @register_deprecated('collapse_by_central_dogma') @in_place_transformation def collapse_to_genes(graph): """Collapse all protein, RNA, and miRNA nodes to their corresponding gene nodes. 
:param pybel.BELGraph graph: A BEL graph """ enrich_protein_and_rna_origins(graph) collapse_dict = _build_collapse_to_gene_dict(graph) collapse_nodes(graph, collapse_dict) pybel-0.12.1/src/pybel/struct/mutation/deletion/000077500000000000000000000000001334645200200215765ustar00rootroot00000000000000pybel-0.12.1/src/pybel/struct/mutation/deletion/__init__.py000066400000000000000000000004141334645200200237060ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Modules supporting deletion and degradation of graphs.""" from . import protein_rna_origins, deletion from .protein_rna_origins import * from .deletion import * __all__ = ( protein_rna_origins.__all__ + deletion.__all__ ) pybel-0.12.1/src/pybel/struct/mutation/deletion/deletion.py000066400000000000000000000043201334645200200237520ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for deleting nodes and edges in networks.""" from ...filters.edge_filters import filter_edges from ...filters.edge_predicates import is_associative_relation from ...filters.node_filters import filter_nodes from ...filters.node_predicate_builders import function_inclusion_filter_builder from ...pipeline import in_place_transformation from ....constants import BIOPROCESS, PATHOLOGY __all__ = [ 'remove_filtered_edges', 'remove_filtered_nodes', 'remove_associations', 'remove_pathologies', 'remove_biological_processes', ] @in_place_transformation def remove_filtered_edges(graph, edge_predicates=None): """Remove edges passing the given edge predicates. :param pybel.BELGraph graph: A BEL graph :param edge_predicates: A predicate or list of predicates :type edge_predicates: None or ((pybel.BELGraph, tuple, tuple, int) -> bool) or iter[(pybel.BELGraph, tuple, tuple, int) -> bool]] :return: """ edges = list(filter_edges(graph, edge_predicates=edge_predicates)) graph.remove_edges_from(edges) @in_place_transformation def remove_filtered_nodes(graph, node_predicates=None): """Remove nodes passing the given node predicates. 
:param pybel.BELGraph graph: A BEL graph :type node_predicates: None or ((pybel.BELGraph, tuple) -> bool) or iter[(pybel.BELGraph, tuple) -> bool)] """ nodes = list(filter_nodes(graph, node_predicates=node_predicates)) graph.remove_nodes_from(nodes) @in_place_transformation def remove_associations(graph): """Remove all associative relationships from the graph. :param pybel.BELGraph graph: A BEL graph """ remove_filtered_edges(graph, is_associative_relation) @in_place_transformation def remove_pathologies(graph): """Remove pathology nodes from the graph. :param pybel.BELGraph graph: A BEL graph """ remove_filtered_nodes(graph, node_predicates=function_inclusion_filter_builder(PATHOLOGY)) @in_place_transformation def remove_biological_processes(graph): """Remove biological process nodes from the graph. :param pybel.BELGraph graph: A BEL graph """ remove_filtered_nodes(graph, node_predicates=function_inclusion_filter_builder(BIOPROCESS)) pybel-0.12.1/src/pybel/struct/mutation/deletion/protein_rna_origins.py000066400000000000000000000043031334645200200262220ustar00rootroot00000000000000# -*- coding: utf-8 -*- from ...filters.node_selection import get_nodes_by_function from ...pipeline.decorators import in_place_transformation, register_deprecated from ....constants import GENE, RELATION, RNA, TRANSCRIBED_TO, TRANSLATED_TO __all__ = [ 'prune_protein_rna_origins', ] def get_gene_leaves(graph): """Iterate over all genes who have only one connection, that's a transcription to its RNA. :param pybel.BELGraph graph: A BEL graph :rtype: iter[tuple] """ for node in get_nodes_by_function(graph, GENE): if graph.in_degree(node) != 0: continue if graph.out_degree(node) != 1: continue _, _, d = list(graph.out_edges(node, data=True))[0] if d[RELATION] == TRANSCRIBED_TO: yield node def get_rna_leaves(graph): """Iterate over all RNAs who have only one connection, that's a translation to its protein. 
:param pybel.BELGraph graph: A BEL graph :rtype: iter[tuple] """ for node in get_nodes_by_function(graph, RNA): if graph.in_degree(node) != 0: continue if graph.out_degree(node) != 1: continue _, _, d = list(graph.out_edges(node, data=True))[0] if d[RELATION] == TRANSLATED_TO: yield node @in_place_transformation def prune_rna_origins(graph): """Delete gene nodes that are only connected to one node, their correspond RNA, by a transcription edge. :param pybel.BELGraph graph: A BEL graph """ gene_leaves = list(get_gene_leaves(graph)) graph.remove_nodes_from(gene_leaves) @in_place_transformation def prune_protein_origins(graph): """Delete RNA nodes that are only connected to one node - their correspond protein - by a translation edge. :param pybel.BELGraph graph: A BEL graph """ rna_leaves = list(get_rna_leaves(graph)) graph.remove_nodes_from(rna_leaves) @register_deprecated('prune_central_dogma') @in_place_transformation def prune_protein_rna_origins(graph): """Delete genes that are only connected to one node, their correspond RNA, by a translation edge. :param pybel.BELGraph graph: A BEL graph """ prune_rna_origins(graph) prune_protein_origins(graph) pybel-0.12.1/src/pybel/struct/mutation/expansion/000077500000000000000000000000001334645200200217775ustar00rootroot00000000000000pybel-0.12.1/src/pybel/struct/mutation/expansion/__init__.py000066400000000000000000000002711334645200200241100ustar00rootroot00000000000000# -*- coding: utf-8 -*- from . 
import neighborhood, upstream from .neighborhood import * from .upstream import * __all__ = ( neighborhood.__all__ + upstream.__all__ ) pybel-0.12.1/src/pybel/struct/mutation/expansion/neighborhood.py000066400000000000000000000073661334645200200250340ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for expanding the neighborhoods of nodes.""" from ...filters.node_predicates import is_pathology from ...pipeline import uni_in_place_transformation from ...utils import update_metadata, update_node_helper __all__ = [ 'expand_node_predecessors', 'expand_node_successors', 'expand_node_neighborhood', 'expand_nodes_neighborhoods', 'expand_all_node_neighborhoods', ] @uni_in_place_transformation def expand_node_predecessors(universe, graph, node): """Expands around the predecessors of the given node in the result graph by looking at the universe graph, in place. :param pybel.BELGraph universe: The graph containing the stuff to add :param pybel.BELGraph graph: The graph to add stuff to :param tuple node: A BEL node """ skip_successors = set() for successor in universe.successors(node): if successor in graph: skip_successors.add(successor) continue graph.add_node(successor, **universe.node[successor]) graph.add_edges_from( (source, successor, key, data) for source, successor, key, data in universe.out_edges(node, data=True, keys=True) if successor not in skip_successors ) update_node_helper(universe, graph) update_metadata(universe, graph) @uni_in_place_transformation def expand_node_successors(universe, graph, node): """Expands around the successors of the given node in the result graph by looking at the universe graph, in place. 
:param pybel.BELGraph universe: The graph containing the stuff to add :param pybel.BELGraph graph: The graph to add stuff to :param tuple node: A BEL node """ skip_predecessors = set() for predecessor in universe.predecessors(node): if predecessor in graph: skip_predecessors.add(predecessor) continue graph.add_node(predecessor, **universe.node[predecessor]) graph.add_edges_from( (predecessor, target, key, data) for predecessor, target, key, data in universe.in_edges(node, data=True, keys=True) if predecessor not in skip_predecessors ) update_node_helper(universe, graph) update_metadata(universe, graph) @uni_in_place_transformation def expand_node_neighborhood(universe, graph, node): """Expands around the neighborhoods of the given node in the result graph by looking at the universe graph, in place. :param pybel.BELGraph universe: The graph containing the stuff to add :param pybel.BELGraph graph: The graph to add stuff to :param tuple node: A BEL node """ expand_node_predecessors(universe, graph, node) expand_node_successors(universe, graph, node) @uni_in_place_transformation def expand_nodes_neighborhoods(universe, graph, nodes): """Expands around the neighborhoods of the given node in the result graph by looking at the universe graph, in place. :param pybel.BELGraph universe: The graph containing the stuff to add :param pybel.BELGraph graph: The graph to add stuff to :param list[tuple] nodes: A node tuples from the query graph """ for node in nodes: expand_node_neighborhood(universe, graph, node) @uni_in_place_transformation def expand_all_node_neighborhoods(universe, graph, filter_pathologies=False): """Expands the neighborhoods of all nodes in the given graph based on the universe graph. :param pybel.BELGraph universe: The graph containing the stuff to add :param pybel.BELGraph graph: The graph to add stuff to :param bool filter_pathologies: Should expansion take place around pathologies? 
""" for node in list(graph): if filter_pathologies and is_pathology(node): continue expand_node_neighborhood(universe, graph, node) pybel-0.12.1/src/pybel/struct/mutation/expansion/upstream.py000066400000000000000000000022141334645200200242100ustar00rootroot00000000000000# -*- coding: utf-8 -*- from ..utils import expand_by_edge_filter from ...filters import build_downstream_edge_predicate, build_upstream_edge_predicate from ...pipeline import uni_in_place_transformation __all__ = [ 'expand_upstream_causal', 'expand_downstream_causal', ] @uni_in_place_transformation def expand_upstream_causal(universe, graph): """Add the upstream causal relations to the given sub-graph. :param pybel.BELGraph universe: A BEL graph representing the universe of all knowledge :param pybel.BELGraph graph: The target BEL graph to enrich with upstream causal controllers of contained nodes """ expand_by_edge_filter(universe, graph, build_upstream_edge_predicate(graph)) @uni_in_place_transformation def expand_downstream_causal(universe, graph): """Add the downstream causal relations to the given sub-graph. :param pybel.BELGraph universe: A BEL graph representing the universe of all knowledge :param pybel.BELGraph graph: The target BEL graph to enrich with upstream causal controllers of contained nodes """ expand_by_edge_filter(universe, graph, build_downstream_edge_predicate(graph)) pybel-0.12.1/src/pybel/struct/mutation/induction/000077500000000000000000000000001334645200200217675ustar00rootroot00000000000000pybel-0.12.1/src/pybel/struct/mutation/induction/__init__.py000066400000000000000000000007661334645200200241110ustar00rootroot00000000000000# -*- coding: utf-8 -*- from . 
import annotations, citation, neighborhood, paths, random_subgraph, upstream, utils from .annotations import * from .citation import * from .neighborhood import * from .paths import * from .random_subgraph import * from .upstream import * from .utils import * __all__ = ( annotations.__all__ + citation.__all__ + neighborhood.__all__ + paths.__all__ + random_subgraph.__all__ + upstream.__all__ + utils.__all__ ) pybel-0.12.1/src/pybel/struct/mutation/induction/annotations.py000066400000000000000000000034141334645200200247000ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for inducing graphs based on edge annotations.""" import logging from six import string_types from .utils import get_subgraph_by_edge_filter from ...filters import build_annotation_dict_all_filter, build_annotation_dict_any_filter from ...pipeline import transformation log = logging.getLogger(__name__) __all__ = [ 'get_subgraph_by_annotation_value', 'get_subgraph_by_annotations', ] @transformation def get_subgraph_by_annotations(graph, annotations, or_=None): """Induce a sub-graph given an annotations filter. :param graph: pybel.BELGraph graph: A BEL graph :param dict[str,iter[str]] annotations: Annotation filters (match all with :func:`pybel.utils.subdict_matches`) :param boolean or_: if True any annotation should be present, if False all annotations should be present in the edge. Defaults to True. :return: A subgraph of the original BEL graph :rtype: pybel.BELGraph """ edge_filter_builder = ( build_annotation_dict_any_filter if (or_ is None or or_) else build_annotation_dict_all_filter ) return get_subgraph_by_edge_filter(graph, edge_filter_builder(annotations)) @transformation def get_subgraph_by_annotation_value(graph, annotation, values): """Induce a sub-graph over all edges whose annotations match the given key and value. 
:param pybel.BELGraph graph: A BEL graph :param str annotation: The annotation to group by :param values: The value(s) for the annotation :type values: str or iter[str] :return: A subgraph of the original BEL graph :rtype: pybel.BELGraph """ if isinstance(values, string_types): values = {values} return get_subgraph_by_annotations(graph, {annotation: values}) pybel-0.12.1/src/pybel/struct/mutation/induction/citation.py000066400000000000000000000023071334645200200241550ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Induction functions based on provenance information.""" import logging from .utils import get_subgraph_by_edge_filter from ...filters.edge_predicate_builders import build_author_inclusion_filter, build_pmid_inclusion_filter from ...pipeline import transformation __all__ = [ 'get_subgraph_by_pubmed', 'get_subgraph_by_authors', ] log = logging.getLogger(__name__) @transformation def get_subgraph_by_pubmed(graph, pubmed_identifiers): """Induce a sub-graph over the edges retrieved from the given PubMed identifier(s). :param pybel.BELGraph graph: A BEL graph :param str or list[str] pubmed_identifiers: A PubMed identifier or list of PubMed identifiers :rtype: pybel.BELGraph """ return get_subgraph_by_edge_filter(graph, build_pmid_inclusion_filter(pubmed_identifiers)) @transformation def get_subgraph_by_authors(graph, authors): """Induce a sub-graph over the edges retrieved publications by the given author(s). 
:param pybel.BELGraph graph: A BEL graph :param str or list[str] authors: An author or list of authors :rtype: pybel.BELGraph """ return get_subgraph_by_edge_filter(graph, build_author_inclusion_filter(authors)) pybel-0.12.1/src/pybel/struct/mutation/induction/neighborhood.py000066400000000000000000000017621334645200200250160ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for selecting by the neighborhoods of nodes.""" import itertools as itt from ...pipeline import transformation from ...utils import update_metadata, update_node_helper __all__ = [ 'get_subgraph_by_neighborhood', ] @transformation def get_subgraph_by_neighborhood(graph, nodes): """Get a BEL graph around the neighborhoods of the given nodes. Returns none if no nodes are in the graph. :param pybel.BELGraph graph: A BEL graph :param iter[tuple] nodes: An iterable of BEL nodes :return: A BEL graph induced around the neighborhoods of the given nodes :rtype: Optional[pybel.BELGraph] """ node_set = set(nodes) if not any(node in graph for node in node_set): return rv = graph.fresh_copy() rv.add_edges_from(itt.chain( graph.in_edges(nodes, keys=True, data=True), graph.out_edges(nodes, keys=True, data=True), )) update_node_helper(graph, rv) update_metadata(graph, rv) return rv pybel-0.12.1/src/pybel/struct/mutation/induction/paths.py000066400000000000000000000100411334645200200234540ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Induction methods for graphs over shortest paths.""" import itertools as itt import logging import random import networkx as nx from .utils import get_subgraph_by_induction from ...pipeline import transformation from ....constants import FUNCTION, PATHOLOGY __all__ = [ 'get_nodes_in_all_shortest_paths', 'get_subgraph_by_all_shortest_paths', 'get_random_path', ] log = logging.getLogger(__name__) def _remove_pathologies_oop(graph): """Remove pathology nodes from the graph.""" rv = graph.copy() for node, data in rv.nodes(data=True): if data[FUNCTION] == 
PATHOLOGY: rv.remove_node(node) return rv def _iterate_nodes_in_shortest_paths(graph, nodes, weight=None): """Iterate over nodes in the shortest paths between all pairs of nodes in the given list. :type graph: pybel.BELGraph :type nodes: list[tuple] :param weight: Optional[str] :rtype: iter[tuple] """ for source, target in itt.product(nodes, repeat=2): try: paths = nx.all_shortest_paths(graph, source, target, weight=weight) for path in paths: for node in path: yield node except nx.exception.NetworkXNoPath: continue def get_nodes_in_all_shortest_paths(graph, nodes, weight=None, remove_pathologies=False): """Get a set of nodes in all shortest paths between the given nodes. Thinly wraps :func:`networkx.all_shortest_paths`. :param pybel.BELGraph graph: A BEL graph :param iter[tuple] nodes: The list of nodes to use to use to find all shortest paths :param Optional[str] weight: Edge data key corresponding to the edge weight. If none, uses unweighted search. :param bool remove_pathologies: Should pathology nodes be removed first? :return: A set of nodes appearing in the shortest paths between nodes in the BEL graph :rtype: set[tuple] .. note:: This can be trivially parallelized using :func:`networkx.single_source_shortest_path` """ if remove_pathologies: graph = _remove_pathologies_oop(graph) return set(_iterate_nodes_in_shortest_paths(graph, nodes, weight=weight)) @transformation def get_subgraph_by_all_shortest_paths(graph, nodes, weight=None, remove_pathologies=False): """Induce a subgraph over the nodes in the pairwise shortest paths between all of the nodes in the given list. :param pybel.BELGraph graph: A BEL graph :param iter[tuple] nodes: A set of nodes over which to calculate shortest paths :param str weight: Edge data key corresponding to the edge weight. If None, performs unweighted search :param bool remove_pathologies: Should the pathology nodes be deleted before getting shortest paths? 
:return: A BEL graph induced over the nodes appearing in the shortest paths between the given nodes :rtype: Optional[pybel.BELGraph] """ query_nodes = [] for node in nodes: if node not in graph: log.debug('%s not in %s', node, graph) continue query_nodes.append(node) if not query_nodes: return induced_nodes = get_nodes_in_all_shortest_paths(graph, query_nodes, weight=weight, remove_pathologies=remove_pathologies) if not induced_nodes: return return get_subgraph_by_induction(graph, induced_nodes) def get_random_path(graph): """Get a random path from the graph as a list of nodes. :param pybel.BELGraph graph: A BEL graph :rtype: list[BaseEntity] """ wg = graph.to_undirected() nodes = wg.nodes() def pick_random_pair(): """Get a pair of random nodes. :rtype: tuple """ return random.sample(nodes, k=2) source, target = pick_random_pair() tries = 0 sentinel_tries = 5 while not nx.has_path(wg, source, target) and tries < sentinel_tries: tries += 1 source, target = pick_random_pair() if tries == sentinel_tries: return [source] return nx.shortest_path(wg, source=source, target=target) pybel-0.12.1/src/pybel/struct/mutation/induction/random_subgraph.py000066400000000000000000000161061334645200200255200ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Functions for inducing random sub-graphs.""" import bisect import logging import random from operator import itemgetter from ..utils import remove_isolated_nodes from ...pipeline import transformation from ...utils import update_metadata, update_node_helper __all__ = [ 'get_graph_with_random_edges', 'get_random_node', 'get_random_subgraph', ] log = logging.getLogger(__name__) def _random_edge_iterator(graph, n_edges): """Get a random set of edges from the graph and randomly samples a key from each. 
def get_random_node(graph, node_blacklist, invert_degrees=None):
    """Choose a node from the graph with probabilities based on their degrees.

    :type graph: networkx.Graph
    :param set[tuple] node_blacklist: Nodes to filter out
    :param Optional[bool] invert_degrees: Should the degrees be inverted? Defaults to true.
    :return: A randomly chosen node, or None if no node is left after applying the blacklist
    :rtype: Optional[tuple]
    """
    try:
        nodes, degrees = zip(*(
            (node, degree)
            for node, degree in sorted(graph.degree(), key=itemgetter(1))
            if node not in node_blacklist
        ))
    except ValueError:  # nothing left to unpack: the graph is empty or every node is blacklisted
        return

    if invert_degrees is None or invert_degrees:
        # More likely to choose low degree nodes to explore, so don't make hubs.
        # Use a float numerator so the weights do not collapse to 0 under Python 2 integer division, and give
        # isolated nodes (degree 0) the same weight as degree-1 nodes instead of dividing by zero.
        degrees = [
            1.0 / degree if degree else 1.0
            for degree in degrees
        ]

    wrg = WeightedRandomGenerator(nodes, degrees)
    return wrg.next()
@transformation
def get_random_subgraph(graph, number_edges=None, number_seed_edges=None, seed=None, invert_degrees=None):
    """Generate a random subgraph based on weighted random walks from random seed edges.

    :param pybel.BELGraph graph: A BEL graph
    :param Optional[int] number_edges: Maximum number of edges. Defaults to
     :data:`SAMPLE_RANDOM_EDGE_COUNT`.
    :param Optional[int] number_seed_edges: Number of edges to start with (which likely results in different
     components in large graphs). Defaults to :data:`SAMPLE_RANDOM_EDGE_SEED_COUNT`.
    :param Optional[int] seed: A seed for the random state
    :param Optional[bool] invert_degrees: Should the degrees be inverted? Defaults to true.
    :rtype: pybel.BELGraph
    """
    edge_budget = SAMPLE_RANDOM_EDGE_COUNT if number_edges is None else number_edges
    seed_budget = SAMPLE_RANDOM_EDGE_SEED_COUNT if number_seed_edges is None else number_seed_edges

    if seed is not None:
        random.seed(seed)

    # Sampling would cover the whole graph anyway, so hand back a copy of it
    if graph.number_of_edges() <= edge_budget:
        log.info('sampled full graph')
        return graph.copy()

    log.debug('getting random sub-graph with %d seed edges, %d final edges, and seed=%s', seed_budget,
              edge_budget, seed)

    # Start from a graph holding only the randomly chosen seed edges
    result = get_graph_with_random_edges(graph, seed_budget)

    # Nodes that turned out to have nothing left to grow into are collected here by the helper
    exhausted_nodes = set()
    _helper(
        result,
        graph,
        edge_budget - result.number_of_edges(),
        no_grow=exhausted_nodes,
        invert_degrees=invert_degrees,
    )

    log.debug('removing isolated nodes')
    remove_isolated_nodes(result)

    # Carry node data and graph-level metadata over from the original graph
    update_node_helper(graph, result)
    update_metadata(graph, result)

    return result
@transformation
def get_subgraph_by_induction(graph, nodes):
    """Induce a sub-graph over the given nodes or return None if none of the nodes are in the given graph.

    :param pybel.BELGraph graph: A BEL graph
    :param iter[tuple] nodes: An iterable of BEL nodes in the graph. One-shot iterators are supported.
    :return: The induced sub-graph, or None if ``nodes`` is empty or shares no node with the graph
    :rtype: Optional[pybel.BELGraph]
    """
    # ``nodes`` is traversed twice (membership check, then induction), so materialise it first. Otherwise a
    # generator would be exhausted by the check and the induction would see no nodes at all.
    nodes = list(nodes)

    if all(node not in graph for node in nodes):
        return

    return subgraph(graph, nodes)
@transformation
def get_subgraph_by_second_neighbors(graph, nodes, filter_pathologies=False):
    """Induce a graph around the neighborhoods of the given nodes, then expand it by one more neighborhood.

    :param pybel.BELGraph graph: A BEL graph
    :param iter[tuple] nodes: An iterable of BEL nodes
    :param bool filter_pathologies: Passed through to the neighborhood expansion step
    :return: The expanded neighborhood graph, or None if the neighborhood induction returned None
    :rtype: Optional[pybel.BELGraph]
    """
    neighborhood = get_subgraph_by_neighborhood(graph, nodes)

    # Only expand when the first induction produced a graph
    if neighborhood is not None:
        expand_all_node_neighborhoods(graph, neighborhood, filter_pathologies=filter_pathologies)

    return neighborhood
@register_deprecated('infer_central_dogmatic_transcriptions')
@in_place_transformation
def enrich_rnas_with_genes(graph):
    """Add the corresponding gene node for each RNA/miRNA node and connect them with a transcription edge.

    Nodes that carry a fusion or variants entry are skipped.

    :param pybel.BELGraph graph: A BEL graph
    """
    # Iterate over a snapshot, since nodes are added to the graph inside the loop
    for node in list(graph):
        is_plain_rna = (
            node[FUNCTION] in {MIRNA, RNA}
            and FUSION not in node
            and VARIANTS not in node
        )
        if is_plain_rna:
            graph.add_transcription(node.get_gene(), node)
@in_place_transformation
def add_annotation_value(graph, annotation, value):
    """Add the given annotation/value pair to all qualified edges.

    Edges without an annotations dictionary are left untouched. Annotations already present on an edge are kept.

    :param pybel.BELGraph graph: A BEL graph
    :param str annotation: The annotation keyword, which must be defined in the graph
    :param str value: The value to add under the annotation
    :raises ValueError: If the annotation is not defined in the graph
    """
    if annotation not in graph.defined_annotation_keywords:
        raise ValueError('annotation not defined: {}'.format(annotation))

    for u, v, k in graph.edges(keys=True):
        data = graph[u][v][k]

        if ANNOTATIONS not in data:
            continue

        # Create only the missing entry for this annotation. Re-assigning the whole annotations dictionary
        # would throw away every other annotation stored on the edge.
        data[ANNOTATIONS].setdefault(annotation, {})[value] = True
def iter_children(graph, node):
    """Yield the source of every incoming ``isA`` edge of the node.

    :type graph: pybel.BELGraph
    :type node: BaseEntity
    :rtype: iter[BaseEntity]
    """
    for edge in graph.in_edges(node, data=True):
        child, edge_data = edge[0], edge[2]
        if edge_data[RELATION] == IS_A:
            yield child
def infer_child_relations(graph, node):
    """Propagate causal relations to children.

    The causal edges of ``node`` are transferred to each of its ``isA`` children, then the same is done
    recursively from each child. A child that is already on the current chain of ancestors is not descended
    into again, so cycles of ``isA`` edges terminate.

    :param pybel.BELGraph graph: A BEL graph
    :param BaseEntity node: The node whose causal relations should be propagated to its children
    :raises TypeError: If the node (or a child reached during propagation) is not a :class:`BaseEntity`
    """

    def _propagate(parent, ancestors):
        """Transfer edges from ``parent`` to its children, then recurse into children not in ``ancestors``."""
        if not isinstance(parent, BaseEntity):
            raise TypeError('expected a BaseEntity, got: {!r}'.format(parent))

        # Snapshot the children first: transferring edges mutates the graph, and iterating over a live
        # in-edge view while edges are being added is not safe.
        for child in list(iter_children(graph, parent)):
            transfer_causal_edges(graph, parent, child)

            if child in ancestors:  # isA cycle; descending again would never terminate
                continue

            _propagate(child, ancestors | {child})

    _propagate(node, {node})
@uni_in_place_transformation
def expand_by_edge_filter(source, target, edge_predicates=None):
    """Expand a target graph, in place, by the edges in the source that match the given predicates.

    :param pybel.BELGraph source: The BEL graph edges are taken from
    :param pybel.BELGraph target: The BEL graph edges are added to
    :param edge_predicates: An edge predicate or list of edge predicates
    :type edge_predicates: None or (pybel.BELGraph, tuple, tuple, int) -> bool or list[(pybel.BELGraph, tuple, tuple, int) -> bool]
    """
    passing_edges = filter_edges(source, edge_predicates=edge_predicates)

    target.add_edges_from(
        (u, v, key, source[u][v][key])
        for u, v, key in passing_edges
    )

    update_node_helper(source, target)
    update_metadata(source, target)  # TODO smarter ways of ensuring metadata
def left_full_join(g, h):
    """Add all nodes and edges from ``h`` to ``g``, in-place for ``g``.

    An edge of ``h`` is skipped when ``g`` already has an edge with the same source, target, and key.

    :param pybel.BELGraph g: A BEL network
    :param pybel.BELGraph h: A BEL network

    Example usage:

    >>> import pybel
    >>> g = pybel.from_path('...')
    >>> h = pybel.from_path('...')
    >>> left_full_join(g, h)
    """
    missing_edges = (
        (u, v, key, data)
        for u, v, key, data in h.edges(keys=True, data=True)
        if not (u in g and v in g[u] and key in g[u][v])
    )
    g.add_edges_from(missing_edges)

    update_metadata(h, g)
    update_node_helper(h, g)
def node_intersection(networks):
    """Take the node intersection over a collection of networks into a new network.

    This intersection is defined the same way as by :func:`left_node_intersection_join`

    :param iter[BELGraph] networks: An iterable of networks. Since it's iterated over twice, it gets converted to
     a tuple first, so this isn't a safe operation for infinite lists.
    :return: A new network; the input networks are not modified
    :rtype: BELGraph
    :raises ValueError: If no networks are given

    Example usage:

    >>> import pybel
    >>> g = pybel.from_path('...')
    >>> h = pybel.from_path('...')
    >>> k = pybel.from_path('...')
    >>> merged = node_intersection([g, h, k])
    """
    networks = tuple(networks)

    n_networks = len(networks)

    if n_networks == 0:
        raise ValueError('no networks given')

    if n_networks == 1:
        # Return a copy so the result is always a new network, as it is for two or more inputs
        return networks[0].copy()

    nodes = set(networks[0].nodes())

    for network in networks[1:]:
        nodes.intersection_update(network)

    return union(
        subgraph(network, nodes)
        for network in networks
    )
""" import logging from .exc import DeprecationMappingError, MissingPipelineFunctionError, PipelineNameError try: from inspect import signature except ImportError: from funcsigs import signature __all__ = [ 'in_place_transformation', 'uni_in_place_transformation', 'uni_transformation', 'transformation', 'register_deprecated', 'get_transformation', 'mapped', 'has_arguments_map', 'no_arguments_map', ] log = logging.getLogger(__name__) mapped = {} universe_map = {} in_place_map = {} has_arguments_map = {} no_arguments_map = {} deprecated = {} def _has_arguments(func, universe): sig = signature(func) return ( (universe and 3 <= len(sig.parameters)) or (not universe and 2 <= len(sig.parameters)) ) def _register_function(name, func, universe, in_place): """Register a transformation function under the given name. :param str name: Name to register the function under :param func: A function :param bool universe: param bool in_place: :return: The same function, with additional properties added """ if name in mapped: mapped_func = mapped[name] raise PipelineNameError('{name} is already registered with {func_mod}.{func_name}'.format( name=name, func_mod=mapped_func.__module__, func_name=mapped_func.__name__ )) mapped[name] = func if universe: universe_map[name] = func if in_place: in_place_map[name] = func if _has_arguments(func, universe): has_arguments_map[name] = func else: no_arguments_map[name] = func return func def _build_register_function(universe, in_place): """Build a decorator function to tag transformation functions. :param bool universe: Does the first positional argument of this function correspond to a universe graph? :param bool in_place: Does this function return a new graph, or just modify it in-place? """ def register(func): """Tag a transformation function. 
def register_deprecated(deprecated_name):
    """Register a function under an additional, deprecated name.

    :param str deprecated_name: The old name of the function
    :return: A decorator
    :raises DeprecationMappingError: If the deprecated name is already registered

    Usage:

    This decorator must be applied last (outermost), since it looks up the registration made by the
    decorators applied before it.

    >>> @register_deprecated('my_function')
    ... @transformation
    ... def my_old_function(graph):
    ...     pass
    """
    if deprecated_name in mapped:
        raise DeprecationMappingError('function name already mapped. can not register as deprecated name.')

    def register_deprecated_f(func):
        current_name = func.__name__

        log.debug('%s is deprecated. please migrate to %s', deprecated_name, current_name)

        if current_name not in mapped:
            raise MissingPipelineFunctionError('function not mapped with transformation, uni_transformation, etc.')

        # Add back-reference from deprecated function name to actual function name
        deprecated[deprecated_name] = current_name

        # Register the old name with the same universe/in-place flags as the current name
        return _register_function(
            deprecated_name,
            func,
            current_name in universe_map,
            current_name in in_place_map,
        )

    return register_deprecated_f
class DeprecationMappingError(ValueError):
    """Raised when applying the deprecation function annotation and the given name is already being used."""
:param dict data: :rtype: tuple[str,list,dict] """ return data['function'], data.get('args', []), data.get('kwargs', {}) class Pipeline: """Builds and runs analytical pipelines on BEL graphs. Example usage: >>> from pybel import BELGraph >>> from pybel.struct.pipeline import Pipeline >>> from pybel.struct.mutation import enrich_protein_and_rna_origins, prune_protein_rna_origins >>> graph = BELGraph() >>> example = Pipeline() >>> example.append(enrich_protein_and_rna_origins) >>> example.append(prune_protein_rna_origins) >>> result = example.run(graph) """ def __init__(self, protocol=None): """ :param iter[dict] protocol: An iterable of dictionaries describing how to transform a network """ self.universe = None self.protocol = protocol or [] def __len__(self): return len(self.protocol) def __iter__(self): return iter(self.protocol) @staticmethod def from_functions(functions): """Build a pipeline from a list of functions. :param functions: A list of functions or names of functions :type functions: iter[((pybel.BELGraph) -> pybel.BELGraph) or ((pybel.BELGraph) -> None) or str] :rtype: Pipeline Example with function: >>> from pybel.struct.pipeline import Pipeline >>> from pybel.struct.mutation import remove_associations >>> pipeline = Pipeline.from_functions([remove_associations]) Equivalent example with function names: >>> from pybel.struct.pipeline import Pipeline >>> pipeline = Pipeline.from_functions(['remove_associations']) Lookup by name is possible for built in functions, and those that have been registered correctly using one of the four decorators: 1. :func:`pybel.struct.pipeline.transformation`, 2. :func:`pybel.struct.pipeline.in_place_transformation`, 3. :func:`pybel.struct.pipeline.uni_transformation`, 4. :func:`pybel.struct.pipeline.uni_in_place_transformation`, """ result = Pipeline() for func in functions: result.append(func) return result def _get_function(self, name): """Wrap a function with the universe and in-place. 
:param str name: The name of the function :rtype: types.FunctionType :raises MissingPipelineFunctionError: If the functions is not registered """ f = mapped.get(name) if f is None: raise MissingPipelineFunctionError('{} is not registered as a pipeline function'.format(name)) if name in universe_map and name in in_place_map: return self._wrap_in_place(self._wrap_universe(f)) if name in universe_map: return self._wrap_universe(f) if name in in_place_map: return self._wrap_in_place(f) return f def append(self, name, *args, **kwargs): """Add a function (either as a reference, or by name) and arguments to the pipeline. :param name: The name of the function :type name: str or (pybel.BELGraph -> pybel.BELGraph) :param args: The positional arguments to call in the function :param kwargs: The keyword arguments to call in the function :return: This pipeline for fluid query building :rtype: Pipeline :raises MissingPipelineFunctionError: If the function is not registered """ if isinstance(name, types.FunctionType): return self.append(name.__name__, *args, **kwargs) elif isinstance(name, str): get_transformation(name) else: raise TypeError('invalid function argument: {}'.format(name)) av = { 'function': name, } if args: av['args'] = args if kwargs: av['kwargs'] = kwargs self.protocol.append(av) return self def extend(self, protocol): """Add another pipeline to the end of the current pipeline. :param protocol: An iterable of dictionaries (or another Pipeline) :type protocol: iter[dict] or Pipeline :return: This pipeline for fluid query building :rtype: Pipeline Example: >>> p1 = Pipeline.from_functions(['enrich_protein_and_rna_origins']) >>> p2 = Pipeline.from_functions(['remove_pathologies']) >>> p1.extend(p2) """ for data in protocol: name, args, kwargs = _get_protocol_tuple(data) self.append(name, *args, **kwargs) return self def _run_helper(self, graph, protocol): """Help run the protocol. 
:param pybel.BELGraph graph: A BEL graph :param list[dict] protocol: The protocol to run, as JSON :rtype: pybel.BELGraph """ result = graph for entry in protocol: meta_entry = entry.get('meta') if meta_entry is None: name, args, kwargs = _get_protocol_tuple(entry) func = self._get_function(name) result = func(result, *args, **kwargs) else: networks = ( self._run_helper(graph, subprotocol) for subprotocol in entry['pipelines'] ) if meta_entry == META_UNION: result = union(networks) elif meta_entry == META_INTERSECTION: result = node_intersection(networks) else: raise MetaValueError('invalid meta-command: {}'.format(meta_entry)) return result def run(self, graph, universe=None): """Run the contained protocol on a seed graph. :param pybel.BELGraph graph: The seed BEL graph :param pybel.BELGraph universe: Allows just-in-time setting of the universe in case it wasn't set before. Defaults to the given network. :param bool in_place: Should the graph be copied before applying the algorithm? :return: The new graph is returned if not applied in-place :rtype: pybel.BELGraph """ self.universe = universe or graph.copy() return self._run_helper(graph.copy(), self.protocol) def __call__(self, graph, universe=None): """Call :meth:`Pipeline.run`. :param pybel.BELGraph graph: The seed BEL graph :param pybel.BELGraph universe: Allows just-in-time setting of the universe in case it wasn't set before. Defaults to the given network. :param bool in_place: Should the graph be copied before applying the algorithm? :return: The new graph is returned if not applied in-place :rtype: pybel.BELGraph Using __call__ allows for methods to be chained together then applied >>> from pybel.struct.mutation import remove_associations, remove_pathologies >>> from pybel.struct.pipeline.pipeline import Pipeline >>> from pybel import BELGraph >>> pipe = Pipeline.from_functions([remove_associations, remove_pathologies]) >>> graph = BELGraph() ... 
>>> new_graph = pipe(graph) """ return self.run(graph=graph, universe=universe) def _wrap_universe(self, func): """Take a function that needs a universe graph as the first argument and returns a wrapped one.""" @wraps(func) def wrapper(graph, *args, **kwargs): """Applies the enclosed function with the universe given as the first argument""" if self.universe is None: raise MissingUniverseError( 'Can not run universe function [{}] - No universe is set'.format(func.__name__)) return func(self.universe, graph, *args, **kwargs) return wrapper @staticmethod def _wrap_in_place(func): """Take a function that doesn't return the graph and returns the graph.""" @wraps(func) def wrapper(graph, *args, **kwargs): """Applies the enclosed function and returns the graph""" func(graph, *args, **kwargs) return graph return wrapper def to_json(self): """Return this pipeline as a JSON list. :rtype: list """ return self.protocol def dumps(self, **kwargs): """Dump this pipeline as a JSON string. :rtype: str """ return json.dumps(self.to_json(), **kwargs) def dump(self, file, **kwargs): """Dump this protocol to a file in JSON. :param file: A file or file-like to pass to :func:`json.dump` """ return json.dump(self.to_json(), file, **kwargs) @staticmethod def load(file): """Load a protocol from JSON contained in file. :param file: A file or file-like :return: The pipeline represented by the JSON in the file :rtype: Pipeline :raises MissingPipelineFunctionError: If any functions are not registered """ return Pipeline(json.load(file)) @staticmethod def loads(s): """Load a protocol from a JSON string. 
@staticmethod
def _build_meta(meta, pipelines):
    """Build a pipeline consisting of a single meta entry that combines other pipelines.

    :param str meta: either union or intersection
    :param iter[Pipeline] pipelines: The pipelines whose protocols are nested under the meta entry
    :return: A new pipeline whose protocol is one entry with the keys ``meta`` and ``pipelines``
    :rtype: Pipeline
    """
    nested_protocols = []
    for pipeline in pipelines:
        nested_protocols.append(pipeline.protocol)

    meta_entry = {
        'meta': meta,
        'pipelines': nested_protocols,
    }
    return Pipeline(protocol=[meta_entry])
class QueryMissingNetworksError(KeyError):
    """Raised when a query lacks network identifiers.

    This covers a query built from JSON without a listing of network identifiers.
    It derives from :class:`KeyError`, so handlers for ``KeyError`` also catch it.
    """


class NodeDegreeIterError(ValueError):
    """Raised when iterating over node degrees fails.

    It derives from :class:`ValueError`, so handlers for ``ValueError`` also catch it.
    """
def __init__(self, network_ids=None, seeding=None, pipeline=None):
    """Build a query.

    :param network_ids: Database network identifiers. A single integer is wrapped in a list and any
     other iterable is materialized as a list; a falsy value results in an empty list.
    :type network_ids: None or int or iter[int]
    :param seeding: The seeding container to use. A new, empty one is created if none (or an empty one) is given.
    :type seeding: Optional[Seeding]
    :param pipeline: The pipeline to use. A new one is created if none (or a falsy one) is given.
    :type pipeline: Optional[Pipeline]
    :raises TypeError: If ``network_ids`` is not an integer or an iterable of integers, or if ``seeding``
     or ``pipeline`` has the wrong type
    """
    if not network_ids:
        normalized_ids = []
    elif isinstance(network_ids, int):
        normalized_ids = [network_ids]
    elif isinstance(network_ids, Iterable):
        normalized_ids = list(network_ids)
        if not all(isinstance(identifier, int) for identifier in normalized_ids):
            raise TypeError(normalized_ids)
    else:
        raise TypeError(network_ids)

    self.network_ids = normalized_ids

    if not (seeding is None or isinstance(seeding, Seeding)):
        raise TypeError('Not a Seeding: {}'.format(seeding))

    self.seeding = seeding if seeding else Seeding()

    if not (pipeline is None or isinstance(pipeline, Pipeline)):
        raise TypeError('Not a pipeline: {}'.format(pipeline))

    self.pipeline = pipeline if pipeline else Pipeline()
def run(self, manager):
    """Run this query and return the resulting BEL graph.

    The universe is assembled from the query's networks, seeded, then handed to the pipeline.

    :param manager: A cache manager
    :return: The graph produced by the pipeline, or None if the seeding did not produce a graph
    :rtype: Optional[pybel.BELGraph]
    :raises QueryMissingNetworksError: If the query has no network identifiers
    """
    universe = self._get_universe(manager)
    graph = self.seeding.run(universe)

    # The seeding step returns None when no seed yields a subgraph. Passing that on would make
    # the pipeline fail on ``graph.copy()``, so stop here and honor the Optional return type.
    if graph is None:
        return None

    return self.pipeline.run(graph, universe=universe)
@staticmethod
def from_json(data):
    """Build a query from a JSON dictionary.

    The ``network_ids`` key is required; ``seeding`` and ``pipeline`` are optional.

    :param dict data: A JSON dictionary
    :rtype: Query
    :raises: QueryMissingNetworksError
    """
    network_ids = data.get('network_ids')
    if network_ids is None:
        raise QueryMissingNetworksError('query JSON did not have key "network_ids"')

    seeding = None
    seeding_data = data.get('seeding')
    if seeding_data is not None:
        seeding = Seeding(seeding_data)

    pipeline = None
    pipeline_data = data.get('pipeline')
    if pipeline_data is not None:
        pipeline = Pipeline(pipeline_data)

    return Query(network_ids=network_ids, seeding=seeding, pipeline=pipeline)
def append_annotation(self, annotation, values):
    """Add a seed that selects by the values of a single annotation.

    :param str annotation: The annotation to filter by
    :param set[str] values: The values of the annotation to keep
    :returns: self for fluid API
    :rtype: Seeding
    """
    seed_data = {
        'annotations': {
            annotation: values,
        },
    }
    return self._append_seed(SEED_TYPE_ANNOTATION, seed_data)
def run(self, graph):
    """Apply every seeding method to the graph and merge the resulting subgraphs.

    Without any seeding methods, the given graph itself is returned.

    :type graph: pybel.BELGraph
    :return: The union of the subgraphs, or None if no seeding method produced one
    :rtype: Optional[pybel.BELGraph]
    """
    if not self:
        log.debug('no seeding, returning graph: %s', graph)
        return graph

    collected = []

    for seed in self:
        seed_method, seed_data = seed[SEED_METHOD], seed[SEED_DATA]
        log.debug('seeding with %s: %s', seed_method, seed_data)

        seeded = get_subgraph(graph, seed_method=seed_method, seed_data=seed_data)

        if seeded is not None:
            collected.append(seeded)
        else:
            log.debug('seed returned empty graph: %s', seed)

    if collected:
        return union(collected)

    log.debug('no subgraphs returned')
    return None
def get_subgraph(graph, seed_method=None, seed_data=None, expand_nodes=None, remove_nodes=None):
    """Run a pipeline query on graph with multiple sub-graph filters and expanders.

    Order of Operations:

    1. Seeding by given function name and data
    2. Add nodes
    3. Remove nodes

    :param pybel.BELGraph graph: A BEL graph
    :param str seed_method: The name of the get_subgraph_by_* function to use. If falsy, a copy of the
     whole graph is used.
    :param seed_data: The argument to pass to the get_subgraph function
    :param list[tuple] expand_nodes: Add the neighborhoods around all of these nodes
    :param list[tuple] remove_nodes: Remove these nodes and all of their in/out edges
    :return: The resulting sub-graph, or None if the seeding function returned None
    :rtype: Optional[pybel.BELGraph]
    :raises ValueError: If the seed method is not recognized
    """
    # Seed types whose function is called uniformly as ``function(graph, seed_data)``.
    # A tuple scanned with ``==`` keeps the comparison semantics of an if/elif chain.
    uniform_seeders = (
        (SEED_TYPE_INDUCTION, get_subgraph_by_induction),
        (SEED_TYPE_PATHS, get_subgraph_by_all_shortest_paths),
        (SEED_TYPE_NEIGHBORS, get_subgraph_by_neighborhood),
        (SEED_TYPE_DOUBLE_NEIGHBORS, get_subgraph_by_second_neighbors),
        (SEED_TYPE_UPSTREAM, get_multi_causal_upstream),
        (SEED_TYPE_DOWNSTREAM, get_multi_causal_downstream),
        (SEED_TYPE_PUBMED, get_subgraph_by_pubmed),
        (SEED_TYPE_AUTHOR, get_subgraph_by_authors),
    )

    seeder = None
    for seed_type, function in uniform_seeders:
        if seed_method == seed_type:
            seeder = function
            break

    # Seed by the given function
    if seeder is not None:
        result = seeder(graph, seed_data)

    elif seed_method == SEED_TYPE_ANNOTATION:
        result = get_subgraph_by_annotations(graph, seed_data['annotations'], or_=seed_data.get('or'))

    elif seed_method == SEED_TYPE_SAMPLE:
        result = get_random_subgraph(
            graph,
            number_edges=seed_data.get('number_edges'),
            seed=seed_data.get('seed')
        )

    elif not seed_method:  # Otherwise, don't seed a sub-graph
        result = graph.copy()
        log.debug('no seed function - using full network: %s', result.name)

    else:
        raise ValueError('Invalid seed method: {}'.format(seed_method))

    if result is None:
        log.debug('query returned no results')
        return None

    log.debug('original graph has (%s nodes / %s edges)', result.number_of_nodes(), result.number_of_edges())

    # Expand around the given nodes
    if expand_nodes:
        expand_nodes_neighborhoods(graph, result, expand_nodes)
        log.debug('graph expanded to (%s nodes / %s edges)', result.number_of_nodes(), result.number_of_edges())

    # Delete the given nodes
    if remove_nodes:
        for node in remove_nodes:
            if node in result:
                result.remove_node(node)
            else:
                log.debug('%s is not in graph %s', node, graph.name)

        log.debug('graph contracted to (%s nodes / %s edges)', result.number_of_nodes(), result.number_of_edges())

    log.debug(
        'Subgraph coming from %s (seed type) %s (data) contains %d nodes and %d edges',
        seed_method,
        seed_data,
        result.number_of_nodes(),
        result.number_of_edges()
    )

    return result
:param pybel.BELGraph graph: A BEL graph :rtype: iter[tuple[str,str]] """ return ( (key, value) for _, _, data in graph.edges(data=True) if ANNOTATIONS in data for key, values in data[ANNOTATIONS].items() for value in values ) def iter_annotation_values(graph, annotation): """Iterate over all of the values for an annotation used in the graph. :param pybel.BELGraph graph: A BEL graph :param str annotation: The annotation to grab :rtype: iter[str] """ return ( value for _, _, data in graph.edges(data=True) if edge_has_annotation(data, annotation) for value in data[ANNOTATIONS][annotation] ) def _group_dict_set(iterator): """Make a dict that accumulates the values for each key in an iterator of doubles. :param iter[tuple[A,B]] iterator: An iterator :rtype: dict[A,set[B]] """ d = defaultdict(set) for key, value in iterator: d[key].add(value) return dict(d) def get_annotation_values_by_annotation(graph): """Get the set of values for each annotation used in a BEL graph. :param pybel.BELGraph graph: A BEL graph :return: A dictionary of {annotation key: set of annotation values} :rtype: dict[str, set[str]] """ return _group_dict_set(iter_annotation_value_pairs(graph)) def get_annotation_values(graph, annotation): """Get all values for the given annotation. :param pybel.BELGraph graph: A BEL graph :param str annotation: The annotation to summarize :return: A set of all annotation values :rtype: set[str] """ return set(iter_annotation_values(graph, annotation)) def count_relations(graph): """Return a histogram over all relationships in a graph. :param pybel.BELGraph graph: A BEL graph :return: A Counter from {relation type: frequency} :rtype: collections.Counter """ return Counter( data[RELATION] for _, _, data in graph.edges(data=True) ) def get_unused_annotations(graph): """Get the set of all annotations that are defined in a graph, but are never used. 
def _annotation_iter_helper(graph):
    """Iterate over the annotation keys of every edge, with duplicates.

    Edges whose data dictionary has no annotations entry are skipped.

    :type graph: pybel.BELGraph
    :rtype: iter[str]
    """
    # Obtain the edge iterator up front, as a generator expression does for its outermost iterable.
    edge_triples = iter(graph.edges(data=True))

    def _keys():
        for _, _, edge_data in edge_triples:
            if ANNOTATIONS not in edge_data:
                continue
            for key in edge_data[ANNOTATIONS]:
                yield key

    return _keys()
def count_error_types(graph):
    """Build a histogram of the class names of the exceptions stored in the graph's warnings.

    :param pybel.BELGraph graph: A BEL graph
    :return: A Counter of {error type: frequency}
    :rtype: collections.Counter
    """
    histogram = Counter()
    # Each warning is a 4-tuple whose third element is the exception.
    for _, _, exc, _ in graph.warnings:
        histogram[exc.__class__.__name__] += 1
    return histogram
def summary_list(graph):
    """Return useful information about the graph as a list of tuples.

    The list always holds the number of nodes, the number of edges, the network density and the number
    of weakly connected components. The average degree is added unless the graph has no nodes, and the
    number of compilation warnings is added if there are any.

    :param pybel.BELGraph graph: A BEL graph
    :return: A list of (statistic name, value) pairs
    :rtype: list[tuple]
    """
    number_nodes = graph.number_of_nodes()
    result = [
        ('nodes', number_nodes),
        ('edges', graph.number_of_edges()),
        ('network density', nx.density(graph)),
        ('components', nx.number_weakly_connected_components(graph)),
    ]

    try:
        # Going through dict() handles both return types of in_degree(): the plain dictionary of
        # networkx 1.x and the degree view of networkx 2.x, which has no ``.values()`` method.
        in_degrees = dict(graph.in_degree())
        result.append(('average degree', sum(in_degrees.values()) / float(number_nodes)))
    except ZeroDivisionError:
        log.info('%s has no nodes.', graph)

    if graph.warnings:
        result.append(('compilation warnings', len(graph.warnings)))

    return result
:param pybel.BELGraph graph: A BEL graph :param file: A writeable file or file-like object. If None, defaults to :data:`sys.stdout` """ print(summary_str(graph), file=file) pybel-0.12.1/src/pybel/struct/summary/node_summary.py000066400000000000000000000163711334645200200227140ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Summary functions for nodes in BEL graphs.""" from collections import Counter, defaultdict import itertools as itt from ..filters.node_predicates import has_variant from ...constants import ( ACTIVITY, EFFECT, FROM_LOC, FUSION, IDENTIFIER, KIND, LOCATION, MEMBERS, MODIFIER, NAME, NAMESPACE, OBJECT, PARTNER_3P, PARTNER_5P, SUBJECT, TO_LOC, TRANSLOCATION, VARIANTS, ) from ...dsl import Pathology __all__ = [ 'get_functions', 'count_functions', 'count_namespaces', 'get_namespaces', 'count_names_by_namespace', 'get_names_by_namespace', 'get_unused_namespaces', 'count_variants', 'get_names', 'count_pathologies', 'get_top_pathologies', 'get_top_hubs', ] def _function_iterator(graph): """Iterate over the functions in a graph. :rtype: iter[str] """ return ( node.function for node in graph ) def get_functions(graph): """Get the set of all functions used in this graph. :param pybel.BELGraph graph: A BEL graph :return: A set of functions :rtype: set[str] """ return set(_function_iterator(graph)) def count_functions(graph): """Count the frequency of each function present in a graph. :param pybel.BELGraph graph: A BEL graph :return: A Counter from {function: frequency} :rtype: collections.Counter """ return Counter(_function_iterator(graph)) def _iterate_namespaces(graph): return ( node[NAMESPACE] for node in graph if NAMESPACE in node ) def count_namespaces(graph): """Count the frequency of each namespace across all nodes (that have namespaces). 
def _identifier_filtered_iterator(graph):
    """Iterate over the (namespace, name) pairs of the identifiers used in a graph, with duplicates.

    Pairs are taken from each node and its members (through ``_get_node_names``) and, for both sides
    of each edge, from activity effects, translocation endpoints and locations.

    :param pybel.BELGraph graph: A BEL graph
    :rtype: iter[tuple[str,str]]
    """
    for data in graph:
        for pair in _get_node_names(data):
            yield pair

        for member in data.get(MEMBERS, []):
            for pair in _get_node_names(member):
                yield pair

    for ((_, _, data), side) in itt.product(graph.edges(data=True), (SUBJECT, OBJECT)):
        side_data = data.get(side)
        if side_data is None:
            continue

        modifier = side_data.get(MODIFIER)
        effect = side_data.get(EFFECT)

        if modifier == ACTIVITY and effect is not None and NAMESPACE in effect and NAME in effect:
            yield effect[NAMESPACE], effect[NAME]

        elif modifier == TRANSLOCATION and effect is not None:
            # Either endpoint may be absent; ``.get`` then returns None, on which a membership
            # test would raise a TypeError. Guard it the same way as the location below.
            for endpoint_key in (FROM_LOC, TO_LOC):
                endpoint = effect.get(endpoint_key)
                if endpoint is not None and NAMESPACE in endpoint and NAME in endpoint:
                    yield endpoint[NAMESPACE], endpoint[NAME]

        location = side_data.get(LOCATION)
        if location is not None and NAMESPACE in location and NAME in location:
            yield location[NAMESPACE], location[NAME]
def count_names_by_namespace(graph, namespace):
    """Count how often each name of a given namespace occurs in the graph.

    :param pybel.BELGraph graph: A BEL graph
    :param str namespace: A namespace keyword
    :return: A counter from {name: frequency}
    :rtype: collections.Counter
    :raises IndexError: if the namespace is not defined in the graph.
    """
    if namespace in graph.defined_namespace_keywords:
        return Counter(_namespace_filtered_iterator(graph, namespace))

    raise IndexError('{} is not defined in {}'.format(namespace, graph))
:param pybel.BELGraph graph: A BEL graph :rtype: Counter """ return Counter( variant_data[KIND] for data in graph if has_variant(graph, data) for variant_data in data[VARIANTS] ) def get_top_hubs(graph, count=15): """Get the top hubs in the graph by BEL. :param pybel.BELGraph graph: A BEL graph :param Optional[int] count: The number of top hubs to return. If None, returns all nodes :rtype: dict[tuple,int] """ return Counter(dict(graph.degree())).most_common(count) def _pathology_iterator(graph): """Iterate over edges in which either the source or target is a pathology node. :param pybel.BELGraph graph: A BEL graph :rtype: iter """ for node in itt.chain.from_iterable(graph.edges()): if isinstance(node, Pathology): yield node def count_pathologies(graph): """Count the number of edges in which each pathology is incident. :param pybel.BELGraph graph: A BEL graph :rtype: Counter """ return Counter(_pathology_iterator(graph)) def get_top_pathologies(graph, count=15): """Get the top highest relationship-having edges in the graph by BEL. :param pybel.BELGraph graph: A BEL graph :param Optional[int] count: :rtype: dict[tuple,int] """ return count_pathologies(graph).most_common(count) pybel-0.12.1/src/pybel/struct/summary/provenance.py000066400000000000000000000027271334645200200223520ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Summary functions for citation and provenance information in BEL graphs.""" from ..filters.edge_predicates import has_pubmed from ...constants import CITATION, CITATION_REFERENCE, CITATION_TYPE __all__ = [ 'iterate_pubmed_identifiers', 'get_pubmed_identifiers', 'count_citations', ] def iterate_pubmed_identifiers(graph): """Iterate over all PubMed identifiers in a graph. 
:param pybel.BELGraph graph: A BEL graph :return: An iterator over the PubMed identifiers in the graph :rtype: iter[str] """ return ( data[CITATION][CITATION_REFERENCE].strip() for _, _, data in graph.edges(data=True) if has_pubmed(data) ) def get_pubmed_identifiers(graph): """Get the set of all PubMed identifiers cited in the construction of a graph. :param pybel.BELGraph graph: A BEL graph :return: A set of all PubMed identifiers cited in the construction of this graph :rtype: set[str] """ return set(iterate_pubmed_identifiers(graph)) def _get_citation_pair(data): return data[CITATION][CITATION_TYPE], data[CITATION][CITATION_REFERENCE].strip() def count_citations(graph): """Return the number of unique citations. :param pybel.BELGraph graph: A BEL graph :return: The number of unique citations in the graph. :rtype: int """ return len({ _get_citation_pair(data) for _, _, data in graph.edges(data=True) if CITATION in data }) pybel-0.12.1/src/pybel/struct/utils.py000066400000000000000000000022611334645200200176460ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Utilities for :mod:`pybel.struct`.""" import networkx as nx __all__ = [ 'update_metadata', 'update_node_helper', ] def update_metadata(source, target): """Update the namespace and annotation metadata in the target graph. :param pybel.BELGraph source: :param pybel.BELGraph target: """ target.namespace_url.update(source.namespace_url) target.namespace_pattern.update(source.namespace_pattern) target.annotation_url.update(source.annotation_url) target.annotation_pattern.update(source.annotation_pattern) for keyword, values in source.annotation_list.items(): if keyword not in target.annotation_list: target.annotation_list[keyword] = values else: for value in values: target.annotation_list[keyword].add(value) def update_node_helper(source, target): """Update the nodes' data dictionaries in the target graph from the source graph. 
:param nx.Graph source: The universe of all knowledge :param nx.Graph target: The target BEL graph """ for node in target: if node in source: target.nodes[node].update(source.nodes[node]) pybel-0.12.1/src/pybel/testing/000077500000000000000000000000001334645200200162645ustar00rootroot00000000000000pybel-0.12.1/src/pybel/testing/__init__.py000066400000000000000000000000741334645200200203760ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Testing utilities for PyBEL.""" pybel-0.12.1/src/pybel/testing/cases.py000066400000000000000000000051451334645200200177410ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Test cases for PyBEL testing.""" import logging import os import tempfile import unittest from ..manager import Manager __all__ = [ 'TEST_CONNECTION', 'TemporaryCacheMixin', 'TemporaryCacheClsMixin', 'FleetingTemporaryCacheMixin', ] log = logging.getLogger(__name__) TEST_CONNECTION = os.environ.get('PYBEL_TEST_CONNECTION') class TemporaryCacheMixin(unittest.TestCase): """A test case that has a connection and a manager that is created for each test function.""" def setUp(self): """Set up the test function with a connection and manager.""" if TEST_CONNECTION: self.connection = TEST_CONNECTION else: self.fd, self.path = tempfile.mkstemp() self.connection = 'sqlite:///' + self.path log.info('Test generated connection string %s', self.connection) self.manager = Manager(connection=self.connection) self.manager.create_all() def tearDown(self): """Tear down the test functing by closing the session and removing the database.""" self.manager.session.close() if not TEST_CONNECTION: os.close(self.fd) os.remove(self.path) else: self.manager.drop_all() class TemporaryCacheClsMixin(unittest.TestCase): """A test case that has a connection and a manager that is created for each test class.""" fd, path, manager = None, None, None @classmethod def setUpClass(cls): """Set up the test class with a connection and manager.""" if TEST_CONNECTION: cls.connection = 
TEST_CONNECTION else: cls.fd, cls.path = tempfile.mkstemp() cls.connection = 'sqlite:///' + cls.path log.info('Test generated connection string %s', cls.connection) cls.manager = Manager(connection=cls.connection) cls.manager.create_all() @classmethod def tearDownClass(cls): """Tear down the test class by closing the session and removing the database.""" cls.manager.session.close() if not TEST_CONNECTION: os.close(cls.fd) os.remove(cls.path) else: cls.manager.drop_all() class FleetingTemporaryCacheMixin(TemporaryCacheClsMixin): """A test case that clears the database before each function.""" def setUp(self): """Set up the function by clearing the database.""" super(FleetingTemporaryCacheMixin, self).setUp() self.manager.drop_networks() self.manager.drop_edges() self.manager.drop_nodes() self.manager.drop_namespaces() pybel-0.12.1/src/pybel/testing/constants.py000066400000000000000000000032121334645200200206500ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Testing resources for PyBEL.""" import os from pathlib import Path __all__ = [ # BELNS 'test_ns_1', 'test_ns_2', 'test_ns_nocache', 'test_ns_nocache_path', 'test_ns_empty', # BELANNO 'test_an_1', # BEL 'test_bel_simple', 'test_bel_slushy', 'test_bel_thorough', 'test_bel_isolated', 'test_bel_misordered', 'test_bel_no_identifier_valiation', # JSON 'test_jgif_path', ] HERE = os.path.dirname(os.path.realpath(__file__)) resources_dir = os.path.join(HERE, 'resources') # BELNS Files belns_dir_path = os.path.join(resources_dir, 'belns') test_ns_1 = os.path.join(belns_dir_path, 'test_ns_1.belns') test_ns_2 = os.path.join(belns_dir_path, 'test_ns_1_updated.belns') test_ns_nocache = os.path.join(belns_dir_path, 'test_nocache.belns') test_ns_nocache_path = Path(test_ns_nocache).as_uri() test_ns_empty = os.path.join(belns_dir_path, 'test_ns_empty.belns') # BELANNO Files belanno_dir_path = os.path.join(resources_dir, 'belanno') test_an_1 = os.path.join(belanno_dir_path, 'test_an_1.belanno') # BEL Files bel_dir_path = 
os.path.join(resources_dir, 'bel') test_bel_simple = os.path.join(bel_dir_path, 'test_bel.bel') test_bel_slushy = os.path.join(bel_dir_path, 'slushy.bel') test_bel_thorough = os.path.join(bel_dir_path, 'thorough.bel') test_bel_isolated = os.path.join(bel_dir_path, 'isolated.bel') test_bel_misordered = os.path.join(bel_dir_path, 'misordered.bel') test_bel_no_identifier_valiation = os.path.join(bel_dir_path, 'no_identifier_validation_test.bel') # JSON Files test_jgif_path = os.path.join(bel_dir_path, 'Cytotoxic T-cell Signaling-2.0-Hs.json') pybel-0.12.1/src/pybel/testing/generate.py000066400000000000000000000016621334645200200204350ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Utilities for PyBEL testing.""" import itertools as itt from random import shuffle from .utils import n from ..dsl import protein from ..struct import BELGraph __all__ = [ 'generate_random_graph', ] def generate_random_graph(n_nodes, n_edges, namespace='NS'): """Generate a sub-graph with random nodes and edges. :param int n_nodes: Number of nodes to make :param int n_edges: Number of edges to make :param str namespace: The namespace of the nodes to use :rtype: pybel.BELGraph """ graph = BELGraph() nodes = [ protein(namespace=namespace, name=str(i)) for i in range(1, n_nodes) ] # TODO is there a better way to randomly sample without replacement from an iterator? edges = list(itt.combinations(nodes, r=2)) shuffle(edges) for u, v in edges[:n_edges]: graph.add_increases(u, v, citation=n(), evidence=n()) return graph pybel-0.12.1/src/pybel/testing/mock_manager.py000066400000000000000000000036271334645200200212710ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Mocks for PyBEL testing.""" from ..manager.models import Network from ..struct import union class MockQueryManager: """A mock manager.""" def __init__(self, graphs=None): """Build a mock manager appropriate for testing the pipeline and query builders. 
:param Optional[list[pybel.BELGraph]] graphs: A list of BEL graphs to index """ self.graphs = [] #: A lookup for nodes from the node hash (string) to the node tuple self.hash_to_node = {} #: A lookup from network identifier to graph self.id_graph = {} if graphs is not None: for graph in graphs: self.insert_graph(graph) def count_networks(self): """Count networks in the manager. :rtype: int """ return len(self.graphs) def insert_graph(self, graph): """Insert a graph and ensure its nodes are cached. :param pybel.BELGraph graph: :rtype: Network """ network_id = len(self.graphs) self.graphs.append(graph) self.id_graph[network_id] = graph for node in graph: self.hash_to_node[node.sha512] = node return Network(id=network_id) def get_graph_by_ids(self, network_ids): """Get a graph from the union of multiple networks. :param iter[int] network_ids: The identifiers of networks in the database :rtype: pybel.BELGraph """ network_ids = list(network_ids) if len(network_ids) == 1: return self.id_graph[network_ids[0]] graphs = [ self.id_graph[graph_id] for graph_id in network_ids ] return union(graphs) def get_dsl_by_hash(self, sha512): """Get a DSL by its hash. 
:param str sha512: :rtype: Optional[BaseEntity] """ return self.hash_to_node.get(sha512) pybel-0.12.1/src/pybel/testing/mocks.py000066400000000000000000000053221334645200200177540ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Mocks for PyBEL testing.""" import itertools as itt import os from .constants import bel_dir_path, belanno_dir_path, belns_dir_path from .utils import get_uri_name try: from unittest import mock except ImportError: import mock __all__ = [ 'MockResponse', 'MockSession', 'mock_bel_resources', ] _responses = [ ('go-cellular-component-20170511.belns', os.path.join(belns_dir_path, 'hgnc-human-genes.belns')), ('hgnc-human-genes-20170725.belns', os.path.join(belns_dir_path, 'hgnc-human-genes.belns')), ('chebi-20170725.belns', os.path.join(belns_dir_path, 'chebi.belns')), ('go-biological-process-20170725.belns', os.path.join(belns_dir_path, 'go-biological-process.belns')), ('species-taxonomy-id-20170511.belanno', os.path.join(belanno_dir_path, 'species-taxonomy-id.belanno')), ('confidence-1.0.0.belanno', os.path.join(belanno_dir_path, 'confidence-1.0.0.belanno')), ] class MockResponse: """See http://stackoverflow.com/questions/15753390/python-mock-requests-and-the-response.""" def __init__(self, mock_url): """Build a mock for the requests Response object. :param str mock_url: The real URL to mock. 
""" _r = [ ('.belns', os.path.join(belns_dir_path, get_uri_name(mock_url))), ('.belanno', os.path.join(belanno_dir_path, get_uri_name(mock_url))), ('.bel', os.path.join(bel_dir_path, get_uri_name(mock_url))), ] self.path = None for suffix, path in itt.chain(_responses, _r): if mock_url.endswith(suffix): self.path = path break if self.path is None: raise ValueError('missing file') if not os.path.exists(self.path): raise ValueError("file doesn't exist: {}".format(self.path)) def iter_lines(self): """Iterate the lines of the mock file.""" with open(self.path, 'rb') as file: lines = list(file) for line in lines: yield line def raise_for_status(self): """Mock raising an error, by not doing anything at all.""" class MockSession: """Patches the session object so requests can be redirected through the filesystem without rewriting BEL files.""" def mount(self, prefix, adapter): """Mock mounting an adapter by not doing anything.""" @staticmethod def get(url): """Mock getting a URL by returning a mock response. :param str url: The URL to mock get """ return MockResponse(url) def close(self): """Mock closing a connection by not doing anything.""" mock_bel_resources = mock.patch('pybel.resources.utils.requests.Session', side_effect=MockSession) pybel-0.12.1/src/pybel/testing/resources/000077500000000000000000000000001334645200200202765ustar00rootroot00000000000000pybel-0.12.1/src/pybel/testing/resources/bel/000077500000000000000000000000001334645200200210405ustar00rootroot00000000000000pybel-0.12.1/src/pybel/testing/resources/bel/Cytotoxic T-cell Signaling-2.0-Hs.json000077500000000000000000015225701334645200200276010ustar00rootroot00000000000000{ "graph": { "directed": false, "type": "BEL-V1.0", "label": "Cytotoxic T-cell Signaling-2.0-Hs", "metadata": { "description": "The Cytotoxic T-cell Signaling network depicts the causal mechanisms that are activated in CD8+ cytotoxic T-cells following T-cell receptor (TCR) ligation. 
Expanding on these processes, the network highlights the chemokines secreted by macrophages and dendritic cells, as well as the cognate T-cell receptors, involved in mediating T-cell recruitment to compromised lung tissue during COPD development.", "species_common_name": "Human", "version": "2.0", "boundary_conditions": "The human model sets represent early COPD (GOLD stage I and II) processes. If supporting literature from early COPD studies were not available, stage-independent COPD studies were used. If COPD studies were not found, we expanded our inclusion criteria to studies from healthy context and incorporated mechanisms active in processes implicated in COPD into the disease models. Literature describing processes active in acute exacerbation in COPD patients was excluded from supporting edges of the network models. We prioritized data collected from studies of lung and COPD-relevant cell types, but excluded literature related to asthma or bronchitis. Human-specific connections were preferred, but rat and mouse were also included where human data was not available." 
}, "nodes": [ { "id": "p(HGNC:CXCR6)", "label": "p(HGNC:CXCR6)", "metadata": { "coordinate": [ 0.4865359758724688, 0.6775492383816824 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c3405149d" } }, { "id": "p(HGNC:IL15RA)", "label": "p(HGNC:IL15RA)", "metadata": { "coordinate": [ 0.14885825075398534, 0.46895313864364024 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c3405149e" } }, { "id": "cat(p(EGID:21577))", "label": "cat(p(EGID:21577))", "metadata": { "coordinate": [ 0.5867083153813012, 0.26035703890559814 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c3405149f" } }, { "id": "bp(GOBP:\"lymphocyte chemotaxis\")", "label": "bp(GOBP:\"lymphocyte chemotaxis\")", "metadata": { "coordinate": [ 0.5333907798362775, 0.9545454545454545 ], "bel_function_type": "biologicalProcess", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514a0" } }, { "id": "cat(p(HGNC:CD28))", "label": "cat(p(HGNC:CD28))", "metadata": { "coordinate": [ 0.8979965532098234, 0.33360822741825946 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514a1" } }, { "id": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B)))", "label": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B)))", "metadata": { "coordinate": [ 0.8053640672124085, 0.49805957116522753 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514a2" } }, { "id": "p(HGNC:IL2RG)", "label": "p(HGNC:IL2RG)", "metadata": { "coordinate": [ 0.378810732801822, 0.73519519238509 ], "bel_function_type": "proteinAbundance", "createdBy": "mberra", "nodeId": "5477834189e36203806fca53" } }, { "id": "cat(p(HGNC:IL15RA))", "label": "cat(p(HGNC:IL15RA))", "metadata": { "coordinate": [ 0.10200344679017666, 0.5698554380518095 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514a4" } }, { 
"id": "cat(p(MGI:Il2rg))", "label": "cat(p(MGI:Il2rg))", "metadata": { "coordinate": [ 0.045454545454545456, 0.4451828854176773 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514a5" } }, { "id": "p(HGNC:ZAP70)", "label": "p(HGNC:ZAP70)", "metadata": { "coordinate": [ 0.6341016803102112, 0.45973610167847095 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514a6" } }, { "id": "kin(p(HGNC:FYN))", "label": "kin(p(HGNC:FYN))", "metadata": { "coordinate": [ 0.7854373115036622, 0.6135150868341903 ], "bel_function_type": "kinaseActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514a7" } }, { "id": "a(SCHEM:Calcium)", "label": "a(SCHEM:Calcium)", "metadata": { "coordinate": [ 0.25764756570443775, 0.26278257494906376 ], "bel_function_type": "abundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514a8" } }, { "id": "cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "label": "cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "metadata": { "coordinate": [ 0.7644334338647135, 0.31856990394877266 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514a9" } }, { "id": "complex(SCOMP:\"T Cell Receptor Complex\")", "label": "complex(SCOMP:\"T Cell Receptor Complex\")", "metadata": { "coordinate": [ 0.8619129685480397, 0.17982924226254002 ], "bel_function_type": "complexAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514aa" } }, { "id": "bp(GOBP:\"T cell activation\")", "label": "bp(GOBP:\"T cell activation\")", "metadata": { "coordinate": [ 0.4294485135717363, 0.2370718928883283 ], "bel_function_type": "biologicalProcess", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514ab" } }, { "id": "p(HGNC:CCL3)", "label": "p(HGNC:CCL3)", "metadata": { "coordinate": [ 0.12785437311503664, 0.7260599592509944 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": 
"524b3517d3fbfd4c340514ac" } }, { "id": "cat(p(HGNC:IL2RB))", "label": "cat(p(HGNC:IL2RB))", "metadata": { "coordinate": [ 0.3303532959931064, 0.4286892403221112 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514ad" } }, { "id": "p(HGNC:PLCG1,pmod(P,Y))", "label": "p(HGNC:PLCG1,pmod(P,Y))", "metadata": { "coordinate": [ 0.53877638948729, 0.4214126321917144 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514ae" } }, { "id": "p(HGNC:FASLG)", "label": "p(HGNC:FASLG)", "metadata": { "coordinate": [ 0.42137009909521755, 0.045454545454545456 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514af" } }, { "id": "p(HGNC:IDO1)", "label": "p(HGNC:IDO1)", "metadata": { "coordinate": [ 0.37451529513140885, 0.15945473949742892 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b0" } }, { "id": "cat(p(HGNC:CCR5))", "label": "cat(p(HGNC:CCR5))", "metadata": { "coordinate": [ 0.2059457130547178, 0.8007664693897352 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b1" } }, { "id": "p(HGNC:IL2)", "label": "p(HGNC:IL2)", "metadata": { "coordinate": [ 0.7138087031451961, 0.227854855923159 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b2" } }, { "id": "complex(p(HGNC:CD8A),p(HGNC:CD8B))", "label": "complex(p(HGNC:CD8A),p(HGNC:CD8B))", "metadata": { "coordinate": [ 0.9545454545454546, 0.6489279130687882 ], "bel_function_type": "complexAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b3" } }, { "id": "cat(p(HGNC:PLCG1))", "label": "cat(p(HGNC:PLCG1))", "metadata": { "coordinate": [ 0.36859112451529513, 0.33457844183564567 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b4" } }, { "id": 
"phos(complex(SCOMP:\"Calcineurin Complex\"))", "label": "phos(complex(SCOMP:\"Calcineurin Complex\"))", "metadata": { "coordinate": [ 0.2571090047393365, 0.1662462404191326 ], "bel_function_type": "phosphataseActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b5" } }, { "id": "p(HGNC:PLCG1)", "label": "p(HGNC:PLCG1)", "metadata": { "coordinate": [ 0.24149073675140026, 0.35640826622683613 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b6" } }, { "id": "p(HGNC:BCL2)", "label": "p(HGNC:BCL2)", "metadata": { "coordinate": [ 0.6265618267987937, 0.10512273212379936 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b7" } }, { "id": "p(HGNC:CCR3)", "label": "p(HGNC:CCR3)", "metadata": { "coordinate": [ 0.3799009047824214, 0.8366644028330261 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b8" } }, { "id": "p(HGNC:IL15)", "label": "p(HGNC:IL15)", "metadata": { "coordinate": [ 0.2177940542869453, 0.5189191811390317 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514b9" } }, { "id": "p(HGNC:IL2RB)", "label": "p(HGNC:IL2RB)", "metadata": { "coordinate": [ 0.4326798793623438, 0.4471233142524498 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514ba" } }, { "id": "cat(p(HGNC:CXCR6))", "label": "cat(p(HGNC:CXCR6))", "metadata": { "coordinate": [ 0.5113097802671263, 0.5887746191908412 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514bb" } }, { "id": "kin(p(HGNC:ZAP70))", "label": "kin(p(HGNC:ZAP70))", "metadata": { "coordinate": [ 0.6513356311934512, 0.3709614824876298 ], "bel_function_type": "kinaseActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514bc" } }, { "id": "p(HGNC:CD28)", "label": "p(HGNC:CD28)", "metadata": { "coordinate": [ 
0.9448513571736321, 0.42238284660910064 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514bd" } }, { "id": "path(SDIS:\"Cytotoxic T-cell activation\")", "label": "path(SDIS:\"Cytotoxic T-cell activation\")", "metadata": { "coordinate": [ 0.510771219302025, 0.1085184825846512 ], "bel_function_type": "pathology", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514be" } }, { "id": "cat(p(HGNC:CXCR3))", "label": "cat(p(HGNC:CXCR3))", "metadata": { "coordinate": [ 0.6238690219732874, 0.864315513728534 ], "bel_function_type": "catalyticActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514bf" } }, { "id": "p(HGNC:FYN)", "label": "p(HGNC:FYN)", "metadata": { "coordinate": [ 0.8813011632916847, 0.5645192587561851 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514c0" } }, { "id": "p(HGNC:CXCL16)", "label": "p(HGNC:CXCL16)", "metadata": { "coordinate": [ 0.41006031882809135, 0.5630639371301057 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514c1" } }, { "id": "p(HGNC:CCR5)", "label": "p(HGNC:CCR5)", "metadata": { "coordinate": [ 0.30557949159844894, 0.7250897448336082 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514c2" } }, { "id": "p(HGNC:LCK)", "label": "p(HGNC:LCK)", "metadata": { "coordinate": [ 0.6502585092632487, 0.6547491995731056 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514c3" } }, { "id": "p(SFAM:\"Chemokine Receptor Family\")", "label": "p(SFAM:\"Chemokine Receptor Family\")", "metadata": { "coordinate": [ 0.4224472210254201, 0.7498302124769574 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514c4" } }, { "id": "p(HGNC:CXCL9)", "label": "p(HGNC:CXCL9)", "metadata": { "coordinate": [ 0.7138087031451961, 0.7934898612593383 ], 
"bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514c5" } }, { "id": "path(SDIS:\"T-cell migration\")", "label": "path(SDIS:\"T-cell migration\")", "metadata": { "coordinate": [ 0.2565704437742352, 0.6275831958862909 ], "bel_function_type": "pathology", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514c6" } }, { "id": "kin(p(HGNC:LCK))", "label": "kin(p(HGNC:LCK))", "metadata": { "coordinate": [ 0.6766479965532098, 0.5499660424953915 ], "bel_function_type": "kinaseActivity", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514c7" } }, { "id": "p(HGNC:CXCR3)", "label": "p(HGNC:CXCR3)", "metadata": { "coordinate": [ 0.5355450236966824, 0.7866983603376346 ], "bel_function_type": "proteinAbundance", "createdBy": "selventa", "nodeId": "524b3517d3fbfd4c340514c8" } }, { "id": "p(HGNC:FOXO3)", "label": "p(HGNC:FOXO3)", "metadata": { "coordinate": [ 0.629630565936505, 0.242379491330301 ], "bel_function_type": "proteinAbundance", "createdBy": "edwardsanders", "nodeId": "52d177f3bf21ca0758e0b194" } }, { "id": "bp(GOBP:\"CD8-positive, alpha-beta T cell proliferation\")", "label": "bp(GOBP:\"CD8-positive, alpha-beta T cell proliferation\")", "metadata": { "coordinate": [ 0.191419861368565, 0.394219729301622 ], "bel_function_type": "biologicalProcess", "createdBy": "edwardsanders", "nodeId": "52d177f3bf21ca0758e0b196" } }, { "id": "a(CHEBI:acrolein)", "label": "a(CHEBI:acrolein)", "metadata": { "coordinate": [ 0.227447056317444, 0.830349611039436 ], "bel_function_type": "abundance", "createdBy": "edwardsanders", "nodeId": "52e8181cbf21ca0b1807399d" } }, { "id": "p(HGNC:IDO2)", "label": "p(HGNC:IDO2)", "metadata": { "coordinate": [ 0.732145296284996, 0.568265673037742 ], "bel_function_type": "proteinAbundance", "createdBy": "csauco", "nodeId": "548b0b2d89e3620fe090276a" } }, { "id": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\")", "label": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\")", "metadata": { 
"coordinate": [ 0.905759461180195, 0.893530480514062 ], "bel_function_type": "pathology", "createdBy": "ganna.androsova", "nodeId": "5538c4a689e362097c008f87" } }, { "id": "p(HGNC:IFNG)", "label": "p(HGNC:IFNG)", "metadata": { "coordinate": [ 0.341815324659373, 0.719730175901079 ], "bel_function_type": "proteinAbundance", "createdBy": "ganna.androsova", "nodeId": "5538c4a689e362097c008f89" } }, { "id": "p(HGNC:TNFRSF4)", "label": "p(HGNC:TNFRSF4)", "metadata": { "coordinate": [ 0.419530366742765, 0.0419037756705209 ], "bel_function_type": "proteinAbundance", "createdBy": "ganna.androsova", "nodeId": "5538c59389e362097c010f28" } }, { "id": "p(HGNC:CTLA4)", "label": "p(HGNC:CTLA4)", "metadata": { "coordinate": [ 0.113417695329253, 0.486434059909747 ], "bel_function_type": "proteinAbundance", "createdBy": "ganna.androsova", "nodeId": "5538dd3389e362097c020e84" } }, { "id": "p(HGNC:GZMA)", "label": "p(HGNC:GZMA)", "metadata": { "coordinate": [ 0.109244544109909, 0.622299612323893 ], "bel_function_type": "proteinAbundance", "createdBy": "ganna.androsova", "nodeId": "5538ddd889e362097c02512d" } }, { "id": "p(HGNC:PRF1)", "label": "p(HGNC:PRF1)", "metadata": { "coordinate": [ 0.530325354789535, 0.247669438946838 ], "bel_function_type": "proteinAbundance", "createdBy": "ganna.androsova", "nodeId": "5538decb89e362097c025641" } }, { "id": "p(HGNC:TNF)", "label": "p(HGNC:TNF)", "metadata": { "coordinate": [ 0.857154278949441, 0.997572151477249 ], "bel_function_type": "proteinAbundance", "createdBy": "ganna.androsova", "nodeId": "5538e40e89e362097c02f11c" } } ], "edges": [ { "source": "kin(p(HGNC:FYN))", "relation": "increases", "target": "kin(p(HGNC:LCK))", "directed": false, "label": "kin(p(HGNC:FYN)) increases kin(p(HGNC:LCK))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405147f", "evidences": [ { "bel_statement": "kin(p(HGNC:FYN)) increases kin(p(HGNC:LCK))", "summary_text": "Fyn plays an essential role by positive regulation of Lck 
activity.", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2004 Oct 12 101(41) 14859-64", "id": "15465914" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead1" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "naive T-cell", "tissue": "" } } ] } }, { "source": "kin(p(HGNC:ZAP70))", "relation": "directlyIncreases", "target": "p(HGNC:PLCG1,pmod(P,Y))", "directed": false, "label": "kin(p(HGNC:ZAP70)) directlyIncreases p(HGNC:PLCG1,pmod(P,Y))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051499", "evidences": [ { "bel_statement": "kin(p(HGNC:ZAP70)) directlyIncreases p(HGNC:PLCG1,pmod(P,Y))", "summary_text": "dual phosphorylation by ZAP70 and Itk triggers the activation of PLCg1", "citation": { "type": "PubMed", "name": "Mol Immunol 2002 Jun 38(15) 1087-99", "id": "12044776" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf7" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "complex(p(HGNC:CD8A),p(HGNC:CD8B))", "relation": "actsIn", "target": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B)))", "directed": false, "label": "complex(p(HGNC:CD8A),p(HGNC:CD8B)) actsIn cat(complex(p(HGNC:CD8A),p(HGNC:CD8B)))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405147d", "evidences": [ { "bel_statement": "complex(p(HGNC:CD8A),p(HGNC:CD8B)) actsIn cat(complex(p(HGNC:CD8A),p(HGNC:CD8B)))", "summary_text": "This Network edge has no supporting evidence. 
Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eafa" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "a(SCHEM:Calcium)", "relation": "directlyIncreases", "target": "phos(complex(SCOMP:\"Calcineurin Complex\"))", "directed": false, "label": "a(SCHEM:Calcium) directlyIncreases phos(complex(SCOMP:\"Calcineurin Complex\"))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405147b", "evidences": [ { "bel_statement": "a(SCHEM:Calcium) directlyIncreases phos(complex(SCOMP:\"Calcineurin Complex\"))", "summary_text": "NMDA-mediated influx of calcium led to activated of the calcium-dependent phosphatase calcineurin and the subsequent dephosphorylation and activation of the protein-tyrosine phosphatase STEP", "citation": { "type": "PubMed", "name": "Nat Neurosci 2003 Jan 6(1) 34-42", "id": "12483215" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb04" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "neuron", "tissue": "" } } ] } }, { "source": "cat(p(EGID:21577))", "relation": "increases", "target": "bp(GOBP:\"T cell activation\")", "directed": false, "label": "cat(p(EGID:21577)) increases bp(GOBP:\"T cell activation\")", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051473", "evidences": [ { "bel_statement": "cat(p(EGID:21577)) increases bp(GOBP:\"T cell activation\")", "summary_text": "This Network edge has no supporting evidence. 
Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb30" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:CCR3)", "relation": " isA", "target": "p(SFAM:\"Chemokine Receptor Family\")", "directed": false, "label": "p(HGNC:CCR3) isA p(SFAM:\"Chemokine Receptor Family\")", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051491", "evidences": [ ] } }, { "source": "cat(p(HGNC:IL2RB))", "relation": "decreases", "target": "bp(GOBP:\"T cell activation\")", "directed": false, "label": "cat(p(HGNC:IL2RB)) decreases bp(GOBP:\"T cell activation\")", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051485", "evidences": [ { "bel_statement": "cat(p(HGNC:IL2RB)) decreases bp(GOBP:\"T cell activation\")", "summary_text": "This Network edge has no supporting evidence. 
Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb39" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:CCR5)", "relation": " isA", "target": "p(SFAM:\"Chemokine Receptor Family\")", "directed": false, "label": "p(HGNC:CCR5) isA p(SFAM:\"Chemokine Receptor Family\")", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051492", "evidences": [ ] } }, { "source": "p(HGNC:BCL2)", "relation": "increases", "target": "path(SDIS:\"Cytotoxic T-cell activation\")", "directed": false, "label": "p(HGNC:BCL2) increases path(SDIS:\"Cytotoxic T-cell activation\")", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051498", "evidences": [ { "bel_statement": "p(HGNC:BCL2) increases path(SDIS:\"Cytotoxic T-cell activation\")", "summary_text": "While production of IL-2 is confined to periods of lymphocyte activation, the constitutive expression of IL-15 maintains the homeostatic proliferation of lymphocytes, most notably memory CD8 T cells, in the steady-state through the sustained expression of bcl-2 (10–13). 
", "citation": { "type": "PubMed", "name": "J Immunol 2011 Jan 1 186(1) 174-82", "id": "21098221" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb60" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "CD8+ T-cell", "tissue": "" } } ] } }, { "source": "p(HGNC:IL15)", "relation": "increases", "target": "path(SDIS:\"T-cell migration\")", "directed": false, "label": "p(HGNC:IL15) increases path(SDIS:\"T-cell migration\")", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051487", "evidences": [ { "bel_statement": "p(HGNC:IL15) increases path(SDIS:\"T-cell migration\")", "summary_text": "In this paper, we show that localized increases in the homeostatic cytokine IL-15 induced by influenza infection is responsible for the migration of CD8 effector T cells to the site of infection", "citation": { "type": "PubMed", "name": "J Immunol 2011 Jan 1 186(1) 174-82", "id": "21098221" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb66" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "CD8+ T-cell", "tissue": "" } } ] } }, { "source": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B)))", "relation": "increases", "target": "cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "directed": false, "label": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B))) increases cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051481", "evidences": [ { "bel_statement": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B))) increases cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "This Network edge has no supporting evidence. 
Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb6a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "path(SDIS:\"Cytotoxic T-cell activation\")", "relation": " isA", "target": "bp(GOBP:\"T cell activation\")", "directed": false, "label": "path(SDIS:\"Cytotoxic T-cell activation\") isA bp(GOBP:\"T cell activation\")", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405149b", "evidences": [ ] } }, { "source": "path(SDIS:\"Cytotoxic T-cell activation\")", "relation": "increases", "target": "p(HGNC:FASLG)", "directed": false, "label": "path(SDIS:\"Cytotoxic T-cell activation\") increases p(HGNC:FASLG)", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051497", "evidences": [ { "bel_statement": "path(SDIS:\"Cytotoxic T-cell activation\") increases p(HGNC:FASLG)", "summary_text": "This Network edge has no supporting evidence. 
Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb9b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "relation": "increases", "target": "kin(p(HGNC:ZAP70))", "directed": false, "label": "cat(complex(SCOMP:\"T Cell Receptor Complex\")) increases kin(p(HGNC:ZAP70))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405148b", "evidences": [ { "bel_statement": "cat(complex(SCOMP:\"T Cell Receptor Complex\")) increases kin(p(HGNC:ZAP70))", "summary_text": "We show that Crry increases early TCR-dependent activation signals, including p56lck-, zeta-associated protein-70 (ZAP-70), Vav-1, Akt, and extracellular signal-regulated kinase (ERK) phosphorylation but also costimulation-dependent mitogen-activated protein kinases (MAPK), such as the stress-activated c-Jun N-terminal kinase (JNK). It is intriguing that Crry costimulus enhanced p38 MAPK activation in T helper cell type 1 (Th1) but not in Th2 cells. 
", "citation": { "type": "PubMed", "name": "J Leukoc Biol 2005 Dec 78(6) 1386-96", "id": "16301324" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb3a" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "T-cell", "tissue": "" } }, { "bel_statement": "cat(complex(SCOMP:\"T Cell Receptor Complex\")) increases kin(p(HGNC:ZAP70))", "summary_text": "HIP-55 interacted with ZAP-70, a critical protein-tyrosine kinase in TCR signaling, and this interaction was induced by TCR signaling.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Dec 26 278(52) 52195-202", "id": "14557276" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba2" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "cat(complex(SCOMP:\"T Cell Receptor Complex\")) increases kin(p(HGNC:ZAP70))", "summary_text": "This redistribution brings VHR into the vicinity of the triggered TCRs, where VHR is phosphorylated at Tyr138 by ZAP-70. 
We found that this phosphorylation is required for the function of VHR as an inhibitor of the Erk2 and Jnk MAPKs", "citation": { "type": "PubMed", "name": "Nat Immunol 2003 Jan 4(1) 44-8", "id": "12447358" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb22" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } } ] } }, { "source": "p(HGNC:CXCR6)", "relation": " isA", "target": "p(SFAM:\"Chemokine Receptor Family\")", "directed": false, "label": "p(HGNC:CXCR6) isA p(SFAM:\"Chemokine Receptor Family\")", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051493", "evidences": [ ] } }, { "source": "kin(p(HGNC:LCK))", "relation": "directlyIncreases", "target": "p(HGNC:PLCG1,pmod(P,Y))", "directed": false, "label": "kin(p(HGNC:LCK)) directlyIncreases p(HGNC:PLCG1,pmod(P,Y))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405147e", "evidences": [ { "bel_statement": "kin(p(HGNC:LCK)) directlyIncreases p(HGNC:PLCG1,pmod(P,Y))", "summary_text": "This Network edge has no supporting evidence. Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebcb" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B)))", "relation": "increases", "target": "kin(p(HGNC:LCK))", "directed": false, "label": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B))) increases kin(p(HGNC:LCK))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051480", "evidences": [ { "bel_statement": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B))) increases kin(p(HGNC:LCK))", "summary_text": "This Network edge has no supporting evidence. 
Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe6" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:IL15)", "relation": "directlyIncreases", "target": "cat(p(HGNC:IL15RA))", "directed": false, "label": "p(HGNC:IL15) directlyIncreases cat(p(HGNC:IL15RA))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051488", "evidences": [ { "bel_statement": "p(HGNC:IL15) directlyIncreases cat(p(HGNC:IL15RA))", "summary_text": "IL-15 is a common gamma chain cytokine sharing overlapping signaling and biological properties with IL-2 as a result of their mutual usage of the IL-2/15b and common gamma chain (gc) receptor subunits (7,8).", "citation": { "type": "PubMed", "name": "J Immunol 2011 Jan 1 186(1) 174-82", "id": "21098221" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf8" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "CD8+ T-cell", "tissue": "" } } ] } }, { "source": "cat(p(HGNC:CD28))", "relation": "increases", "target": "cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "directed": false, "label": "cat(p(HGNC:CD28)) increases cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051476", "evidences": [ { "bel_statement": "cat(p(HGNC:CD28)) increases cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "This Network edge has no supporting evidence. 
Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec0c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:CXCR3)", "relation": " isA", "target": "p(SFAM:\"Chemokine Receptor Family\")", "directed": false, "label": "p(HGNC:CXCR3) isA p(SFAM:\"Chemokine Receptor Family\")", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051494", "evidences": [ ] } }, { "source": "p(HGNC:CXCR3)", "relation": "actsIn", "target": "cat(p(HGNC:CXCR3))", "directed": false, "label": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051490", "evidences": [ { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "The chemokine receptor CXCR3 is critical for the function of activated T cells. We studied the molecular mechanisms of CXCR3 signalling. The addition of CXCR3 ligands to normal human T cells expressing CXCR3 led to the tyrosine phosphorylation of multiple proteins. Addition of the same ligands to Jurkat T cells engineered to express CXCR3 induced tyrosine phosphorylation of proteins with molecular weights similar to those in normal cells. Immunoblotting with phosphotyrosine-specific antibodies identified Zeta-associated protein of 70,000 molecular weight (ZAP-70), linker for the activation of T cells (LAT), and phospholipase-C-gamma1 (PLCgamma1) to be among the proteins that become phosphorylated upon CXCR3 activation. 
ZAP-70 was phosphorylated on tyrosine 319, LAT on tyrosines 171 and 191, and PLCgamma1 on tyrosine 783", "citation": { "type": "PubMed", "name": "Immunology 2007 Apr 120(4) 467-85", "id": "17250586" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec10" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "By using a CXCR3 ligand reporter mouse, we found that stromal cells predominately expressed the chemokine ligand CXCL9 whereas hematopoietic cells expressed CXCL10 in lymph nodes (LNs). Dendritic cell (DC)-derived CXCL10 facilitated T cell-DC interactions in LNs during T cell priming while both chemokines guided intranodal positioning of CD4(+) T cells to interfollicular and medullary zones. Thus, different chemokines acting on the same receptor can function locally to facilitate DC-T cell interactions and globally to influence intranodal positioning, and both functions contribute to Th1 cell differentiation.", "citation": { "type": "PubMed", "name": "Immunity 2012 Dec 14 37(6) 1091-103", "id": "23123063" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb9d" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "dendritic cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "CXCR3, CXCR4, CXCR5, and CCR6 were detected on human chondrocytes. CXCR3 and CXCR4 expression was increased in exponentially growing chondrocyte subcultures. Ligands of all receptors enhanced the release of MMPs 1, 3, and 13. 
Release of NAG and cathepsin B was significantly higher in chemokine-stimulated cultures than in unstimulated cultures.", "citation": { "type": "PubMed", "name": "Arthritis Rheum 2004 Jan 50(1) 112-22", "id": "14730607" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe5" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "chondrocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "Numerous studies have shown that immature human and mouse blood- and bone marrow-derived DC subsets express a panel of inflammatory chemokine receptors (CCR1-6,8,9, CXCR3,4, CX3CR1) [Table 1 and reviewed in (1-5)]. [Table 1 Chemokine receptors expressed by DC and the functional outcome of receptor ligation}]", "citation": { "type": "PubMed", "name": "Clin Lab Med 2008 Sep 28(3) 375-84, v", "id": "19028258" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc8" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "dendritic cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "Chemokine receptors signal through Galphai2 proteins to activate DOCK2 (dedicator of cytokinesis 2) and other guanine nucleotide exchange factors (GEFs), leading to the activation of RAC1 and RHOA. ", "citation": { "type": "PubMed", "name": "Nat Rev Immunol 2009 Sep 9(9) 630-44", "id": "19696767" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb9e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "b-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "the CXC chemokines Mig and interferon-? inducible protein 10 can be expressed and bound by endothelium stimulated with interferon-? 
and tumor necrosis factor (TNF) ?, and can induce the firm adhesion of T lymphocyte in shear flow via CXCR3.", "citation": { "type": "PubMed", "name": "J Mol Med 2003 Jan 81(1) 4-19", "id": "12545245" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec05" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Endothelial Cells", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "supernatants harvested from stimulated PMN induced migration and rapid integrin-dependent adhesion of CXCR3-expressing lymphocytes; these activities were significantly reduced by neutralizing anti-MIG and anti-IP-10 Abs,", "citation": { "type": "PubMed", "name": "J Immunol 1999 Apr 15 162(8) 4928-37", "id": "10202039" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebfc" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "the chemokine Mig, a ligand for CXCR3, activates the small GTPases RhoA and Rac1, induces a reorganization of the actin cytoskeleton, and triggers cell chemotaxis and modulation of integrin VLA-5- and VLA-4-dependent cell adhesion to fibronectin.", "citation": { "type": "PubMed", "name": "J Biol Chem 2001 Nov 30 276(48) 45098-105", "id": "11571298" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb42" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "neutralization of CXCR3 reduced MIG/CXCL9-induced T lymphocyte proliferation and the number of IFN-gamma-positive spots", "citation": { "type": "PubMed", "name": "J Immunol 2004 Jun 15 172(12) 7417-24", "id": "15187119" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb2f" }, "experiment_context": { 
"species_common_name": "Mouse", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "The ELR-negative CXC chemokines CXCL9, CXCL10, and CXCL11) are potent chemoattractants for mononuclear cells and act through their shared receptor, CXCR3.", "citation": { "type": "PubMed", "name": "J Immunol 2003 Nov 1 171(9) 4844-52", "id": "14568964" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb1b" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "There is increased secretion of Cxcr3-activating chemokines in COPD airways", "citation": { "type": "PubMed", "name": "Pharmacol Rev 2004 Dec 56(4) 515-48", "id": "15602009" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb0d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "Table 1. 
Lymphoid chemokine receptors", "citation": { "type": "PubMed", "name": "Trends Immunol 2004 Feb 25(2) 67-74", "id": "15102365" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eadf" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "T-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR3) actsIn cat(p(HGNC:CXCR3))", "summary_text": "Activation of CXCR3 induces chemotactic responses to I-TAC and reorganization reorganization of the actin cytoskeleton in human airway epithelial cells (20).", "citation": { "type": "PubMed", "name": "Am J Physiol Cell Physiol 2006 Jul 291(1) C34-9", "id": "16467404" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead3" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Epithelial Cells", "tissue": "" } } ] } }, { "source": "p(HGNC:IL15)", "relation": "directlyIncreases", "target": "cat(p(HGNC:IL2RB))", "directed": false, "label": "p(HGNC:IL15) directlyIncreases cat(p(HGNC:IL2RB))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051489", "evidences": [ { "bel_statement": "p(HGNC:IL15) directlyIncreases cat(p(HGNC:IL2RB))", "summary_text": "Considering these points in the context of IL-2- and IL-15-dependent signaling under physiological conditions, the lack of a cytoplasmic domain for IL-2R alpha and trans-presentation of IL-15 by IL-15Ralpha to CD122 and gamma C results in a qualitative identical utilization of signaling pathways associated with CD122 and gamma C. High levels of IL-2Ralpha provide a mean for continual capture of IL-2 to sustain signaling whereas limiting IL-15Ralpha tempers engagement of CD122 and gamma C, limiting signal transduction. 
Thus, varied levels of IL-2Ralpha and IL-15Ralpha provide a simple, yet powerful, mechanism to quantify and tune signaling through common intermediates leading to distinctive biological responses.", "citation": { "type": "PubMed", "name": "J Immunol 2012 May 1 188(9) 4149-57", "id": "22447977" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb3" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:IL15) directlyIncreases cat(p(HGNC:IL2RB))", "summary_text": "Interleukin 15 (IL-15) is a 14-15 kDa polypeptide that belongs to the 4 alpha-helix-bundle family of cytokines and was originally discovered due to its T cell proliferative activity. It utilizes the signal-transducing beta/gamma polypeptides of the IL-2 receptor complex, thus sharing many biological activities with IL-2,", "citation": { "type": "PubMed", "name": "Arch Immunol Ther Exp (Warsz) 2000 48(6) 457-64", "id": "11197599" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec11" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } } ] } }, { "source": "cat(p(EGID:21577))", "relation": " isA", "target": "cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "directed": false, "label": "cat(p(EGID:21577)) isA cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405148c", "evidences": [ ] } }, { "source": "phos(complex(SCOMP:\"Calcineurin Complex\"))", "relation": "increases", "target": "bp(GOBP:\"T cell activation\")", "directed": false, "label": "phos(complex(SCOMP:\"Calcineurin Complex\")) increases bp(GOBP:\"T cell activation\")", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051474", "evidences": [ { "bel_statement": "phos(complex(SCOMP:\"Calcineurin Complex\")) increases bp(GOBP:\"T cell activation\")", 
"summary_text": "This Network edge has no supporting evidence. Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec1e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:IL2)", "relation": "increases", "target": "cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "directed": false, "label": "p(HGNC:IL2) increases cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405148d", "evidences": [ { "bel_statement": "p(HGNC:IL2) increases cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "This Network edge has no supporting evidence. Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec2d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "cat(p(HGNC:PLCG1))", "relation": "increases", "target": "a(SCHEM:Calcium)", "directed": false, "label": "cat(p(HGNC:PLCG1)) increases a(SCHEM:Calcium)", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051483", "evidences": [ { "bel_statement": "cat(p(HGNC:PLCG1)) increases a(SCHEM:Calcium)", "summary_text": "In this study we demonstrate that acute addition of monomeric IgE elicits a wide spectrum of responses in the rat basophilic leukemia-2H3 mast cell line, including activation of phospholipases Cgamma and D, a rise in cytosol Ca(2+), NFAT translocation, degranulation, and membrane ruffling within minutes. 
Calcium transients persist for hours as long as IgE is present resulting in the maintained translocation of the transcription factor NFAT to the nucleus", "citation": { "type": "PubMed", "name": "J Immunol 2004 Apr 1 172(7) 4048-58", "id": "15034016" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae4" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "cat(p(HGNC:PLCG1)) increases a(SCHEM:Calcium)", "summary_text": "Interestingly, the depletion of Grb2 from HEK-293 cells by RNA interference significantly enhanced increased EGF-induced PLC-gamma1 enzymatic activity and mobilization of the intracellular Ca2+, while it did not affect EGF-induced tyrosine phosphorylation of PLC-gamma1.", "citation": { "type": "PubMed", "name": "Cell Signal 2005 Oct 17(10) 1289-99", "id": "16038803" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec36" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "cat(p(HGNC:PLCG1)) increases a(SCHEM:Calcium)", "summary_text": "Reconstitution of deficient mast cells with Vav1 restored normal tyrosine phosphorylation of PLCgamma1 and PLCgamma2 and calcium responses. Thus, Vav1 is essential to FcepsilonRI-mediated activation of PLCgamma and calcium mobilization in mast cells. 
", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2001 Jun 21(11) 3763-74", "id": "11340169" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec2e" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "cat(p(HGNC:PLCG1)) increases a(SCHEM:Calcium)", "summary_text": "activation of c-src was reported to mediate VEGF signaling through PLC-gamma, leading to inositol 1,4,5-trisphosphate formation and calcium mobilization", "citation": { "type": "PubMed", "name": "Am J Physiol Cell Physiol 2001 Jun 280(6) C1375-86", "id": "11350732" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd4" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "cat(p(HGNC:PLCG1)) increases a(SCHEM:Calcium)", "summary_text": "Fig.1", "citation": { "type": "PubMed", "name": "Science 2004 Nov 26 306(5701) 1506-7", "id": "15567848" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb29" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "cat(p(HGNC:CXCR3))", "relation": "increases", "target": "bp(GOBP:\"lymphocyte chemotaxis\")", "directed": false, "label": "cat(p(HGNC:CXCR3)) increases bp(GOBP:\"lymphocyte chemotaxis\")", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405148e", "evidences": [ { "bel_statement": "cat(p(HGNC:CXCR3)) increases bp(GOBP:\"lymphocyte chemotaxis\")", "summary_text": "neutralization of CXCR3 reduced MIG/CXCL9-induced T lymphocyte proliferation and the number of IFN-gamma-positive spots", "citation": { "type": "PubMed", "name": "J Immunol 2004 Jun 15 172(12) 7417-24", "id": "15187119" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec39" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "t-cell", 
"tissue": "" } }, { "bel_statement": "cat(p(HGNC:CXCR3)) increases bp(GOBP:\"lymphocyte chemotaxis\")", "summary_text": "supernatants harvested from stimulated PMN induced migration and rapid integrin-dependent adhesion of CXCR3-expressing lymphocytes; these activities were significantly reduced by neutralizing anti-MIG and anti-IP-10 Abs,", "citation": { "type": "PubMed", "name": "J Immunol 1999 Apr 15 162(8) 4928-37", "id": "10202039" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb40" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } } ] } }, { "source": "p(HGNC:IL2RG)", "relation": "actsIn", "target": "cat(p(MGI:Il2rg))", "directed": false, "label": "p(HGNC:IL2RG) actsIn cat(p(MGI:Il2rg))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051479", "evidences": [ { "bel_statement": "p(HGNC:IL2RG) actsIn cat(p(MGI:Il2rg))", "summary_text": "This Network edge has no supporting evidence. Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec3f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:PLCG1)", "relation": "actsIn", "target": "cat(p(HGNC:PLCG1))", "directed": false, "label": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405147a", "evidences": [ { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "The gene whose expression correlated most strongly with lack of invasion was identified as a potential invasion suppressor and called prostin-1. Pharmacological inhibition of PLC gamma (U73122) confirmed that PLC gamma signaling suppressed prostin-1 in that U73122 treatment caused induction of prostin-1 in PLC gamma competent cells. 
The prostin-1 gene, conserved through phylogeny, is induced by androgen in LNCaP cells and encodes a 92 amino acid protein. The protein shares no extensive homologies with other known genes, yet was recently identified as a small stabilizer subunit of the dolichol-phosphate-mannose (DPM) synthase complex. ***Did not curate*** line below because COS cells are monkey That DPM3/prostin-1 might suppress tumor progression was supported by the finding that exogenous expression in COS cells leads to apoptosis. These findings support the use of model cell lines to identify putative tumor suppressors and promoters.", "citation": { "type": "PubMed", "name": "Oncogene 2001 May 17 20(22) 2781-90", "id": "11420690" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec4f" }, "experiment_context": { "species_common_name": "Human", "disease": "Prostatic Neoplasms", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Induction of germline C epsilon transcripts in DND39 cells by IL-4 required at least two distinct signaling cascades. One was mediated by enhancement of tyrosine phosphorylation of a 57 kd protein associated with phospholipase C-gamma 1 (PLC-gamma 1) that resulted in PLC-gamma 1 activation, inositol lipid hydrolysis, and protein kinase C delta translocation. 
The other was dependent on phosphatidylinositol 3-kinase, whose activation induced protein kinase C zeta translocation.", "citation": { "type": "PubMed", "name": "J Allergy Clin Immunol 1995 Dec 96(6 Pt 2) 1145-51", "id": "8543771" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec2c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "b-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "\tPLC gamma is phosphorylated by activated FGFR, resulting in PLC gamma activation, stimulation of phosphatidyl inositol hydrolysis and generation of two second messengers, diacylglycerol and inositol (1,4,5) P3. Tyrosine phosphorylation of PLCgamma by FGFR4 is weaker than that seen by other isoforms of FGFR.Three tyrosine residues in PLC gamma have been identified as sites of receptor tyrosine kinase phosphorylation. Mutagenesis indicates that the phosphorylation at Tyr 783 is essential for IP3 formation, phosphorylation of Tyr 771 is dispensable, and phosphorylation of Tyr 1254 is necessary to achieve maximal IP3 formation. ", "citation": { "type": "PubMed", "name": "Cytokine Growth Factor Rev 2005 Apr 16(2) 139-49", "id": "15863030" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec00" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Intracellular calcium signaling is regulated by PLC?, which generates inositol 1,4,5-trisphosphate and diacylglycerol from PIP2. Inositol 1,4,5-trisphosphate stimulates the release of intracellular calcium stores upon binding to its receptor in the ER. The depletion of ER calcium stores then triggers extracellular calcium influx. 
Diacylglycerol and intracellular calcium signals cooperate to activate PKCs, which then activate other pathways such as the NF-?B pathway, ultimately leading to mast cell degranulation and cytokine production.", "citation": { "type": "PubMed", "name": "J Biol Chem 2011 Sep 23 286(38) 32891-7", "id": "21799019" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf3" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "A phospholipase C inhibitor, U73122, abrogated Rap1 activation triggered by both the TCR and SDF-1 (CXCL12). PLC-gamma1-deficient Jurkat T cells showed a marked reduction of TCR-triggered Rap1 activation and adhesion to intercellular adhesion molecule-1 (ICAM-1) mediated by LFA-1. In contrast, SDF-1-triggered Rap1 activation and adhesion were not affected in these cells. Transfection of these cells with an expression plasmid encoding PLC-gamma1 restored Rap1 activation by the TCR and the ability to adhere to ICAM-1, accompanied by polarized LFA-1 surface clustering colocalized with regulator of adhesion and polarization enriched in lymphoid tissues (RAPL).", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Mar 19 279(12) 11875-81", "id": "14702343" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe8" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Despite extensive overlap in the molecules recruited to the active receptors, there is some preferential modulation of signalling pathways. Tumour cells that express EGFR with kinase-domain mutations preferentially activate the pro-survival PI3K?AKT and signal transducer and activator of transcription (STAT) pathways67. 
Although EGFR has no consensus sequence for the p85 adaptor subunit of PI3K, it couples to this pathway through GAB1, which binds growth-factorreceptor- bound protein 2 (GRB2).", "citation": { "type": "PubMed", "name": "Nat Rev Cancer 2005 May 5(5) 341-54", "id": "15864276" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe4" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "In this study we demonstrate that acute addition of monomeric IgE elicits a wide spectrum of responses in the rat basophilic leukemia-2H3 mast cell line, including activation of phospholipases Cgamma and D, a rise in cytosol Ca(2+), NFAT translocation, degranulation, and membrane ruffling within minutes. Calcium transients persist for hours as long as IgE is present resulting in the maintained translocation of the transcription factor NFAT to the nucleus", "citation": { "type": "PubMed", "name": "J Immunol 2004 Apr 1 172(7) 4048-58", "id": "15034016" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebca" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "E2-stimulated ERK required ER in breast cancer and endothelial cells and was substantially prevented by expression of a dominant negative EGFR or by tyrphostin AG1478, a specific inhibitor for EGFR tyrosine kinase activity. Transactivation/phosphorylation of EGFR by E2 was dependent on the rapid liberation of heparin-binding EGF (HB-EGF) from cultured MCF-7 cells and was blocked by antibodies to this ligand for EGFR. Expression of dominant negative mini-genes for Galpha(q) and Galpha(i) blocked E2-induced, EGFR-dependent ERK activation, and Gbetagamma also contributed. 
G protein activation led to activation of matrix metalloproteinases (MMP)-2 and -9. This resulted from Src-induced MMP activation, implicated using PP2 (Src family kinase inhibitor) or the expression of a dominant negative Src protein.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Jan 24 278(4) 2701-12", "id": "12421825" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb7" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "By substituting these tyrosine residues in LAT with phenylalanine and by utilizing phosphorylated peptides derived from these sites, we mapped the tyrosine residues in LAT required for the direct interaction and activation of Vav, p85/p110alpha and phospholipase Cgamma1 (PLCgamma1). Our results indicate that Tyr(226) and Tyr(191) are required for Vav binding, whereas Tyr(171) and Tyr(132) are necessary for association and activation of phosphoinositide 3-kinase activity and PLCgamma1 respectively. ", "citation": { "type": "PubMed", "name": "Biochem J 2001 Jun 1 356(Pt 2) 461-71", "id": "11368773" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb8b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "natural killer cell", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "# Ariadne: Ly-GDI is phosphorylated on tyrosine residues following T cell receptor stimulation, and it associates with the Src homology 2 region of an adapter protein, Shc. [Regulation] Ly-GDI is phosphorylated on tyrosine residues following T cell receptor stimulation, and it associates with the Src homology 2 region of an adapter protein, Shc. In addition, the interaction between Ly-GDI and Vav1 requires tyrosine phosphorylation. 
Overexpression of Ly-GDI alone is inhibitory to NFAT stimulation and calcium mobilization. However, when co-expressed with Vav1, Ly-GDI enhances Vav1 induction of NFAT activation, phospholipase Cgamma phosphorylation, and calcium mobilization.", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Dec 20 277(51) 50121-30", "id": "12386169" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb51" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Mutation of Lys650-->Glu in the activation loop of the FGFR3 kinase domain causes the lethal human skeletal disorder thanatophoric dysplasia type II (TDII) and is also found in patients with multiple myeloma, bladder and cervical carcinomas. This mutation leads to constitutive activation of FGFR3. We show that the kinase domains of FGFR1, FGFR3, and FGFR4 containing the activation loop mutation, when targeted to the plasma membrane by a myristylation signal, can transform NIH3T3 cells and induce neurite outgrowth in PC12 cells. Phosphorylation of Shp2, PLC-gamma, and MAPK was also stimulated by all three 'TDII-like' FGFR derivatives. Additionally, activation of Stat1 and Stat3 was observed in cells expressing the activated FGFR derivatives. Finally, we demonstrate that FGFR1, FGFR3, and FGFR4 derivatives can stimulate PI-3 kinase activity. 
", "citation": { "type": "PubMed", "name": "Oncogene 2000 Jul 6 19(29) 3309-20", "id": "10918587" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb3d" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "We analyzed here the possible role of the tyrosine 769 in KGFR, corresponding to tyrosine 766 in FGFR1, in the regulation of KGFR signal transduction and MAPK activation as well as in the control of the endocytic process of KGFR. A mutant KGFR in which tyrosine 769 was substituted by phenylalanine was generated and transfected in NIH3T3 and HeLa cells. Our results indicate that tyrosine 769 is required for the binding to KGFR and tyrosine phosphorylation of PLCgamma as well as for the full activation of MAPKs and for cell proliferation through the regulation of FRS2 tyrosine phosphorylation", "citation": { "type": "PubMed", "name": "Biochem Biophys Res Commun 2005 Feb 11 327(2) 523-32", "id": "15629145" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb2b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Many angiogenic growth factors, including bFGF and VEGF, stimulate endothelial cell invasion (51) through transmembrane protein kinase receptor dimerization and phosphorylation (11, 53), leading to the activation of phospholipase Cg (PLCg) and Ras, both of which ultimately result in activation of cPLA2", "citation": { "type": "PubMed", "name": "FASEB J 2004 Mar 18(3) 568-70", "id": "14715700" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec3e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "From full text: 
in null cells the expression of FIC or JE, respectively, is severely compromised in null cells treated with EGF or PDGF. In contrast, each growth factor provokes a similar increase in mRNA levels when added to null+ cells. These data indicated that PLC-?1 is an essential signaling component for both EGFand PDGF-dependent expression of these mRNAs.", "citation": { "type": "PubMed", "name": "Exp Cell Res 2006 Apr 1 312(6) 807-16", "id": "16427622" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec25" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "This Syk-mediated signal amplification results in a direct or indirect activation of several proteins, including linker for activation of T cells (LAT), Vav, phospholipase C-?1 (PLC-?1), and PLC-?2.", "citation": { "type": "PubMed", "name": "J Lipids 2011 2011 752906", "id": "21490812" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebfa" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Further analysis showed that tyrosine 1006 is responsible for phospholipase Cgamma1 (PLCgamma1) activation and intracellular calcium release in endothelial cells. 
Activation of PLCgamma1 was selectively mediated by tyrosine 1006.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 May 2 278(18) 16347-55", "id": "12598525" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebec" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "In this study, we have identified the Rac-GAP beta2-chimaerin as an effector of the epidermal growth factor receptor (EGFR) via coupling to phospholipase Cgamma (PLCgamma) and generation of the lipid second messenger diacylglycerol (DAG).", "citation": { "type": "PubMed", "name": "EMBO J 2006 May 17 25(10) 2062-74", "id": "16628218" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc5" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "These results indicate that continuous stretch-induced IL-6 secretion in HUVECs depends on outside-in signaling via integrins followed by a PI3-K-PLC-gamma-PKC-IKK-NF-kappaB signaling cascade.", "citation": { "type": "PubMed", "name": "Am J Physiol Cell Physiol 2005 May 288(5) C1012-22", "id": "15613495" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebae" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "umbilical vein endothelial cell", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Table 1 Docking rules for adaptors on EGFR cytoplasmic tails, as established by coarse-grained molecular docking modeling simulations", "citation": { "type": "PubMed", "name": "BMC Syst Biol 2010 4 57", "id": "20459599" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba1" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", 
"tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "GIT1 interaction with PLCgamma is required for PLCgamma activation based on inhibition of tyrosine phosphorylation and calcium mobilization after GIT1 knockdown with antisense GIT1 oligonucleotides.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Dec 12 278(50) 49936-44", "id": "14523024" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba0" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "CD40 lacks intrinsic catalytic activity, but the cytoplasmic domain of CD40 has two binding sites for TRAF...TRAFs 1, 2, 3, 5, and 6 have been found in association with CD40, and these adaptors couple CD40 to the phosphoinositide 3-kinase (PI3K), phospholipase Cγ (PLC-γ), mitogen-activated protein kinase (MAPK-ERK, p38, and JNK), and nuclear factor κB (NF-κB) signaling pathways", "citation": { "type": "PubMed", "name": "Sci STKE 2004 Jun 15 2004(237) pe25", "id": "15199223" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb9f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "The phosphatidylinositol 4,5-bisphosphate (PIP(2)) hydrolyzing activity of PLC-gamma1 was substantially increased in the presence of purified tubulin in vitro, whereas the activity was not promoted by bovine serum albumin, suggesting that beta-tubulin activates PLC-gamma1.", "citation": { "type": "PubMed", "name": "J Biol Chem 2005 Feb 25 280(8) 6897-905", "id": "15579910" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb83" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn 
cat(p(HGNC:PLCG1))", "summary_text": "The P-1 domain mediates a constitutive interaction of SLP-76 with the SH3 domain of PLC-gamma1 and is required for TCR-mediated activation of Erk, PLC-gamma1, and NFAT (nuclear factor of activated T cells).", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2001 Jul 21(13) 4208-18", "id": "11390650" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb7b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Phospholipase Cgamma1 is expressed ubiquitously, especially in the brain, thymus and lungs. PLCgamma1 can be activated by receptor tyrosine kinases (i.e.: PDGFR, EGFR, FGFR, Trk), as well as non-receptor protein kinases (Src, Syk, Tec)", "citation": { "type": "PubMed", "name": "Postepy Hig Med Dosw (Online) 2011 65 470-7", "id": "21918248" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb78" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "The FGFR-1 mediates activation of protein kinase C (PKC) through direct binding and activation of phospholipase C-gamma (PLC-gamma)", "citation": { "type": "PubMed", "name": "Oncogene 1999 Jun 3 18(22) 3354-64", "id": "10362356" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb72" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Endothelial Cells", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Activation of FGF receptors can activate multiple signal transduction pathways including the phospholipase Cgamma, phosphatidyl inositol 3-kinase, mitogen-activated protein kinase and signal transducers and activators of transcription (STAT) pathways", "citation": { "type": "PubMed", 
"name": "Endocr Relat Cancer 2004 Dec 11(4) 709-24", "id": "15613447" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb68" }, "experiment_context": { "species_common_name": "Human", "disease": "Prostatic Neoplasms", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "PLC-gamma 1 contains three tyrosine phosphorylation sites, which have been identified as residues 771, 783 and 1254. Phosphorylation of tyrosine residues is sufficient to increase the catalytic activity of PLC-gamma 1,", "citation": { "type": "PubMed", "name": "Ciba Found Symp 1992 164 223-33; discussion 233-9", "id": "1395933" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb45" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "CD38 dimerization induced tyrosine phosphorylation of the protein kinase syk and increased syk kinase activity. 
CD38 dimerization also induced tyrosine phosphorylation of phospholipase C-gamma and of the p85 subunit of phosphatidylinositol 3-kinase (PI 3-K)", "citation": { "type": "PubMed", "name": "J Immunol 1996 Jan 1 156(1) 100-7", "id": "8598449" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb21" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Through its phosphorylated tyrosine residues, the activated VEGFR-2 associates with the adapter molecules Shc, Grb2 and Nck, to Ras GTPase activating protein, p59fyn, pp62yes and phospholipase Cg, and to the tyrosine phosphatases SHP-1 and SHP-2", "citation": { "type": "PubMed", "name": "EMBO J 1999 Feb 15 18(4) 882-92", "id": "10022831" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eafd" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Biol Chem 1997 Dec 19 272(51) 32411-8", "id": "9405450" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec52" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Collectively, these data suggest that the EGF receptor triggers activation of Rap2B via PLC-gamma1 activation and tyrosine phosphorylation of RasGRP3 by c-Src, finally resulting in stimulation of PLC-epsilon.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2004 Jun 24(11) 4664-76", "id": "15143162" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec4a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": 
"p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "dual phosphorylation by ZAP70 and Itk triggers the activation of PLCg1", "citation": { "type": "PubMed", "name": "Mol Immunol 2002 Jun 38(15) 1087-99", "id": "12044776" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec47" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "Cell 1989 Jun 30 57(7) 1101-7", "id": "2472218" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec2f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Phosphorylation of Tyr-783, which is essential for lipase activation, was observed in all stimulated cell types examined. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Jul 30 279(31) 32181-90", "id": "15161916" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec0d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "Oncogene 2003 Apr 17 22(15) 2248-59", "id": "12700661" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec0b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Based on these results, we propose that CD148 negatively regulates TCR signaling by interfering with the phosphorylation and function of PLCgamma1 and LAT.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2001 Apr 21(7) 2393-403", "id": "11259588" }, "metadata": { 
"created_by": "selventa", "id": "524b351fd3fbfd4c3405ec08" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Aug 1 278(31) 29208-15", "id": "12738795" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec07" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2001 Jun 21(11) 3763-74", "id": "11340169" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebfe" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Biol Chem 1999 Jul 16 274(29) 20421-4", "id": "10400667" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebfb" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Our results suggest that gelsolin modulates bradykinin-mediated PLD activation via suppression of PLC and PKC activities but did not affect S1P-mediated PLD activation.", "citation": { "type": "PubMed", "name": "J Biol Chem 1999 Sep 24 274(39) 27385-91", "id": "10488069" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf4" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Fig.1", "citation": { "type": "PubMed", "name": "Science 2004 Nov 26 306(5701) 1506-7", 
"id": "15567848" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf2" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Table 2 | The effects on mast-cell function of gene knockout or knockdown of key signalling molecules", "citation": { "type": "PubMed", "name": "Nat Rev Immunol 2006 Mar 6(3) 218-30", "id": "16470226" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf1" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Monitoring of the intracellular levels of inositol phosphates in c-Src-as1 SNF cells revealed a moderate (~30%), but statistically significant, decrease in PLC{gamma} activity when c-Src was inhibited", "citation": { "type": "PubMed", "name": "Mol Biol Cell 2005 Nov 16(11) 5418-32", "id": "16135530" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe1" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Phosphorylated Y1175 creates a binding site for phospholipase Cgamma1 (PLC-gamma1) and Shb. 
Activation of PLC-gamma1 and Shb regulates VEGF-A-dependent cell proliferation and cell migration, respectively.", "citation": { "type": "PubMed", "name": "J Atheroscler Thromb 2006 Jun 13(3) 130-5", "id": "16835467" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc3" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Exp Med 2004 Mar 15 199(6) 785-95", "id": "15007095" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc2" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Recently, we reported that phospholipase Cgamma1 (PLC-gamma1) binds to and regulates TRPC3 channels, components of agonist-induced Ca2+ entry into cells. 
", "citation": { "type": "PubMed", "name": "Nature 2005 Mar 3 434(7029) 99-104", "id": "15744307" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc0" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Upon the stimulation of growth factors and hormones, PLC-gamma1 is rapidly phosphorylated at three known sites; Tyr771, Tyr783 and Tyr1254 and its enzymatic activity is up-regulated.", "citation": { "type": "PubMed", "name": "Cell Signal 2005 Oct 17(10) 1289-99", "id": "16038803" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb9a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "the activation of Erk and phospholipase C (PLC)-g observed following stimulation of HeLa cells with a low dose of EGF was accelerated and potentiated following co-treatment with a low dose of GA (Fig 2E).", "citation": { "type": "PubMed", "name": "EMBO Rep 2004 Dec 5(12) 1165-70", "id": "15568014" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb97" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Adhesion to fibronectin induced PLC-gamma1 tyrosine phosphorylation that was inhibited by a Src-kinase inhibitor", "citation": { "type": "PubMed", "name": "J Cell Sci 2005 Feb 1 118(Pt 3) 601-10", "id": "15657076" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb7d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Fibroblasts", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "In CHO cells expressing the wild-type B2 receptor, 
bradykinin-induced transient recruitment and activation of PLCgamma1.", "citation": { "type": "PubMed", "name": "Biochem Biophys Res Commun 2005 Jan 28 326(4) 894-900", "id": "15607753" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb71" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "NGF induces prolonged activation of the Shc/MAP kinase pathway and phospholipase Cgamma compared with PDGF-BB.", "citation": { "type": "PubMed", "name": "Arterioscler Thromb Vasc Biol 1999 Apr 19(4) 1041-50", "id": "10195934" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb6e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "Muscle, Smooth" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Phospholipase C gamma 1 (PLC gamma 1) and p21ras guanosine triphosphatase (GTPase) activating protein (GAP) bind to and are phosphorylated by activated growth factor receptors", "citation": { "type": "PubMed", "name": "Science 1990 Nov 16 250(4983) 979-82", "id": "2173144" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb4b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "some studies have reported activation of PLC-gamma in VEGFR1 expressing cells", "citation": { "type": "PubMed", "name": "Am J Physiol Cell Physiol 2001 Jun 280(6) C1375-86", "id": "11350732" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb2d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Biol 
Chem 1999 Nov 12 274(46) 33057-63", "id": "10551875" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb27" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "These findings suggest that protein kinase C phosphorylates PI-PLC, resulting in a decrease in PI-PLC activity", "citation": { "type": "PubMed", "name": "Biochim Biophys Acta 1994 Nov 10 1224(2) 302-10", "id": "7981246" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb24" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "Adipocytes", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Biol Chem 1998 May 29 273(22) 13808-18", "id": "9593725" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb13" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Reconstitution of deficient mast cells with Vav1 restored normal tyrosine phosphorylation of PLCgamma1 and PLCgamma2 and calcium responses. Thus, Vav1 is essential to FcepsilonRI-mediated activation of PLCgamma and calcium mobilization in mast cells. 
", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2001 Jun 21(11) 3763-74", "id": "11340169" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb11" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Figure 1 | The ErbB signalling network.", "citation": { "type": "PubMed", "name": "Nat Rev Mol Cell Biol 2001 Feb 2(2) 127-37", "id": "11252954" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae9" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "Phospholipase C-gamma1 (PLC-gamma1) is a lipase that hydrolyzes PIP2 to generate two second messengers, IP3 and DAG.", "citation": { "type": "PubMed", "name": "Mol Cells 1999 Dec 31 9(6) 631-7", "id": "10672930" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae8" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1) actsIn cat(p(HGNC:PLCG1))", "summary_text": "PLC? hydrolyzes phosphatidylinositol-4,5-biphosphate to produce inositol-1,4,5-triphosphate (IP3) and diacylglycerol (DAG). 
", "citation": { "type": "PubMed", "name": "BMC Genomics 2009 10 233", "id": "19450280" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eadc" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "CD4+ T-cell", "tissue": "" } } ] } }, { "source": "p(HGNC:CXCR6)", "relation": "actsIn", "target": "cat(p(HGNC:CXCR6))", "directed": false, "label": "p(HGNC:CXCR6) actsIn cat(p(HGNC:CXCR6))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405148f", "evidences": [ { "bel_statement": "p(HGNC:CXCR6) actsIn cat(p(HGNC:CXCR6))", "summary_text": "Exposure of HASMC to CXCL16 increased NF-kappa B DNA binding activity, induced kappa B-driven luciferase activity, and up-regulated tumor necrosis factor-alpha expression in an NF-kappa B-dependent manner. However, treatment with pertussis toxin (G(i) inhibitor), wortmannin or LY294002 (phosphatidylinositol 3-kinase (PI3K inhibitors)), or Akt inhibitor or overexpression of dominant-negative (dn) PI3K gamma, dnPDK-1, kinase-dead (kd) Akt, kdIKK-beta, dnIKK-gamma, dnI kappa B-alpha, or dnI kappa B-beta significantly attenuated CXCL16-induced NF-kappa B activation.", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Jan 30 279(5) 3188-96", "id": "14625285" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec20" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR6) actsIn cat(p(HGNC:CXCR6))", "summary_text": "Chemokine receptors signal through Galphai2 proteins to activate DOCK2 (dedicator of cytokinesis 2) and other guanine nucleotide exchange factors (GEFs), leading to the activation of RAC1 and RHOA. 
", "citation": { "type": "PubMed", "name": "Nat Rev Immunol 2009 Sep 9(9) 630-44", "id": "19696767" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec55" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "b-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCR6) actsIn cat(p(HGNC:CXCR6))", "summary_text": "Table 1. Lymphoid chemokine receptors", "citation": { "type": "PubMed", "name": "Trends Immunol 2004 Feb 25(2) 67-74", "id": "15102365" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead7" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "T-cell", "tissue": "" } } ] } }, { "source": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B)))", "relation": "increases", "target": "kin(p(HGNC:FYN))", "directed": false, "label": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B))) increases kin(p(HGNC:FYN))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051482", "evidences": [ { "bel_statement": "cat(complex(p(HGNC:CD8A),p(HGNC:CD8B))) increases kin(p(HGNC:FYN))", "summary_text": "This Network edge has no supporting evidence. 
Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec56" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:IL2RB)", "relation": "actsIn", "target": "cat(p(HGNC:IL2RB))", "directed": false, "label": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051471", "evidences": [ { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "Considering these points in the context of IL-2- and IL-15-dependent signaling under physiological conditions, the lack of a cytoplasmic domain for IL-2R alpha and trans-presentation of IL-15 by IL-15Ralpha to CD122 and gamma C results in a qualitative identical utilization of signaling pathways associated with CD122 and gamma C. High levels of IL-2Ralpha provide a mean for continual capture of IL-2 to sustain signaling whereas limiting IL-15Ralpha tempers engagement of CD122 and gamma C, limiting signal transduction. Thus, varied levels of IL-2Ralpha and IL-15Ralpha provide a simple, yet powerful, mechanism to quantify and tune signaling through common intermediates leading to distinctive biological responses.", "citation": { "type": "PubMed", "name": "J Immunol 2012 May 1 188(9) 4149-57", "id": "22447977" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebdc" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "Stat5 proteins activated by IL-2. The phosphorylated tyrosines on IL-2Rbeta can then serve as docking sites for signaling molecules that otherwise cannot associate with IL-2Rbeta, including the adaptor protein Shc, Stat5a, and Stat5b (Figure 2). 
For example, only phosphorylated (but not non-phosphorylated) peptides spanning either Tyr-392 or Tyr-510 of IL-2Rbeta can efficiently compete with IL-2-induced Stat5 DNA binding to a GAS motif IL-2-mediated hetero-dimerization of its receptor triggers a rapid increase in the recruitment of Jak3 and activation of both Jak1 and Jak3 (Johnston et al., 1994; Witthuhn et al., 1994). These kinases phosphorylate the receptor as well as each other, and activate other signaling molecules associated with the receptor. The phosphorylated tyrosines on IL-2Rbeta can then serve as docking sites for signaling molecules that otherwise cannot associate with IL-2Rbeta, including the adaptor protein Shc, Stat5a, and Stat5b", "citation": { "type": "PubMed", "name": "Oncogene 2000 May 15 19(21) 2566-76", "id": "10851055" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb54" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "We show that Gab2 was transiently phosphorylated by tyrosine in human mycosis fungoides (MF) tumor T cells upon IL-2 stimulation and that SHP2 as well as Stat5a associated inducibly with Gab2. IL-15, but not IL-4, also induced tyrosine phosphorylation of Gab2, suggesting that the IL-2 receptor beta-chain is important for IL-2-induced Gab2 phosphorylation. ", "citation": { "type": "PubMed", "name": "Exp Clin Immunogenet 2001 18(2) 86-95", "id": "11340297" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec57" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "the association of lyn with IL-2Rbeta was markedly elevated by IL-2 stimulation. 
Furthermore the activity of lyn kinase, evaluated by an in vitro kinase assay with enolase as a substrate, increased following IL-2 stimulation.", "citation": { "type": "PubMed", "name": "Immunobiology 2000 Nov 202(4) 363-82", "id": "11131153" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebcd" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "Moreover, the finding that at least a small proportion of the p53/56'yn kinase in F7 cells can be coimmunoprecipitated by using a mAb specific for the p75/IL-2Rj3 molecule preliminarily suggests that the lyn kinase may receive its activation signals directly from the IL-2R (Fig. 4).", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 1992 Apr 1 89(7) 2674-8", "id": "1557373" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb4" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "b-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "Stat5 proteins activated by IL-2. The phosphorylated tyrosines on IL-2Rbeta can then serve as docking sites for signaling molecules that otherwise cannot associate with IL-2Rbeta, including the adaptor protein Shc, Stat5a, and Stat5b (Figure 2). 
For example, only phosphorylated (but not non-phosphorylated) peptides spanning either Tyr-392 or Tyr-510 of IL-2Rbeta can efficiently compete with IL-2-induced Stat5 DNA binding to a GAS motif ", "citation": { "type": "PubMed", "name": "Oncogene 2000 May 15 19(21) 2566-76", "id": "10851055" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebac" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "Interleukin 15 (IL-15) is a 14-15 kDa polypeptide that belongs to the 4 alpha-helix-bundle family of cytokines and was originally discovered due to its T cell proliferative activity. It utilizes the signal-transducing beta/gamma polypeptides of the IL-2 receptor complex, thus sharing many biological activities with IL-2,", "citation": { "type": "PubMed", "name": "Arch Immunol Ther Exp (Warsz) 2000 48(6) 457-64", "id": "11197599" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb23" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": " TM-beta1 (anti-CD122) antibody blocks the interaction of trans-presented IL-15 by IL-15Ralpha with the CD122/CD132 signaling receptor complex on responsive NK, and CD8+T cell subsets. As shown in Fig. 
S2, the TM-beta1 antibody was very effective in inhibiting IL-15-induced proliferation of murine splenocytes", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2009 Sep 15 106(37) 15849-54", "id": "19805228" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec35" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "Lyn kinase phosphorylates both IL-5Ralpha and beta in vitro.", "citation": { "type": "PubMed", "name": "J Immunol 2002 Feb 15 168(4) 1978-83", "id": "11823534" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb6b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "In hematopoietic cell line BAF-B03 F7 cells, gene transfer mediated expression of the IL-2R beta c chain is sufficient to confer proliferation and cell survival responses to IL-2. In these IL-2R beta c-expressing cells, BAG-1 mRNA was dramatically induced by IL-2. 
The IL-2-mediated induction of BAG-1 expression required the activation of tyrosine kinase(s) and was sensitive to rapamycin as the induction of bcl-2 expression was.", "citation": { "type": "PubMed", "name": "Blood 1996 Dec 1 88(11) 4118-23", "id": "8943845" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf6" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:IL2RB) actsIn cat(p(HGNC:IL2RB))", "summary_text": "These proteins have been implicated in immune regulation, apoptosis, activation-induced cell death, and control of autoimmunity.", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Mar 19 279(12) 11553-61", "id": "14701862" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae7" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:PLCG1,pmod(P,Y))", "relation": "directlyIncreases", "target": "cat(p(HGNC:PLCG1))", "directed": false, "label": "p(HGNC:PLCG1,pmod(P,Y)) directlyIncreases cat(p(HGNC:PLCG1))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405148a", "evidences": [ { "bel_statement": "p(HGNC:PLCG1,pmod(P,Y)) directlyIncreases cat(p(HGNC:PLCG1))", "summary_text": "CD38 dimerization induced tyrosine phosphorylation of the protein kinase syk and increased syk kinase activity. 
CD38 dimerization also induced tyrosine phosphorylation of phospholipase C-gamma and of the p85 subunit of phosphatidylinositol 3-kinase (PI 3-K)", "citation": { "type": "PubMed", "name": "J Immunol 1996 Jan 1 156(1) 100-7", "id": "8598449" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec5d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1,pmod(P,Y)) directlyIncreases cat(p(HGNC:PLCG1))", "summary_text": "SLP-76 is an adapter protein required for T-cell receptor (TCR) signaling. In particular, TCR-induced tyrosine phosphorylation and activation of phospholipase C-gamma1 (PLC-gamma1), and the resultant TCR-inducible gene expression, depend on SLP-76.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2001 Jul 21(13) 4208-18", "id": "11390650" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebdf" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1,pmod(P,Y)) directlyIncreases cat(p(HGNC:PLCG1))", "summary_text": "dual phosphorylation by ZAP70 and Itk triggers the activation of PLCg1", "citation": { "type": "PubMed", "name": "Mol Immunol 2002 Jun 38(15) 1087-99", "id": "12044776" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb58" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:PLCG1,pmod(P,Y)) directlyIncreases cat(p(HGNC:PLCG1))", "summary_text": "Based on these results, we propose that CD148 negatively regulates TCR signaling by interfering with the phosphorylation and function of PLCgamma1 and LAT.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2001 Apr 21(7) 2393-403", "id": "11259588" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb14" }, "experiment_context": { "species_common_name": "Human", 
"disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:IL15RA)", "relation": "actsIn", "target": "cat(p(HGNC:IL15RA))", "directed": false, "label": "p(HGNC:IL15RA) actsIn cat(p(HGNC:IL15RA))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051484", "evidences": [ { "bel_statement": "p(HGNC:IL15RA) actsIn cat(p(HGNC:IL15RA))", "summary_text": "The re-establishment of the interaction of IL-15 with the IL-15Ralpha by incubating IL-15(-/-) DC with IL-15 completely restored the capacity to prime T cells for DTH induction in vivo. Moreover, IL-15 also enhanced secretion of pro-inflammatory cytokines by DC and triggered in vitro CD8(+) T cell proliferation and IL-2 release. Taken together, the data suggest that an autocrine IL-15/IL-15Ralpha signaling loop in DC is essential for inducing CD8(+)-dependent Th1 immune responses in mice", "citation": { "type": "PubMed", "name": "Eur J Immunol 2003 Dec 33(12) 3493-503", "id": "14635060" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec64" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "dendritic cell", "tissue": "" } }, { "bel_statement": "p(HGNC:IL15RA) actsIn cat(p(HGNC:IL15RA))", "summary_text": " Our current study provided 2 pieces of evidence that IL-15RA presented IL-15 in trans to memory CD8 T cells in vivo. First, we demonstrated that IL-15RA expression by opposing BM-derived cells was required for long-term basal proliferation of memory CD8 T cells. Second, proliferation of memory CD8 T cells in response to soluble IL-15 required IL-15RA expression by the host cells and IL-15RB expression by the responding CD8 T cells. 
", "citation": { "type": "PubMed", "name": "Blood 2004 Feb 1 103(3) 988-94", "id": "14512307" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebdb" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:IL15RA) actsIn cat(p(HGNC:IL15RA))", "summary_text": "In addition, IL-15 was found to induce tyrosine phosphorylation of Syk that was largely inhibited by pretreating cells with piceatannol. Moreover, we found that Syk kinase is physically associated with IL-15Ralpha", "citation": { "type": "PubMed", "name": "J Leukoc Biol 2004 Jul 76(1) 162-8", "id": "15123770" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec61" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:IL15RA) actsIn cat(p(HGNC:IL15RA))", "summary_text": "Lyn kinase phosphorylates both IL-5Ralpha and beta in vitro.", "citation": { "type": "PubMed", "name": "J Immunol 2002 Feb 15 168(4) 1978-83", "id": "11823534" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec5f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:IL15RA) actsIn cat(p(HGNC:IL15RA))", "summary_text": "IL-15 is a common gamma chain cytokine sharing overlapping signaling and biological properties with IL-2 as a result of their mutual usage of the IL-2/15b and common gamma chain (gc) receptor subunits (7,8).", "citation": { "type": "PubMed", "name": "J Immunol 2011 Jan 1 186(1) 174-82", "id": "21098221" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead8" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "CD8+ T-cell", "tissue": "" } } ] } }, { "source": "p(HGNC:CXCL9)", "relation": "directlyIncreases", "target": "cat(p(HGNC:CXCR3))", "directed": false, "label": "p(HGNC:CXCL9) 
directlyIncreases cat(p(HGNC:CXCR3))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051477", "evidences": [ { "bel_statement": "p(HGNC:CXCL9) directlyIncreases cat(p(HGNC:CXCR3))", "summary_text": "By using a CXCR3 ligand reporter mouse, we found that stromal cells predominately expressed the chemokine ligand CXCL9 whereas hematopoietic cells expressed CXCL10 in lymph nodes (LNs). Dendritic cell (DC)-derived CXCL10 facilitated T cell-DC interactions in LNs during T cell priming while both chemokines guided intranodal positioning of CD4(+) T cells to interfollicular and medullary zones. Thus, different chemokines acting on the same receptor can function locally to facilitate DC-T cell interactions and globally to influence intranodal positioning, and both functions contribute to Th1 cell differentiation.", "citation": { "type": "PubMed", "name": "Immunity 2012 Dec 14 37(6) 1091-103", "id": "23123063" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec66" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "dendritic cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCL9) directlyIncreases cat(p(HGNC:CXCR3))", "summary_text": " T cells in peripheral airways of COPD patients show increased expression of CXCR3, a receptor activated by interferon-{gamma} inducible protein of 10 kDa (IP-10; CXCL10), monokine induced by interferon-{gamma} (Mig; CXCL9), and interferon-inducible T cell-{alpha} chemoattractant (I-TAC; CXCL11). 
All three cytokines activate CXCR3, although CXCL11 has the highest affinity.", "citation": { "type": "PubMed", "name": "Pharmacol Rev 2004 Dec 56(4) 515-48", "id": "15602009" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec58" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCL9) directlyIncreases cat(p(HGNC:CXCR3))", "summary_text": "Numerous studies have shown that immature human and mouse blood- and bone marrow-derived DC subsets express a panel of inflammatory chemokine receptors (CCR1-6,8,9, CXCR3,4, CX3CR1) [Table 1 and reviewed in (1-5)]. [Table 1 Chemokine receptors expressed by DC and the functional outcome of receptor ligation}]", "citation": { "type": "PubMed", "name": "Clin Lab Med 2008 Sep 28(3) 375-84, v", "id": "19028258" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb6" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "dendritic cell", "tissue": "" } } ] } }, { "source": "p(HGNC:CCR5)", "relation": "actsIn", "target": "cat(p(HGNC:CCR5))", "directed": false, "label": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051496", "evidences": [ { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "HIV envelope binds to and signals through its primary cellular receptor, CD4, and through a coreceptor, either CC chemokine receptor 5 (CCR5) or CXC chemokine receptor 4 (CXCR4). Here, we evaluate the response of peripheral blood mononuclear cells to a panel of genetically diverse R5 and X4 envelope proteins. Modulation of gene expression was evaluated by using oligonucleotide microarrays. Activation of transcription factors was evaluated by using an array of oligonucleotides encoding transcription factor binding sites. Responses were strongly influenced by coreceptor specificity. 
Treatment of cells from CCR5delta32 homozygous donors with glycoprotein (gp)120 derived from an R5 virus demonstrated that the majority of responses elicited by R5 envelopes required engagement of CCR5. R5 envelopes, to a greater extent than X4 envelopes, induced the expression of genes belonging to mitogen-activated protein kinase signal transduction pathways and genes regulating the cell cycle. A number of genes induced by R5, but not X4, envelopes were also up-regulated in the resting CD4+ T cell population of HIV-infected individuals. These results suggest that R5 envelope facilitates replication of HIV in the pool of resting CD4+ T cells. Additionally, signaling by R5 gp120 may facilitate the transmission of R5 viruses by inducing a permissive environment for HIV replication.", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2006 Mar 7 103(10) 3746-51", "id": "16505369" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec65" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "peripheral blood mononuclear cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "RANTES treatment of PM1 T cells results in the rapid phosphorylation-activation of CCR5, Jak2, and Jak3. RANTES-inducible Jak phosphorylation is insensitive to pertussis toxin inhibition, indicating that RANTES-CCR5-mediated tyrosine phosphorylation events are not coupled directly to Galpha(i) protein-mediated events. 
In addition to Jaks, several other proteins are rapidly phosphorylated on tyrosine residues in a RANTES-dependent manner, including the Src kinase p56(lck), which associates with Jak3", "citation": { "type": "PubMed", "name": "J Biol Chem 2001 Apr 6 276(14) 11427-31", "id": "11278738" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec38" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "In vivo, different chemokines orchestrate the recruitment of DCs into the lung depending on the inflammatory stimulus present. In a rat model of inhaled heat-killed Moraxella catarrhalis, the phenomenon seemed dependent on the expression of CCR1 and CCR5, which are receptors for the chemokines CCL5 (regulated on activation, normal T-cell expressed and secreted [RANTES]) and/or CCL3 (macrophage inflammatory protein [MIP]-1{alpha}) produced at the airway level ", "citation": { "type": "PubMed", "name": "Am J Respir Crit Care Med 2005 Sep 1 172(5) 530-51", "id": "15879415" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb20" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "dendritic cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Anti-CCR1, anti-CCR5, or BX471 also inhibited the upregulation of beta1 integrin mRNA in myeloma cells induced by MIP-1alpha, as well as the adherence of myeloma cells to stromal cells and IL-6 production by stromal cells in response to myeloma cells. 
", "citation": { "type": "PubMed", "name": "Exp Hematol 2005 Mar 33(3) 272-8", "id": "15730850" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec67" }, "experiment_context": { "species_common_name": "Human", "disease": "Multiple Myeloma", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "CCL3- and CCL4-triggered migration of GM-CSF-primed neutrophils was inhibited by the CCR5 antagonist TAK-779. Accordingly, freshly isolated neutrophils express CCR5. Extracellular signal-regulated kinases (ERK)-1/2 and p38 mitogen-activated protein kinase (MAPK) inhibitors blocked CCL3-induced migration of GM-CSF-primed neutrophils.", "citation": { "type": "PubMed", "name": "Cell Signal 2005 Mar 17(3) 355-63", "id": "15567066" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec51" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Neutrophils", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Numerous studies have shown that immature human and mouse blood- and bone marrow-derived DC subsets express a panel of inflammatory chemokine receptors (CCR1-6,8,9, CXCR3,4, CX3CR1) [Table 1 and reviewed in (1-5)]. [Table 1 Chemokine receptors expressed by DC and the functional outcome of receptor ligation}]", "citation": { "type": "PubMed", "name": "Clin Lab Med 2008 Sep 28(3) 375-84, v", "id": "19028258" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec50" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "dendritic cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Syk was also activated upon MIP1beta stimulation of CCR5 L1.2 transfectants or T-cells and associated with RAFTK. 
Overexpression of a dominant-negative Src-binding mutant of RAFTK (RAFTK(m402)) significantly attenuated Syk activation, whereas overexpression of wild-type RAFTK enhanced Syk activity, indicating that RAFTK acts upstream of CCR5-mediated Syk activation.", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Jun 9 275(23) 17263-8", "id": "10747947" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec48" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "The CCR5 chemokine receptor is a member of the G protein-coupled receptor (GPCR) family that is expressed by macrophages, memory T-lymphocytes and dendritic cells and is activated by chemotactic proteins (e.g. MIP-1alpha [CCL3], MIP-1beta [CCL4] and RANTES [CCL5])", "citation": { "type": "PubMed", "name": "Br J Pharmacol 2008 Apr 153(7) 1513-27", "id": "18223665" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb49" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Expression of beta-arrestin2 also augmented chemokine receptor CCR5-mediated but not epidermal growth factor receptor-mediated chemotaxis, indicating the specific effect of beta-arrestin2", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Dec 20 277(51) 49212-9", "id": "12370187" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec43" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "CCR1, CCR2 and CCR5 mediate recruitment of both infiltrating macrophages", "citation": { "type": "PubMed", "name": "J Neuroinflammation 2007 4 14", "id": "17484785" }, "metadata": { "created_by": "selventa", "id": 
"524b351fd3fbfd4c3405ec1d" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Eotaxin-3/CCL26 is an agonist for chemokine receptor 3 (CCR3) and a natural antagonist for CCR1, CCR2 and CCR5. ", "citation": { "type": "PubMed", "name": "Immunology 2010 May 130(1) 74-82", "id": "20059579" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec14" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Monocytes", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "In addition, CXCL12 selectively activates STAT 5 whereas CCL5 activates STAT 1.", "citation": { "type": "PubMed", "name": "Stem Cells 2005 Oct 27", "id": "16253981" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec09" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Table 1. 
Lymphoid chemokine receptors", "citation": { "type": "PubMed", "name": "Trends Immunol 2004 Feb 25(2) 67-74", "id": "15102365" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc1" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Th1 cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "In addition, CXCL12 selectively activates STAT 5 whereas CCL5 activates STAT 1.", "citation": { "type": "PubMed", "name": "Stem Cells 2005 Oct 27", "id": "16253981" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb6c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "mesenchyme" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Additional studies demonstrate that isolated CCR5 activation by R5 Env leads to both de novo expression of FasL and induction of susceptibility to Fas-mediated apoptosis in resting primary CD4 T cells", "citation": { "type": "PubMed", "name": "AIDS 2002 Jul 26 16(11) 1467-78", "id": "12131184" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb4c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "these in vivo data demonstrate that Ccr1, Ccr2, and Ccr5 mediate the postischemic recruitment of neutrophils through effects on intravascular adherence and subsequent transmigration.", "citation": { "type": "PubMed", "name": "J Leukoc Biol 2006 Jan 79(1) 114-22", "id": "16275892" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb43" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Taken together, these results showed that MIP- 1b released from ET-1–treated PBMs was 
biologically functional in mediating chemotaxis of THP-1 cells via the CCR5 receptor.", "citation": { "type": "PubMed", "name": "J Immunol 2010 Oct 15", "id": "20952681" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb26" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Monocytes", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Chemokine receptors signal through Galphai2 proteins to activate DOCK2 (dedicator of cytokinesis 2) and other guanine nucleotide exchange factors (GEFs), leading to the activation of RAC1 and RHOA. ", "citation": { "type": "PubMed", "name": "Nat Rev Immunol 2009 Sep 9(9) 630-44", "id": "19696767" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb1a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "b-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Previous studies have shown that CCL16 is a low-affinity ligand for CCR1, CCR2, CCR5, and CCR8 and attracts monocytes and T cells. 
", "citation": { "type": "PubMed", "name": "J Immunol 2004 Aug 1 173(3) 2078-83", "id": "15265943" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb09" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "This study establishes CCR5 as a critical receptor guiding NK cell trafficking in host defense", "citation": { "type": "PubMed", "name": "PLoS Pathog 2006 Jun 2(6) e49", "id": "16789839" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf3" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CCR5) actsIn cat(p(HGNC:CCR5))", "summary_text": "Although the mechanisms of DC precursor recruitment to the lung mucosa are incompletely defined, CCR1 and CCR5 have been implicated under both homeostatic and pathogen-induced conditions", "citation": { "type": "PubMed", "name": "Cell Res 2010 Aug 20(8) 872-85", "id": "20603644" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaed" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "dendritic cell", "tissue": "" } } ] } }, { "source": "p(HGNC:ZAP70)", "relation": "actsIn", "target": "kin(p(HGNC:ZAP70))", "directed": false, "label": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405149a", "evidences": [ { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "[From Introduction] Here we report that LMPTP, like CD45, plays a positive role in TCR signaling by preferentially dephosphorylating a negative regulatory site, namely Tyr-292 of ZAP-70. This leads to a severalfold increase in the tyrosine phosphorylation of the kinase at its positive regulatory sites and enhanced kinase activity. 
[From abstract] Expression of low levels of LMPTP resulted in increased ZAP-70 phosphorylation, presumably at the activating Tyr-493 and other sites, increased kinase activity, and augmented downstream signaling to the mitogen-activated protein kinase pathway.", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Jul 5 277(27) 24220-4", "id": "11976341" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebea" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "Simultaneous overexpression of selenophosphate synthetase and phospholipid-hydroperoxide GSH peroxidase (PHGPx) [250] blocks activation of NF-kB by IL-1. Overexpression of SOD [84] or GSH peroxidase [81, 211] abolished NF-kB activation by preventing degradation of IkB after stimulation with TNF-a. The precise mechanism(s) through which oxidants and reductants influence activation of NF-kB is presently unknown; however, there is evidence that antioxidant enzyme (AOE372), a redox-sensitive thioredoxin peroxidase, regulates IkB phosphorylation [246]. Phosphatases The phosphatases are an important component of most signal transduction pathways, because failure to reverse kinase actions can disrupt normal cellular functions. For example, transfection of human fibroblasts with constitutively active ras (hRasV12) inhibits cell growth and ultimately results in a senescentlike phenotype [441]. Similarly, constitutive ERK activation has an inhibitory effect on cell cycle progression [442,443]. Both the serine/threonine phosphatases and the PTPs are known to be redox-sensitive [82,144,153,156,271,281, 444-449]. The mechanism of redox effects on activity is probably best understood for the PTPs. 
Without exception, the PTPs contain a highly conserved region of 11 amino acid residues in their catalytic domain; specifi- cally, (Ile/Val)-His-Cys-X-Ala-Gly-X-X-Arg-(Ser/Thr)- Gly, where X is a nonconserved amino acid [17]. Either oxidation or mutation of the cysteine renders these molecules inactive [17,281]. H2O2 is a potent inhibitor of PTPs. As in the case of other oxidants, H2O2 probably oxidizes the thiolate anion at the catalytic site [280]. Because formation of a phosphorylcysteine intermediate seems to be critical to PTP activity [450-452], blocking it through oxidation of the cysteine inactivates the molecules. In many cases, treatment of cells with H2O2 stimulates increases in protein phosphorylation by inhibiting phosphatase-catalyzed removal of phosphate groups. Furthermore, mitogens that increase cellular ox- idant production may stimulate phosphorylation indirectly by decreasing phosphatase activity. Additional mechanisms are involved in stimulation of pathways activated by growth factors that increase oxidant production, however, because there are known instances in which the oxidants they produce have no effect on protein phosphorylation. For example, TGF-b1 stimulates phosphorylation of numerous proteins and has been shown to cause a large increase in H2O2 production; however, its effects on protein phosphorylation are not blocked by catalase [453]. Furthermore, H2O2 is effective in promoting phosphorylation of phospholipase D, the PDGF receptor, and PKC-a even after pretreatment of Swiss 3T3 fibroblasts with orthovanadate to inhibit phosphatases [454]. Thus, although diminished phosphatase activity may partially account for increased phosphorylation in some cases, it cannot totally account for oxidation effects on phosphorylation in every case. SPECIFICITY In general, there is good agreement between studies on redox effects on any given gene; albeit, not all oxidizing or reducing treatments exert equivalent effects. 
This is clearly demonstrated in studies of pag , which encodes a protein associated with cellular proliferation. Pag protein inhibits the tyrosine kinase activity of the Abelson (abl ) protein by binding to its SH3-binding domain [455]. BSO, menadione, sodium arsenate, and diethyl maleate all stimulate pag expression, but H2O2 does not [269]. Conversely, H2O2 stimulates c-fos expression (Table 1), although 4-hydroynonenal (a product of v-6-polyunsaturated fatty acid peroxidation) not only fails to induce c-fos expression but is actually inhibitory to c-fos induction by EGF and PDGF [185]. Similarly, some oxidants such as diamide decrease hypoxia-induced signals [201], although others such as H2O2 increase them [124]. As might be expected, the effects of any stimu...", "citation": { "type": "PubMed", "name": "Free Radic Biol Med 2000 Feb 1 28(3) 463-99", "id": "10699758" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb8d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "Simultaneous overexpression of selenophosphate synthetase and phospholipid-hydroperoxide GSH peroxidase (PHGPx) [250] blocks activation of NF-kB by IL-1. Overexpression of SOD [84] or GSH peroxidase [81, 211] abolished NF-kB activation by preventing degradation of IkB after stimulation with TNF-a. The precise mechanism(s) through which oxidants and reductants influence activation of NF-kB is presently unknown; however, there is evidence that antioxidant enzyme (AOE372), a redox-sensitive thioredoxin peroxidase, regulates IkB phosphorylation [246]. Phosphatases The phosphatases are an important component of most signal transduction pathways, because failure to reverse kinase actions can disrupt normal cellular functions. 
For example, transfection of human fibroblasts with constitutively active ras (hRasV12) inhibits cell growth and ultimately results in a senescentlike phenotype [441]. Similarly, constitutive ERK activation has an inhibitory effect on cell cycle progression [442,443]. Both the serine/threonine phosphatases and the PTPs are known to be redox-sensitive [82,144,153,156,271,281, 444-449]. The mechanism of redox effects on activity is probably best understood for the PTPs. Without exception, the PTPs contain a highly conserved region of 11 amino acid residues in their catalytic domain; specifi- cally, (Ile/Val)-His-Cys-X-Ala-Gly-X-X-Arg-(Ser/Thr)- Gly, where X is a nonconserved amino acid [17]. Either oxidation or mutation of the cysteine renders these molecules inactive [17,281]. H2O2 is a potent inhibitor of PTPs. As in the case of other oxidants, H2O2 probably oxidizes the thiolate anion at the catalytic site [280]. Because formation of a phosphorylcysteine intermediate seems to be critical to PTP activity [450-452], blocking it through oxidation of the cysteine inactivates the molecules. In many cases, treatment of cells with H2O2 stimulates increases in protein phosphorylation by inhibiting phosphatase-catalyzed removal of phosphate groups. Furthermore, mitogens that increase cellular ox- idant production may stimulate phosphorylation indirectly by decreasing phosphatase activity. Additional mechanisms are involved in stimulation of pathways activated by growth factors that increase oxidant production, however, because there are known instances in which the oxidants they produce have no effect on protein phosphorylation. For example, TGF-b1 stimulates phosphorylation of numerous proteins and has been shown to cause a large increase in H2O2 production; however, its effects on protein phosphorylation are not blocked by catalase [453]. 
Furthermore, H2O2 is effective in promoting phosphorylation of phospholipase D, the PDGF receptor, and PKC-a even after pretreatment of Swiss 3T3 fibroblasts with orthovanadate to inhibit phosphatases [454]. Thus, although diminished phosphatase activity may partially account for increased phosphorylation in some cases, it cannot totally account for oxidation effects on phosphorylation in every case. SPECIFICITY In general, there is good agreement between studies on redox effects on any given gene; albeit, not all oxidizing or reducing treatments exert equivalent effects. This is clearly demonstrated in studies of pag , which encodes a protein associated with cellular proliferation. Pag protein inhibits the tyrosine kinase activity of the Abelson (abl ) protein by binding to its SH3-binding domain [455]. BSO, menadione, sodium arsenate, and diethyl maleate all stimulate pag expression, but H2O2 does not [269]. Conversely, H2O2 stimulates c-fos expression (Table 1), although 4-hydroynonenal (a product of v-6-polyunsaturated fatty acid peroxidation) not only fails to induce c-fos expression but is actually inhibitory to c-fos induction by EGF and PDGF [185]. Similarly, some oxidants such as diamide decrease hypoxia-induced signals [201], although others such as H2O2 increase them [124]. 
As might be expected, the effects of any stimu...", "citation": { "type": "PubMed", "name": "Free Radic Biol Med 2000 Feb 1 28(3) 463-99", "id": "10699758" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead4" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "The proto-oncogene product Cbl has emerged as a negative regulator of a number of protein-tyrosine kinases, including the ZAP-70/Syk tyrosine kinases that are critical for signaling in hematopoietic cells.", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Jan 7 275(1) 414-22", "id": "10617633" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf5" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "We show that Crry increases early TCR-dependent activation signals, including p56lck-, zeta-associated protein-70 (ZAP-70), Vav-1, Akt, and extracellular signal-regulated kinase (ERK) phosphorylation but also costimulation-dependent mitogen-activated protein kinases (MAPK), such as the stress-activated c-Jun N-terminal kinase (JNK). It is intriguing that Crry costimulus enhanced p38 MAPK activation in T helper cell type 1 (Th1) but not in Th2 cells. ", "citation": { "type": "PubMed", "name": "J Leukoc Biol 2005 Dec 78(6) 1386-96", "id": "16301324" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd8" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "Engagement of the T-cell receptor (TCR) triggers a series of signaling events that lead to the activation of T cells. 
HIP-55 (SH3P7 or mAbp1), an actin-binding adaptor protein, interacts with and is tyrosine phosphorylated by ZAP-70, which is a crucial proximal protein tyrosine kinase for TCR signaling.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2005 Aug 25(16) 6869-78", "id": "16055701" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebbc" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "SHP-1 was found to bind to the protein tyrosine kinase ZAP-70. This interaction resulted in an increase in SHP-1 phosphatase activity and a decrease in ZAP-70 kinase activity. ", "citation": { "type": "PubMed", "name": "Science 1996 May 24 272(5265) 1173-6", "id": "8638162" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebab" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "We show that Crry increases early TCR-dependent activation signals, including p56lck-, zeta-associated protein-70 (ZAP-70), Vav-1, Akt, and extracellular signal-regulated kinase (ERK) phosphorylation but also costimulation-dependent mitogen-activated protein kinases (MAPK), such as the stress-activated c-Jun N-terminal kinase (JNK). It is intriguing that Crry costimulus enhanced p38 MAPK activation in T helper cell type 1 (Th1) but not in Th2 cells. 
", "citation": { "type": "PubMed", "name": "J Leukoc Biol 2005 Dec 78(6) 1386-96", "id": "16301324" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba7" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "T-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "Here we show that a spontaneous activating point mutation of the gene encoding an SH2 domain of ZAP-70 (tryptophan-to-cysteine substitution at residue 163 (W163C) ), a key signal transduction molecule in T cells, causes chronic autoimmune arthritis in mice that resembles human rheumatoid arthritis in many aspects.", "citation": { "type": "PubMed", "name": "Nature 2003 Nov 27 426(6965) 454-60", "id": "14647385" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb63" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "Linker for activation of T cells (LAT) is an adaptor protein whose tyrosine phosphorylation is critical for transduction of the T cell receptor (TCR) signal. 
LAT phosphorylation is accomplished by the protein tyrosine kinase ZAP-70, but it is not at all clear how LAT (which is not associated with the TCR) encounters ZAP-70 (which is bound to the TCR).", "citation": { "type": "PubMed", "name": "J Exp Med 1999 Nov 15 190(10) 1517-26", "id": "10562325" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb34" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "8892604;9047237;15388330;12817019", "citation": { "type": "PubMed", "name": "BMC Bioinformatics 2004 Jun 22 5 79", "id": "15212693" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec6b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "We report here that VHR, a Vaccinia virus VH1-related dual-specific protein phosphatase that inactivates the mitogen-activated kinases Erk2 and Jnk, is phosphorylated at Y138 by ZAP-70.", "citation": { "type": "PubMed", "name": "Nat Immunol 2003 Jan 4(1) 44-8", "id": "12447358" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec60" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Nat Immunol 2005 Apr 6(4) 390-5", "id": "15735648" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec49" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "ZAP-70 phosphorylated HIP-55 at Tyr-334 and Tyr-344 in vitro and in vivo,", "citation": { "type": "PubMed", "name": "J 
Biol Chem 2003 Dec 26 278(52) 52195-202", "id": "14557276" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf9" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "Zap-70 efficiently phosphorylates LAT on tyrosine residues at positions 226, 191, 171, 132 and 127", "citation": { "type": "PubMed", "name": "Biochem J 2001 Jun 1 356(Pt 2) 461-71", "id": "11368773" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebcc" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "natural killer cell", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 2001 Nov 30 276(48) 45175-83", "id": "11572860" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb1" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Immunol 1999 Jul 15 163(2) 844-53", "id": "10395678" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb84" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "10811803;11368773", "citation": { "type": "PubMed", "name": "BMC Bioinformatics 2004 Jun 22 5 79", "id": "15212693" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb3b" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "In this report, we show that during TCR signaling, the 
tyrosines Y239, Y240 and Y317 of Shc are the primary sites of tyrosine phosphorylation.", "citation": { "type": "PubMed", "name": "Eur J Immunol 1998 Aug 28(8) 2265-75", "id": "9710204" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb1f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 1997 Jun 6 272(23) 14562-70", "id": "9169414" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb06" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "In heterogeneous COS-1 cells, Cbl-b was phosphorylated on tyrosine residues by both Syk- (Syk/Zap-70) and Src- (Fyn/Lck) family kinases, with Syk kinase inducing the most prominent effect.", "citation": { "type": "PubMed", "name": "Oncogene 1999 Feb 4 18(5) 1147-56", "id": "10022120" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaec" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Immunol 1996 Nov 1 157(9) 3769-73", "id": "8892604" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaeb" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Nat Immunol 2003 Jan 4(1) 44-8", "id": "12447358" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae2" }, 
"experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 1994 Nov 25 269(47) 29520-9", "id": "7961936" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae0" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:ZAP70) actsIn kin(p(HGNC:ZAP70))", "summary_text": "dual phosphorylation by ZAP70 and Itk triggers the activation of PLCg1", "citation": { "type": "PubMed", "name": "Mol Immunol 2002 Jun 38(15) 1087-99", "id": "12044776" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eadd" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:IDO1)", "relation": "increases", "target": "bp(GOBP:\"T cell activation\")", "directed": false, "label": "p(HGNC:IDO1) increases bp(GOBP:\"T cell activation\")", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405149c", "evidences": [ { "bel_statement": "p(HGNC:IDO1) increases bp(GOBP:\"T cell activation\")", "summary_text": "This Network edge has no supporting evidence. 
Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec72" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:CD28)", "relation": "actsIn", "target": "cat(p(HGNC:CD28))", "directed": false, "label": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051475", "evidences": [ { "bel_statement": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "To analyze whether SARA and Hgs mRNA expression could be regulated by TCR-mediated signals, expression of resting or 1, 3, and 5 days with allergen or anti-CD3/CD28 mAbs-activated CD4+ T cells was analyzed. SARA and Hgs mRNA were detected in resting T cells, but their expression was reduced 1 day after the CD3/CD28 activation. The reduction was even more pronounced on days 3 and 5 of stimulation (Fig. 3 A, B). As reported (26) , we found a down-regulation of TGF-bRII transcripts after CD3/CD28 activation. TGF-bRI expression was unchanged under these conditions, whereas IL-10 expression used as a positive control was enhanced.", "citation": { "type": "PubMed", "name": "FASEB J 2003 Feb 17(2) 194-202", "id": "12554698" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec24" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "CD28 does not stimulate all signaling effectors that are activated by the TCR, but primarily provides a potent synergistic signal for transcription factors such as nuclear factor-?B (NF-?B), nuclear factor of activated T cells (NFAT), and activator protein-1 (AP1) [24-26]. 
", "citation": { "type": "PubMed", "name": "Arthritis Res Ther 2008 10 Suppl 1 S3", "id": "19007423" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec73" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "naive T-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "Entrez Gene:Human: The B-lymphocyte activation antigen B7-1 (formerly referred to as B7) provides regulatory signals for T lymphocytes as a consequence of binding to the CD28 (MIM 186760) and CTLA4 (MIM 123890) ligands of T cells.[supplied by OMIM]", "citation": { "type": "PubMed", "name": "Chem Biol 2004 Dec 11(12) 1651-8", "id": "15610849" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec12" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "Kinetic studies reveal that an early phase (1 to 5 min) of IKK activation following TCR/CD28 cross-linking is PKCalpha dependent and that a later phase (5 to 25 min) of IKK activation is PKCtheta dependent", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2003 Oct 23(19) 7068-81", "id": "12972622" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb1c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "Jurkat transfectants overexpressing Chat-H show a marked increase in interleukin-2 production after costimulation of T cell receptor and CD28. The degree of JNK activation is enhanced substantially in the Chat-H transfectants upon costimulation. WE found that Chat-H forms a complex with Pyk2H and enhances its tyrosine 402 phosphorylation, an up-regulator of the JNK pathway. 
The Src homology-2 domain mutant of Chat-H loses this signal modulating activity.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Feb 21 278(8) 6012-7", "id": "12486027" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb07" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Immunol 2001 Jan 1 166(1) 197-206", "id": "11123293" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec6a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "EMBO J 2003 Sep 15 22(18) 4689-98", "id": "12970181" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec15" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "CTLA-4 delivers signals that inhibit selection, indicating that CTLA-4 and CD28 have opposing functions in thymic development.", "citation": { "type": "PubMed", "name": "J Immunol 2003 Jun 1 170(11) 5421-8", "id": "12759417" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb88" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "T-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Sep 12 278(37) 35812-8", "id": "12842899" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eafe" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": 
"p(HGNC:CD28) actsIn cat(p(HGNC:CD28))", "summary_text": "CTLA-4-deficient mice develop a lethal autoimmune lymphoproliferative disorder that is strictly dependent on in vivo CD28 costimulation", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2007 Aug 21 104(34) 13756-61", "id": "17702861" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead5" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "T-cell", "tissue": "" } } ] } }, { "source": "p(HGNC:FYN)", "relation": "actsIn", "target": "kin(p(HGNC:FYN))", "directed": false, "label": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405147c", "evidences": [ { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "TNF-alpha activated multiple PTKs, including src family PTKs.To identify which src family kinase(s) was required for TNF-alpha-induced vascular permeability, small interfering RNA (siRNA) targeting each of the three src family PTKs expressed in human EC, c-src, fyn, and yes, were introduced into the barrier function assay. Only fyn siRNA protected against the TNF-alpha effect, whereas the c-src and yes siRNAs did not. ", "citation": { "type": "PubMed", "name": "Am J Physiol Lung Cell Mol Physiol 2006 Dec 291(6) L1232-45", "id": "16891393" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec68" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "Endothelium, Vascular" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Using Fyn as bait, p190 RhoGAP was isolated in the screen of an oligodendrocyte cDNA library. Coimmunoprecipitation and in vitro binding assays verified that p190 RhoGAP bound to the Fyn SH2 domain. 
These findings define a pathway in which Fyn activity regulates the phosphorylation of p190, leading to an increase in RhoGAP activity with a subsequent increase in RhoGDP, which in turn, regulates the morphological changes that accompany oligodendrocyte differentiation. ", "citation": { "type": "PubMed", "name": "J Neurobiol 2001 Oct 49(1) 62-78", "id": "11536198" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec3a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "oligodendrocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "In this study, we provide evidence that the VEGF-dependent tyrosine phosphorylation of caveolin-1 induces interaction of the protein with the membrane-type 1 matrix metalloproteinase (MT1-MMP). This interaction requires the phosphorylation of caveolin-1 on tyrosine 14 by members of the Src family of protein kinases, such as Src and Fyn, because it is completely abolished by expression of a catalytically inactive Src mutant or by site-directed mutagenesis of tyrosine 14 of caveolin-1. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Dec 10 279(50) 52132-40", "id": "15466865" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec18" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Following the anti-PrPc antibody-mediated stimulation of live GN11 cells, we observed that PrPc clustered on plasma membrane domains rich in Cav-1 in which Fyn kinase converged to be activated. 
After these events, a signaling cascade through p42/44 MAP kinase (Erk 1/2) was triggered, suggesting that following translocations from rafts to caveolae or caveolae-like domains PrPc could interact with Cav-1 and induce signal transduction events.", "citation": { "type": "PubMed", "name": "J Biomed Biotechnol 2006 2006(5) 69469", "id": "17489019" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebff" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "neuron", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Caveolin-1 functions as a membrane adaptor to link the integrin alpha subunit to the tyrosine kinase Fyn. Upon integrin ligation, Fyn is activated and binds, via its SH3 domain, to Shc. Shc is subsequently phosphorylated at tyrosine 317 and recruits Grb2. This sequence of events is necessary to couple integrins to the Ras-ERK pathway and promote cell cycle progression. These findings reveal an unexpected function of caveolin-1 and Fyn in integrin signaling and anchorage-dependent cell growth.", "citation": { "type": "PubMed", "name": "Cell 1998 Sep 4 94(5) 625-34", "id": "9741627" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb96" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Here we have examined the role of p62(dok) in CD2-dependent signaling in Jurkat T cells. As previously reported, we find that ligation of the CD2 molecule by mitogenic pairs of anti-CD2 mAbs led to phosphorylation of p62(dok). While CD2-induced p62(dok) tyrosine phosphorylation was independent of both the p36/38 membrane adapter protein linker of activated T cells (LAT) and the ZAP70/Syk family of kinases, it was dependent upon the Src family of kinases including Lck and Fyn. 
We find further that CD2 engagement induced the association of tyrosine-phosphorylated p62(dok) to Crk-L. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2001 Dec 7 276(49) 45654-61", "id": "11553620" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb94" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Significant tyrosine phosphorylation of cortactin, stable complex formation between activated Fyn and cortactin, and co-localization of cortactin with Fyn at cell membranes were all observed only in cells with high metastatic potential. Both integrin-mediated Fyn activation and hyperphosphorylation of cortactin were observed 2-5 h after stimulation in highly metastatic cells, and they required de novo protein synthesis. We demonstrate that cortactin is a specific substrate and cooperative effector of Fyn in integrin-mediated signaling processes regulating metastatic potential.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Nov 28 278(48) 48367-76", "id": "13129922" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb85" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We observed that overexpression of active Src and Fyn resulted in tyrosine phosphorylation of RSK2....We identified that RSK2 was tyrosinephosphorylated at a group of tyrosine sites (Table 1), including Tyr-529 (spectra presented in Fig. 4B), due to expression of the constitutively activated Src and Fyn....As shown in Fig. 
7, wild-type RSK2 CTD domain was highly tyrosine-phosphorylated at Tyr-529 by rSrc or rFyn, whereas Tyr-529 phosphorylation was abolished in the RSK2 CTD Y529F mutant (in vitro)", "citation": { "type": "PubMed", "name": "J Biol Chem 2008 Feb 22 283(8) 4652-7", "id": "18156174" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb79" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We have studied the phosphorylation of the closely related lck, fyn, and c-src tyrosine protein kinases in leukemic murine T-cell lines that have lost the expression of CD45. The phosphorylation of the lck kinase at an inhibitory site of tyrosine phosphorylation, Tyr-505, was increased by two-, six-, and eightfold in three different cell lines. Phosphorylation of the fyn kinase at the homologous site, Tyr-531, was unaltered in one of these cell lines, but increased by 2.5-fold in the two others.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 1993 Mar 13(3) 1651-6", "id": "8441403" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb5e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Basic fibroblast growth factor (FGF)-2 induced a specific signaling response within the caveolae-like domain of LAN-1 cells, characterized by the tyrosine phosphorylation of a 75-80-kDa protein. This protein present in the caveolae-like domains has properties suggesting that it is a member of the SNT family of adapter proteins. 
The signaling event originating in the caveolae-like domains in response to FGF-2 appeared to require the activation of at least Fyn and Lyn, two members of the Src family of tyrosine kinases.", "citation": { "type": "PubMed", "name": "J Neurochem 2000 Feb 74(2) 676-83", "id": "10646519" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb46" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "A caveolin peptide derived from this region (residues 82-101) functionally suppressed the auto-activation of purified recombinant c-Src tyrosine kinase and Fyn, a related Src family tyrosine kinase. We further analyzed the effect of caveolin on c-Src activity in vivo by transiently co-expressing full-length caveolin and c-Src tyrosine kinase in 293T cells. Co-expression with caveolin dramatically suppressed the tyrosine kinase activity of c-Src as measured via an immune complex kinase assay. Thus, it appears that caveolin structurally and functionally interacts with wild-type c-Src via caveolin residues 82-101. ", "citation": { "type": "PubMed", "name": "J Biol Chem 1996 Nov 15 271(46) 29182-90", "id": "8910575" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb2e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "In the present study, we identified a Rho GTPase-activating protein (GAP), TCGAP (Tc10/Cdc42 GTPase-activating protein), as a novel Fyn substrate. TCGAP interacted with Fyn and was phosphorylated by Fyn, with Tyr-406 in the GAP domain as a major Fyn-mediated phosphorylation site. 
Fyn suppressed the GAP activity of wild-type TCGAP but not the Y406F mutant of TCGAP in a phosphorylation-dependent manner, suggesting that Fyn-mediated Tyr-406 phosphorylation negatively regulated the TCGAP activity.", "citation": { "type": "PubMed", "name": "J Biol Chem 2006 Aug 18 281(33) 23611-9", "id": "16777849" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb12" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Given that the three ubiquitously expressed SFK members c-Src, Fyn, and Yes have been previously identified in 3T3-L1 cells (34) , we immunoprecipitated Src, Fyn, and Yes from IGF-I-stimulated proliferating cells and measured kinase activity with an in vitro assay using enolase as a substrate (35) . IGF-I stimulated c-Src kinase activity in proliferating cells at a peak stimulation time of 1 min (Fig. 5A)Citation . The kinetics of IGF-I-stimulated c-Src and MAPK are consistent with SFK activation occurring upstream of MAPK: both activities are back to baseline by 10 min, and our previous studies demonstrated peak MAPK activation was at 5 min (15) . IGF-I stimulation of c-Src and Fyn activity was comparable, ~3-fold more than baseline, and dependent on immunoprecipitation with specific antibodies (Fig. 5B)Citation . IGF-I activation of both c-Src and Fyn is not surprising,", "citation": { "type": "PubMed", "name": "Cell Growth Differ 2001 Jul 12(7) 379-86", "id": "11457735" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb0f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "To determine if Src family kinases (SFKs) are involved, we demonstrated that CSF-1 activated Fyn and Lyn in cells expressing wild-type (WT) or DeltaKI receptors. 
Moreover, CSF-1-induced Akt activity in cells expressing DeltaKI is SFK dependent since Akt activation was prevented by pharmacological or genetic inhibition of SFK activity. The docking protein Gab2 may link SFK to PI3-kinase. CSF-1 induced Gab2 tyrosyl phosphorylation and association with PI3-kinase in cells expressing WT or DeltaKI receptors. ", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2000 Sep 20(18) 6779-98", "id": "10958675" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb0e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "CD146 cross-linking induces the tyrosine phosphorylation of the protein tyrosine kinase p125(FAK) as well as p125(FAK) association with paxillin, both events being inhibited by cytochalasin D. No direct association of CD146 with p125(FAK) was observed. Consistent with these data, CD146 associates with p59(fyn), a Src family kinase known to phosphorylate p125(FAK).", "citation": { "type": "PubMed", "name": "J Biol Chem 1998 Oct 9 273(41) 26852-6", "id": "9756930" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec6d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We found that UVB only stimulated ERKs (Thr-202/Tyr-204) and Akt (serine 473) phosphorylation (Fig. 8, E and G) but not p38 MAP kinase or JNKs (Fig. 8, A and C) in WT-Fyn cells. 
ERKs and Akt were not affected by UVB in DNM-Fyn cells.", "citation": { "type": "PubMed", "name": "J Biol Chem 2005 Jan 28 280(4) 2446-54", "id": "15537652" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec69" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "To unravel the cellular functions of magicin, we used a yeast two-hybrid system and identified Fyn tyrosine kinase as a specific binding partner for magicin. Fyn phosphorylates magicin in vitro. In addition to Fyn, Src and Lck also interact with magicin. ", "citation": { "type": "PubMed", "name": "Biochem Biophys Res Commun 2006 Sep 29 348(3) 826-31", "id": "16899217" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec62" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "The CD45 tyrosine phosphatase has been reported to activate the src family tyrosine kinases Lck and Fyn by dephosphorylating regulatory COOH-terminal tyrosine residues 505 and 528, respectively. ", "citation": { "type": "PubMed", "name": "Mol Cell Biol 1996 Sep 16(9) 4996-5003", "id": "8756658" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec5b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Moreover, activation of the LHR in MA-10 cells results in the stimulation of the activity of Fyn and Yes, and overexpression of either of these two tyrosine kinases enhances the LHR-mediated phosphorylation of FAK-Tyr576. 
", "citation": { "type": "PubMed", "name": "Mol Endocrinol 2006 Mar 20(3) 619-30", "id": "16293639" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec59" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "RESULTS: Immunoprecipitation experiments showed that Crk-associated substrate (Cas) was tyrosine-phosphorylated in response to ethanol administration. Fyn kinase was shown to be activated by ethanol administration and to phosphorylate Cas on tyrosine residue in vitro. CONCLUSIONS: Cas was tyrosine-phosphorylated in rat brain by ethanol administration, and Fyn kinase was most likely involved in the process.", "citation": { "type": "PubMed", "name": "Alcohol Clin Exp Res 2002 Aug 26(8 Suppl) 38S-43S", "id": "12198373" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec45" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "The phosphorylation generated a consensus sequence for the binding of the SH2 domain of Grb2 (pYSN). Pull-down assays with SH2-Grb2 from human fetal brain homogenates, and co-immunoprecipitation of Grb2 and MAP-2 confirmed the interaction in vivo, and demonstrated that MAP-2c is tyrosine-phosphorylated in human fetal brain. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2005 Jan 21 280(3) 1962-70", "id": "15536091" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec3d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "The binding of beta-catenin to these partners is regulated by phosphorylation of at least three critical tyrosine residues. 
Each of these residues is targeted by one or more specific kinases: Y142 by Fyn, Fer and cMet; Y489 by Abl; and Y654 by Src and the epidermal growth factor receptor.", "citation": { "type": "PubMed", "name": "Curr Opin Cell Biol 2005 Oct 17(5) 459-65", "id": "16099633" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec32" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We show that the overactive FGFR2 S252W mutation induced decreased Src family kinase tyrosine phosphorylation and activity associated with decreased Lyn and Fyn protein expression in human osteoblasts. Thus, constitutive FGFR2 activation induces c-Cbl-dependent Lyn and Fyn proteasome degradation, resulting in reduced Lyn and Fyn kinase activity, increased ALP expression, and FGFR2 down-regulation.", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Aug 27 279(35) 36259-67", "id": "15190072" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec31" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "In transfected COS cells, Dok-4 was a substrate for the cytosolic tyrosine kinases Src and Fyn as well as for Jak2. 
Dok-4 could also be phosphorylated by the receptor tyrosine kinase Ret ", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Apr 30 279(18) 19335-49", "id": "14963042" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec2b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Data presented here demonstrate that epidermal growth factor (EGF) receptor ligands promote the tyrosine phosphorylation of endogenous and adenovirally transduced Srcasm in keratinocytes, and that increased levels of Srcasm activate endogenous SFKs, with a preference for Fyn and Src. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2005 Feb 18 280(7) 6036-46", "id": "15579470" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec29" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Keratinocytes", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Recombinant PSD-93 was phosphorylated by Fyn in vitro, and Tyr-384 was identified as a major phosphorylation site", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Nov 28 278(48) 47610-21", "id": "13129934" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec28" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Increased PRK2 expression induces catenin tyrosine phosphorylation and Fyn activation Tyrosine phosphorylation of beta and gamma catenin and p120ctn was also induced by PRK2 overexpression", "citation": { "type": "PubMed", "name": "J Cell Biol 2002 Jan 7 156(1) 137-48", "id": "11777936" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec27" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", 
"cell": "Keratinocytes", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "and dominant-negative and constitutively active Fyn mutants rescue and recapitulate the Sca-1 antisense phenotype, respectively.", "citation": { "type": "PubMed", "name": "J Cell Sci 2004 Dec 1 117(Pt 25) 6185-95", "id": "15546912" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec1c" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "These observations collectively suggest that Fyn plays critical roles in promoting accelerated MBP expression during myelinogenesis in a MBP isoform-preferential manner, and QKI may act in the same pathway downstream of Fyn for MBP mRNA homeostasis.", "citation": { "type": "PubMed", "name": "J Biol Chem 2005 Jan 7 280(1) 389-95", "id": "15528192" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec1b" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "oligodendrocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "\\\"Ly-6A is required for T cell receptor expression and protein tyrosine kinase fyn activity.\\\"", "citation": { "type": "PubMed", "name": "EMBO J 1994 May 1 13(9) 2167-76", "id": "8187770" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec1a" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PTPalpha-null thymocytes develop normally, but unstimulated PTPalpha-/- cells exhibit increased tyrosine phosphorylation of specific proteins, increased Fyn activity, and hyperphosphorylation of Cbp/PAG that promotes its association with C-terminal Src kinase. 
Elevated Fyn activity in the absence of PTPalpha is due to enhanced phosphorylation of Fyn tyrosines 528 and 417. ", "citation": { "type": "PubMed", "name": "J Immunol 2005 Dec 15 175(12) 7947-56", "id": "16339530" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec19" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "TCR mediated activation of protein tyrosine kinases, such as lck (LCK), fyn (FYN), and ZAP-70 occurs, resulting in the phosphorylation of a number of substrates, including the transmembrane adaptor proteins LAT and TRIM, which can both bind, and therefore recruit PI3K", "citation": { "type": "PubMed", "name": "Mol Immunol 2002 Jun 38(15) 1087-99", "id": "12044776" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec0a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Depletion of Sam68 by RNA interference caused accumulation of antiapoptotic Bcl-x(L), whereas its up-regulation increased the levels of proapoptotic Bcl-x(s). Tyrosine phosphorylation of Sam68 by Fyn inverted this effect and favored the Bcl-x(L) splice site selection. ", "citation": { "type": "PubMed", "name": "J Cell Biol 2007 Mar 26 176(7) 929-39", "id": "17371836" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf6" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Transient expression of mSKAP55R in COS cells demonstrated that tyrosine 260 was the predominant site of phosphorylation by FYN kinase. 
Furthermore, this phosphotyrosine was essential for coimmunoprecipitation of FYN with mSKAP55R", "citation": { "type": "PubMed", "name": "Exp Hematol 2000 Nov 28(11) 1250-9", "id": "11063873" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebef" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PKCh is a direct upstream activator of Fyn; PKCh associates with, and activates Fyn, leading to keratinocyte growth arrest and differentiation", "citation": { "type": "PubMed", "name": "J Biochem (Tokyo) 2002 Dec 132(6) 853-7", "id": "12473186" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebeb" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Transient expression of actively mutated Fyn, having Phe-528 instead of Tyr-528, in Jurkat T cells stimulated the fos promoter and serum response element (SRE), suggesting that the Fyn kinase stimulates c-fos expression through SRE.", "citation": { "type": "PubMed", "name": "Princess Takamatsu Symp 1991 22 293-305", "id": "1668889" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe7" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Srcasm itself exerts a negative feedback, as it inhibits its activator, the Src kinase Fyn (Li et al., 2007).", "citation": { "type": "PubMed", "name": "J Invest Dermatol 2008 Mar 128(3) 501-16", "id": "18268536" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe3" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Keratinocytes", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", 
"summary_text": "Reconstitution of complexes containing p62 and the src family kinase p59fyn in HeLa cells demonstrated that complex formation resulted in tyrosine phosphorylation of p62 and was mediated by both the SH3 and SH2 domains of p59fyn.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 1995 Jan 15(1) 186-97", "id": "7799925" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd3" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Furthermore, we show that NRG1 signaling, through activation of Fyn and Pyk2 kinases, stimulates phosphorylation of Y1472 on the NR2B subunit of the NMDA receptor (NMDAR), a key regulatory site that modulates channel properties. ", "citation": { "type": "PubMed", "name": "J Neurosci 2007 Apr 25 27(17) 4519-29", "id": "17460065" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebce" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "As shown in Fig. 2A, whereas hypertonic induction for 4 h greatly stimulated the ORE-SVLuc activity in mock-transfected cells, co-transfection with increasing amounts of pCMV-FynDN resulted in an incremental reduction in the hypertonicity-induced ORE-Luc activity.", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Nov 29 277(48) 46085-92", "id": "12359721" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebba" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Upon integrin binding to ECM, Fyn becomes activated, and its SH3 domain interacts with a proline-rich site in Shc. 
Shc is then phosphorylated by Fyn at Tyr317 and combines with the Grb2-mSOS complex (4) (Fig. 3B).", "citation": { "type": "PubMed", "name": "Science 1999 Aug 13 285(5430) 1028-32", "id": "10446041" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb8" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Pleiotrophin (PTN the protein, Ptn the gene) signals downstream targets through inactivation of its receptor, the transmembrane receptor protein tyrosine phosphatase (RPTP)beta zeta We further demonstrate that Fyn is a substrate of RPTPbeta zeta, and that tyrosine phosphorylation of Fyn is sharply increased in PTN-stimulated cells.", "citation": { "type": "PubMed", "name": "Biochem Biophys Res Commun 2005 Jul 8 332(3) 664-9", "id": "15925565" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba8" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "This hypothesis is supported by the findings that shear stress activation of ERK is inhibited by a polyclonal anti–Cav-158 and that shear stress can activate Fyn (S. Jalali, S. 
Chien, unpublished data, 1998).", "citation": { "type": "PubMed", "name": "Circ Res 2002 Nov 1 91(9) 769-75", "id": "12411390" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba5" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Consistent with an involvement of this kinase, fyn-deficient keratinocytes have strongly decreased tyrosine phosphorylation levels of beta- and gamma-catenins and p120-Cas, and structural and functional abnormalities in cell adhesion similar to those caused by tyrosine kinase inhibitors.", "citation": { "type": "PubMed", "name": "J Cell Biol 1998 Jun 15 141(6) 1449-65", "id": "9628900" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb91" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "Keratinocytes", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Using chemical and genetic inhibitors, we show that Fyn activity is required for SAPK2/p38 but not for FAK activation in response to VEGF. In contrast, c-Src permits activation of FAK, but not that of SAPK2/p38. In addition, Fyn is required for stress fiber formation and endothelial cell migration. 
", "citation": { "type": "PubMed", "name": "J Biol Chem 2006 Nov 10 281(45) 34009-20", "id": "16966330" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb8a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Because Fyn is the kinase primarily responsible for the phosphorylation of PAG (the phosphoprotein associated with glycosphingolipid-enriched microdomains), which negatively regulates Src-kinase activity by recruiting Csk (the C-terminal Src kinase) to the membrane, we investigated whether anergy induction also affects PAG.", "citation": { "type": "PubMed", "name": "Blood 2007 Jul 15 110(2) 596-625", "id": "17389760" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb82" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We examined the effect of PEDF on kinase activity of Fyn and found that PEDF downregulated FGF-2-promoted Fyn activity by tyrosine phosphorylation at the C-terminus in a Fes-dependent manner. 
", "citation": { "type": "PubMed", "name": "J Cell Sci 2005 Mar 1 118(Pt 5) 961-70", "id": "15713745" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb7a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Through its phosphorylated tyrosine residues, the activated VEGFR-2 associates with the adapter molecules Shc, Grb2 and Nck, to Ras GTPase activating protein, p59fyn, pp62yes and phospholipase Cg, and to the tyrosine phosphatases SHP-1 and SHP-2", "citation": { "type": "PubMed", "name": "EMBO J 1999 Feb 15 18(4) 882-92", "id": "10022831" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb6f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "The protein-tyrosine kinase Csk is one of the main down-regulators of the Src family of kinases. Csk may be involved in the down-regulation of T cell receptor (TCR) signaling by C-terminal tyrosine phosphorylation of Lck and Fyn; however, it is not known how Csk activity is regulated or how it targets these Src family members.", "citation": { "type": "PubMed", "name": "J Biol Chem 1996 Apr 19 271(16) 9698-703", "id": "8621646" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb6d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Immunoprecipitation experiments showed that the amount of coimmunoprecipitated Fyn kinase with an anti-Cbl antibody increased in extracts from ethanol-administered rats compared to those from saline-administered rats. 
Exogenous Fyn kinase was shown to phosphorylate on tyrosine residue(s) of Cbl from the cerebellum in vitro.", "citation": { "type": "PubMed", "name": "Brain Res 2002 Sep 20 950(1-2) 203-9", "id": "12231245" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb64" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "JAK2 tyrosine kinase and the Src family p59 Fyn tyrosine kinase are required for Ang II-induced STAT1 tyrosine phosphorylation in VSMCs.", "citation": { "type": "PubMed", "name": "J Biol Chem 1999 Jul 9 274(28) 19846-51", "id": "10391929" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb5c" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "", "tissue": "vascular smooth muscle" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We conclude that p59(fyn) and p56(lck) differently participate in regulating the phosphorylation state of Sam68 in T cells and that ZAP-70 may contribute to Sam68 tyrosine phosphorylation and to the specific recruitment of this molecule after CD3 stimulation.", "citation": { "type": "PubMed", "name": "Eur J Immunol 1997 Dec 27(12) 3360-7", "id": "9464824" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb5a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Yes tyrosine kinase also binds to p120 catenin but only upon activation, and stimulates Fer and Fyn tyrosine kinases. 
p120 catenin acts as a docking protein facilitating the activation of Fer/Fyn tyrosine kinases by Yes", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2003 Apr 23(7) 2287-97", "id": "12640114" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb57" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Employing fyn-deficient mouse embryonic fibroblast cells and tissues, we demonstrate that fyn is essential for phosphorylating PIKE-A and protects it from apoptotic cleavage. Active but not kinase-dead fyn interacts with PIKE-A and phosphorylates it on both Y682 and Y774 residues. ", "citation": { "type": "PubMed", "name": "Cell Death Differ 2007 Feb 14(2) 368-77", "id": "16841086" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb56" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We also provide evidence that upon CD43 cross-linking, Fyn is tyrosine-phosphorylated in a time-dependent manner. Our results suggest that CD43 cross-linking on the T cell surface induces the interaction between CD43 and Fyn, presumably through the Fyn SH3 domain and a putative SH3 binding site in CD43, leading to Fyn tyrosine phosphorylation and signal propagation.", "citation": { "type": "PubMed", "name": "J Biol Chem 1996 Nov 1 271(44) 27564-8", "id": "8910342" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb1d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Carlos Ibanez and collaborators demonstrated that neural cell adhesion molecule (NCAM) functions as an alternative signaling receptor for GFLs (fig. 2). 
In the presence of GFR-a, GDNF binds with high affinity to p140-NCAM and intracellularly activates the Src-like kinase c-Fyn and the focal adhesion kinase FAK [29].", "citation": { "type": "PubMed", "name": "Cell Mol Life Sci 2004 Dec 61(23) 2954-64", "id": "15583857" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead9" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "On the contrary, the Cas-associated kinase activity was remarkably decreased in Fyn-/- cells.", "citation": { "type": "PubMed", "name": "Oncogene 1997 Mar 27 14(12) 1419-26", "id": "9136985" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec74" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "Fibroblasts", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "ROS activate Fyn, which phosphorylates JAK2", "citation": { "type": "PubMed", "name": "Arterioscler Thromb Vasc Biol 2002 Dec 1 22(12) 1962-71", "id": "12482820" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec70" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "and dominant-negative and constitutively active Fyn mutants rescue and recapitulate the Sca-1 antisense phenotype, respectively.", "citation": { "type": "PubMed", "name": "J Cell Sci 2004 Dec 1 117(Pt 25) 6185-95", "id": "15546912" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec6f" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "Muscle, Skeletal" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Hypertonicity provoked Fyn-dependent tyrosine phosphorylation in beta-catenin, alpha-catenin, and 
p120(Cas) and caused the dissociation of beta-catenin from the contacts.", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Oct 13 275(41) 32289-98", "id": "10921917" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec6c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Caveolin-1 is a substrate for nonreceptor tyrosine kinases including Src, Fyn, and Abl.", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Mar 15 277(11) 8771-4", "id": "11805080" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec5e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Hck negatively regulates Lyn, and Lyn negatively regulates Fyn", "citation": { "type": "PubMed", "name": "Blood 2007 May 18", "id": "17513616" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec5c" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Further experiments with small interfering RNA revealed that Fyn phosphorylated Nrf 2Y568 leading to nuclear export and degradation of Nrf 2.", "citation": { "type": "PubMed", "name": "J Biol Chem 2006 Apr 28 281(17) 12132-42", "id": "16513647" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec5a" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Caveolin-1 is phosphorylated on Tyr(14) in response to both oxidative and hyperosmotic stress. In the present paper, we show that this phosphorylation requires activation of the Src family kinase Fyn. 
", "citation": { "type": "PubMed", "name": "Biochem J 2003 Nov 15 376(Pt 1) 159-68", "id": "12921535" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec54" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "MAPK. Stimulation of ERK by endothelial-cell integrins is mediated by either FAK, or the Src-family kinases Fyn and Yes.", "citation": { "type": "PubMed", "name": "Biochim Biophys Acta 2004 Mar 4 1654(1) 51-67", "id": "14984767" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec53" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "exposure to PDGF resulted in a marked increase in c-Src, Fyn, and Yes kinase activities", "citation": { "type": "PubMed", "name": "Mol Biol Cell 2005 Nov 16(11) 5418-32", "id": "16135530" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec4e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 2001 Jan 5 276(1) 693-9", "id": "11024032" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec4d" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Importantly, tyrosine phosphorylation of Itch appears to reduce its interaction with its substrate JunB. The turnover of JunB is accelerated in Fyn-deficient T cells, which is further reconstituted by Itch Tyr371 mutation. 
", "citation": { "type": "PubMed", "name": "Mol Cell 2006 Jan 6 21(1) 135-41", "id": "16387660" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec4b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "p90RSK activation by H(2)O(2) was significantly reduced in fibroblasts derived from transgenic mice deficient in Fyn, but not c-Src. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Jan 21 275(3) 1739-48", "id": "10636870" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec46" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PRAP was found to function as a substrate for Src family kinases, such as c-Src or Fyn, but not for Pyk2/RAFTK", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Oct 24 278(43) 42225-33", "id": "12893833" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec44" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "In cells lacking RET, GDNF binds with high affinity to the NCAM and GFRalpha1 complex, which activates Fyn and FAK.", "citation": { "type": "PubMed", "name": "J Cell Sci 2003 Oct 1 116(Pt 19) 3855-62", "id": "12953054" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec42" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "The Csk tyrosine kinase negatively regulates the Src family kinases Lck and Fyn in T cells.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2005 Mar 25(6) 2227-41", "id": "15743820" }, "metadata": 
{ "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec41" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Biochemical and in vitro experiments implicate Src and Fyn in the Reelin-dependent tyrosine phosphorylation of Dab1, which controls the positioning of radially migrating neurons in many brain regions. ", "citation": { "type": "PubMed", "name": "J Neurosci 2005 Sep 14 25(37) 8578-86", "id": "16162939" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec40" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Immunol 1998 Apr 1 160(7) 3305-14", "id": "9531288" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec3c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "CD146 engagement initiates an outside-in signaling pathway involving the protein tyrosine kinases FYN and FAK as well as paxillin.", "citation": { "type": "PubMed", "name": "J Biol Chem 2001 Jan 12 276(2) 1564-9", "id": "11036077" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec34" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Virol 1991 Jan 65(1) 170-9", "id": "1985196" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec2a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": 
"p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Fyn, by phosphorylating a residue located in the regulatory domain of p120-catenin (Tyr112), inhibits the interaction of this protein with RhoA", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2007 Mar 27(5) 1745-57", "id": "17194753" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec22" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "FEBS Lett 2000 Jun 2 474(2-3) 179-83", "id": "10838081" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec16" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Biochem Biophys Res Commun 1997 Dec 18 241(2) 355-62", "id": "9425276" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec06" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2004 Aug 24(16) 6980-92", "id": "15282299" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec04" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Mutational analysis revealed that tyrosine 271 in SKAP55 played a pivotal role for interaction with both Fyn kinase and adapter protein Grb-2, indicating that the Fyn-phosphorylated SKAP55 transiently associates with adapter Grb-2 to mediate mitogen-activated 
protein kinase activation. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Oct 25 277(43) 40420-7", "id": "12171928" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec03" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Moreover, expression of a constitutive active form of Fyn also promoted the recruitment of Tom1L1 to enlarged endosomes.", "citation": { "type": "PubMed", "name": "J Biol Chem 2005 Mar 11 280(10) 9258-64", "id": "15611048" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebfd" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We show herethat RPTP is a physiological activator of two additional Src family kinases, Yes and Fyn.", "citation": { "type": "PubMed", "name": "Exp Cell Res 2004 Mar 10 294(1) 236-43", "id": "14980517" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf7" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Src is associated with c-Cbl, and we have previously demonstrated that the Src-like kinase Fyn can phosphorylate c-Cbl at a preferred binding site for the p85 subunit of phosphatidylinositol 3'-kinase. 
", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Jul 12 277(28) 24967-75", "id": "11994282" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebee" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "N.N.;12374739", "citation": { "type": "PubMed", "name": "BMC Bioinformatics 2004 Jun 22 5 79", "id": "15212693" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebed" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Blood 2002 Feb 1 99(3) 957-65", "id": "11806999" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe9" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We found previously that a Src type tyrosine kinase Fyn and cyclin-dependent kinase 5 (Cdk5) mediate Sema3A-signaling.", "citation": { "type": "PubMed", "name": "J Neurosci 2004 Jul 7 24(27) 6161-70", "id": "15240808" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe2" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Here we show that 3BP2 is tyrosine phosphorylated following BCR aggregation on B lymphoma cells, and that 3BP2 is a substrate for Syk and Fyn, but not Btk. 
", "citation": { "type": "PubMed", "name": "Blood 2005 Feb 1 105(3) 1106-13", "id": "15345594" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd9" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PTP? is required for SCF-induced c-Kit and Fyn activation, and in this way regulates a Fyn-based c-Kit signaling axis (Fyn/Gab2/Shp2/Vav/PAK/Rac/JNK) that mediates mast cell migration.", "citation": { "type": "PubMed", "name": "J Immunol 2010 Nov 15 185(10) 5993-6002", "id": "20944008" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd7" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "However, Fyn-deficient mast cells showed a significant reduction in phosphorylation of Shp2 phosphatase and p38 mitogen-activated protein kinase. ", "citation": { "type": "PubMed", "name": "Cell Signal 2006 Sep 18(9) 1447-54", "id": "16442778" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd2" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2002 Apr 22(8) 2673-86", "id": "11909961" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd1" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "The functional significance of pp115 association with p59fyn is suggested by the ability of alpha 4 integrin stimulation to activate Fyn tyrosine kinase activity. 
", "citation": { "type": "PubMed", "name": "J Immunol 1997 Nov 15 159(10) 4806-14", "id": "9366405" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebcf" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Nov 28 278(48) 47610-21", "id": "13129934" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc9" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Mol Pharmacol 2002 Sep 62(3) 672-9", "id": "12181444" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc6" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Cotransfection of tau and kinases showed that Tyr-18 was the major site for Fyn phosphorylation, but Tyr-394 was the main residue for Abl.", "citation": { "type": "PubMed", "name": "J Neurosci 2005 Jul 13 25(28) 6584-93", "id": "16014719" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc4" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "upon ligation of the integrin beta6 with fibronectin, beta6 complexed with Fyn and activated it.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Oct 24 278(43) 41646-53", "id": "12917446" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebbf" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", 
"tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "p250GAP is tyrosine phosphorylated by Fyn", "citation": { "type": "PubMed", "name": "Biochem Biophys Res Commun 2003 Jun 20 306(1) 151-5", "id": "12788081" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebbe" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "However, purified EGFR did not phosphorylate recombinant PKC delta in vitro, whereas members of the Src family (c-Src, c-Fyn) and membrane preparations from keratinocytes did. ", "citation": { "type": "PubMed", "name": "J Biol Chem 1996 Mar 8 271(10) 5325-31", "id": "8621384" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebbd" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "Keratinocytes", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Biol Chem 2001 Feb 9 276(6) 3879-84", "id": "11078745" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba4" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Dec 6 277(49) 47373-9", "id": "12239221" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba3" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "In Fyn-/- fibroblasts, activation of Ras by H(2)O(2) was significantly attenuated.", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Jan 21 275(3) 1739-48", "id": "10636870" }, "metadata": { 
"created_by": "selventa", "id": "524b351fd3fbfd4c3405eb9c" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "Fibroblasts", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Oncogene 2000 Jun 8 19(25) 2895-903", "id": "10871840" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb98" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "EGF-R causes disassembly of hemidesmosomes by activating Fyn, which in turn phosphorylates the beta4 cytoplasmic domain. Neoplastic cells expressing dominant negative Fyn display increased hemidesmosomes and migrate poorly in vitro in response to EGF.", "citation": { "type": "PubMed", "name": "J Cell Biol 2001 Oct 29 155(3) 447-58", "id": "11684709" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb93" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "In heterogeneous COS-1 cells, Cbl-b was phosphorylated on tyrosine residues by both Syk- (Syk/Zap-70) and Src- (Fyn/Lck) family kinases", "citation": { "type": "PubMed", "name": "Oncogene 1999 Feb 4 18(5) 1147-56", "id": "10022120" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb92" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Cdk5 phosphorylates p80 Dab1 at multiple sites in its carboxyl-terminal region, and tyrosine phosphorylation of p80 Dab1 by Fyn tyrosine kinase is attenuated by this Cdk5-mediated phosphorylation in vitro. 
", "citation": { "type": "PubMed", "name": "Brain Res 2007 Apr 6 1140 84-95", "id": "16529723" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb8f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "These data suggest that Fyn prefers Y731 to other tyrosines in the C-terminus of c-Cbl.", "citation": { "type": "PubMed", "name": "FEBS Lett 2004 Nov 19 577(3) 555-62", "id": "15556646" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb89" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Recently, it was shown that Fyn is one of the kinases responsible for the phosphorylation of caveolin", "citation": { "type": "PubMed", "name": "J Clin Invest 1999 Apr 103(7) 931-43", "id": "10194465" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb87" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Biochem Biophys Res Commun 2001 Oct 19 288(1) 233-9", "id": "11594778" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb80" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "IL-6-treatment of INA-6 cells induced the kinase activities of Fyn, Lyn and Hck", "citation": { "type": "PubMed", "name": "Oncogene 2007 Jul 26 26(34) 4987-98", "id": "17310994" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb7c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", 
"tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Using dominant-negative mutants of c-Src and Fyn and Src-deficient SYF cells as well as by co-immunoprecipitation studies, we can demonstrate that the M2R-mediated transactivation of EGFR specifically involves Fyn but not c-Src or Yes. ", "citation": { "type": "PubMed", "name": "Cell Signal 2006 Aug 18(8) 1338-49", "id": "16337776" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb74" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Exogenous Fyn kinase was shown to phosphorylate on tyrosine residue(s) of Cbl from the cerebellum in vitro.", "citation": { "type": "PubMed", "name": "Brain Res 2002 Sep 20 950(1-2) 203-9", "id": "12231245" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb73" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "IL-6 induced the activation and tyrosine phosphorylation of p59Fyn, p56/59Hck, and p56Lyn.", "citation": { "type": "PubMed", "name": "Exp Hematol 1997 Dec 25(13) 1367-77", "id": "9406996" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb70" }, "experiment_context": { "species_common_name": "Human", "disease": "Multiple Myeloma", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Mol Cell Biol 1996 Sep 16(9) 4735-43", "id": "8756631" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb61" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Both 
SKAP55 and SKAP55R were found to bind FYB through their SH3 domains and to act as substrates for the FYN kinase in T cells", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 1998 Jul 21 95(15) 8779-84", "id": "9671755" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb5d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Lyn-deficient Mast Cells Show Increased Fyn Kinase Activity.", "citation": { "type": "PubMed", "name": "J Exp Med 2004 Jun 7 199(11) 1491-502", "id": "15173205" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb59" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "In Fyn-/- fibroblasts, activation of Ras by H(2)O(2) was significantly attenuated.", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Jan 21 275(3) 1739-48", "id": "10636870" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb50" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Fyn plays an essential role by positive regulation of Lck activity.", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2004 Oct 12 101(41) 14859-64", "id": "15465914" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb4f" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "naive T-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "We identify Tyr-131 as the major phosphorylation site and Tyr-132 as a minor site and the Src family PTKs Lck and Fyn as enzymes capable of phosphorylating these sites in vivo and in vitro. 
", "citation": { "type": "PubMed", "name": "J Biol Chem 1997 Feb 28 272(9) 5371-4", "id": "9038134" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb4d" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Collectively, our results suggest that BDNF enhances phosphorylation of NR2B tyrosine 1472 through activation of Fyn, leading to alteration of NMDA receptor activity and increased synaptic transmission.", "citation": { "type": "PubMed", "name": "Brain Res 2006 Nov 22 1121(1) 22-34", "id": "17045972" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb44" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "96108162;12522270;16094384;15659558", "citation": { "type": "PubMed", "name": "BMC Bioinformatics 2004 Jun 22 5 79", "id": "15212693" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb3f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Sema3A promotes Cdk5 activity through phosphorylation of Tyr15, a phosphorylation site with Fyn.", "citation": { "type": "PubMed", "name": "Neuron 2002 Aug 29 35(5) 907-20", "id": "12372285" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb3e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "neuron", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "alpha PTP phosphorylation occurred at Tyr789 and required SFKs (Src or Fyn/Yes), FAK, and an intact cytoskeleton.", "citation": { "type": "PubMed", "name": "J Biol Chem 2006 Apr 28 281(17) 11972-80", "id": "16507567" }, "metadata": { "created_by": 
"selventa", "id": "524b351fd3fbfd4c3405eb3c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Fyn but not JAK2 is the major kinase that phosphorylates Cbl", "citation": { "type": "PubMed", "name": "J Biol Chem 1999 Jan 22 274(4) 2097-106", "id": "9890970" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb38" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "This phosphorylation was mediated by Src family tyrosine kinases (STKs), with Fyn appearing to be the dominant kinase. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2005 Nov 11 280(45) 37974-87", "id": "16144838" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb37" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "The activated GSK-3beta phosphorylates Fyn at threonine residue(s). Phosphorylated Fyn accumulates in the nucleus and phosphorylates Nrf2 at tyrosine 568. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2007 Jun 1 282(22) 16502-10", "id": "17403689" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb2a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PKC-eta activity is both necessary and sufficient for Fyn activation, PKC-eta and Fyn are found in association, and recombinant PKC-eta directly activates Fyn. 
", "citation": { "type": "PubMed", "name": "Mol Cell 2000 Nov 6(5) 1121-9", "id": "11106751" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb25" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "Keratinocytes", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Apr 16 279(16) 16311-6", "id": "14761954" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb18" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "we found that PP2, a Fyn kinase inhibitor (33), and leflunomide, an Src kinase inhibitor (34), markedly inhibited the UVB-induced phosphorylation of histone H3 at serine 10 in a dose-dependent manner (Fig. 4, A and C, respectively) but did not change the total histone H3 protein level (Fig. 
4, B and D).", "citation": { "type": "PubMed", "name": "J Biol Chem 2005 Jan 28 280(4) 2446-54", "id": "15537652" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb0b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Thus, in contrast to the activation pathway mediated by serine/threonine phosphorylation, tyrosine phosphorylation of Itch plays a negative role in modulating Itch-promoted ubiquitination.", "citation": { "type": "PubMed", "name": "Mol Cell 2006 Jan 6 21(1) 135-41", "id": "16387660" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb08" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "nterestingly, we also observed that APOBEC3G can be phosphorylated on tyrosine in the presence of Fyn or Hck, suggesting that both kinases may regulate APOBEC3G function. 
", "citation": { "type": "PubMed", "name": "Biochem Biophys Res Commun 2005 Apr 15 329(3) 917-24", "id": "15752743" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb05" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "2) both Fyn and Lck are capable of phosphorylating DAP12; and 3) both kinases coimmunoprecipitate with the Ly-49D/DAP12 complex in NK cells.", "citation": { "type": "PubMed", "name": "J Immunol 2006 Jun 1 176(11) 6615-23", "id": "16709819" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb03" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "The LHR-mediated phosphorylation of the EGFR and Shc, the activation of Ras, and the phosphorylation of ERK1/2 are inhibited by expression of a dominant-negative mutant of Fyn, a member of the Src family kinases (SFKs) expressed in MA-10 cells and by PP2, a pharmacological inhibitor of the SFKs. 
", "citation": { "type": "PubMed", "name": "Endocrinology 2006 Jul 147(7) 3419-27", "id": "16614081" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb02" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "SKAP-HOM is a cytosolic adaptor protein representing a specific substrate for the Src family protein tyrosine kinase Fyn", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2005 Sep 25(18) 8052-63", "id": "16135797" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb01" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Interaction with DAF also activates Fyn kinase, an event that is required for the phosphorylation of caveolin and transport of virus into the cell within caveolar vesicles.", "citation": { "type": "PubMed", "name": "Cell 2006 Jan 13 124(1) 119-31", "id": "16413486" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb00" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "SK is activated following FcepsilonRI aggregation at the surface of mast cells in a LYN- and FYN-dependent manner78, and this results in the phosphorylation of lipid-raft associated sphingosine to form S1P 79,80.", "citation": { "type": "PubMed", "name": "Nat Rev Immunol 2006 Mar 6(3) 218-30", "id": "16470226" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf8" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Kinase-active Lck complexes with and activates Fyn", "citation": { 
"type": "PubMed", "name": "J Biol Chem 2008 Sep 26 283(39) 26409-22", "id": "18660530" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf5" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "Fibroblasts", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Several lipid signaling pathways are activated downstream of Fc?RI via Fyn, including pathways mediated by PI3K, SphK, and PLD", "citation": { "type": "PubMed", "name": "Immunol Rev 2007 Jun 217 255-68", "id": "17498064" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf4" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Furthermore, dominant negative Fyn decreases the ability of squamous carcinoma cells to invade through Matrigel in vitro and to form lung metastases following intravenous injection in nude mice. 
", "citation": { "type": "PubMed", "name": "J Cell Biol 2001 Oct 29 155(3) 447-58", "id": "11684709" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf2" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "squamous cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "SHP2 KO BMMCs displayed several phenotypes associated with reduced Fyn activity, including elevated phosphorylation of the inhibitory pY531 site in Fyn, impaired signaling to Grb2-associated binder 2, Akt/PKB, and IkappaB kinase, and decreased TNF-alpha release compared with control cells.", "citation": { "type": "PubMed", "name": "J Immunol 2009 Oct 15 183(8) 4940-7", "id": "19786542" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf0" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "mast cell", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Hence, we identified p59fyn and p53/56lyn to be stimulated by IL-7.", "citation": { "type": "PubMed", "name": "J Immunol 1994 Jul 1 153(1) 97-109", "id": "7515933" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaee" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "In response to PDGF, Fyn associated with PKCdelta via tyrosine 187. Finally, overexpression of dominant negative Fyn abrogated the decrease in GS expression and reduced the tyrosine phosphorylation of PKCdelta induced by PDGF. 
We conclude that the tyrosine phosphorylation of PKCdelta and its association with tyrosine kinases may be an important point of divergence in PKC signaling.", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Nov 10 275(45) 35491-8", "id": "10945993" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae1" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "Activation of tyrosine kinases Fyn and Lyn, but not Lck, also occurred within 2 min after PAF stimulation in the cells", "citation": { "type": "PubMed", "name": "Prog Lipid Res 2000 Jan 39(1) 41-82", "id": "10729607" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eada" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:FYN) actsIn kin(p(HGNC:FYN))", "summary_text": "In a human embryonic kidney (HEK) 293 cell expression system, PTPalpha enhanced fyn-mediated NR2A and NR2B tyrosine phosphorylation by several-fold.", "citation": { "type": "PubMed", "name": "J Neurochem 2006 Sep 98(6) 1798-809", "id": "16899073" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead2" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:LCK)", "relation": "actsIn", "target": "kin(p(HGNC:LCK))", "directed": false, "label": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051470", "evidences": [ { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Src, Lyn and Lck tyrosine kinases phosphorylate DAPP1 at Tyr(139) in vitro at similar rates in the presence or absence of PtdIns(3,4,5)P(3), and overexpression of these kinases in HEK-293 cells induces the phosphorylation of Tyr(139). 
co-expression of DAPP1 with Src, Lyn or Lck induced a very high level of phosphorylation of DAPP1 at Tyr139, even in unstimulated cells, which was not increased further by agonist stimulation of cells. As Src-family kinases activate the PI 3-kinase pathway in many cells [1], it is possible that the overexpression of Src, Lyn or Lck in HEK-293 cells induces the activation of PI 3-kinase, thereby promoting DAPP1 phosphorylation in unstimulated cells. As Src-family tyrosine kinases are located at the plasma membrane by virtue of myristoylation and palmitoylation of their N-termini [26], it is likely that the role of PtdIns(3,4,5)P3 is to recruit DAPP1 to the cell membrane, where it can be phosphorylated with Src-family tyrosine kinases.", "citation": { "type": "PubMed", "name": "Biochem J 2000 Jul 15 349(Pt 2) 605-10", "id": "10880360" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec63" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "On stimulation of the cells through their T cell antigen receptor, the phosphotyrosine content of LMPTP-B declined rapidly. In co-transfected COS cells, Lck and Fyn caused phosphorylation of LMPTP, whereas Csk, Zap, and Jak2 did not. Most of the phosphate was located at Tyr-131, and some was also located at Tyr-132. Incubation of wild-type LMPTP with Lck and adenosine 5'-O-(thiotriphosphate) caused a 2-fold increase in the activity of LMPTP. 
Site-directed mutagenesis showed that Tyr-131 is important for the catalytic activity of LMPTP, and that thiophosphorylation of Tyr-131, and to a lesser degree Tyr-132, is responsible for the activation.", "citation": { "type": "PubMed", "name": "J Biol Chem 1997 Feb 28 272(9) 5371-4", "id": "9038134" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec0f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "We have studied the phosphorylation of the closely related lck, fyn, and c-src tyrosine protein kinases in leukemic murine T-cell lines that have lost the expression of CD45. The phosphorylation of the lck kinase at an inhibitory site of tyrosine phosphorylation, Tyr-505, was increased by two-, six-, and eightfold in three different cell lines. Phosphorylation of the fyn kinase at the homologous site, Tyr-531, was unaltered in one of these cell lines, but increased by 2.5-fold in the two others.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 1993 Mar 13(3) 1651-6", "id": "8441403" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec02" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "We show that Crry increases early TCR-dependent activation signals, including p56lck-, zeta-associated protein-70 (ZAP-70), Vav-1, Akt, and extracellular signal-regulated kinase (ERK) phosphorylation but also costimulation-dependent mitogen-activated protein kinases (MAPK), such as the stress-activated c-Jun N-terminal kinase (JNK). It is intriguing that Crry costimulus enhanced p38 MAPK activation in T helper cell type 1 (Th1) but not in Th2 cells. 
", "citation": { "type": "PubMed", "name": "J Leukoc Biol 2005 Dec 78(6) 1386-96", "id": "16301324" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebe0" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Here we have examined the role of p62(dok) in CD2-dependent signaling in Jurkat T cells. As previously reported, we find that ligation of the CD2 molecule by mitogenic pairs of anti-CD2 mAbs led to phosphorylation of p62(dok). While CD2-induced p62(dok) tyrosine phosphorylation was independent of both the p36/38 membrane adapter protein linker of activated T cells (LAT) and the ZAP70/Syk family of kinases, it was dependent upon the Src family of kinases including Lck and Fyn. We find further that CD2 engagement induced the association of tyrosine-phosphorylated p62(dok) to Crk-L. ", "citation": { "type": "PubMed", "name": "J Biol Chem 2001 Dec 7 276(49) 45654-61", "id": "11553620" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb86" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Simultaneous overexpression of selenophosphate synthetase and phospholipid-hydroperoxide GSH peroxidase (PHGPx) [250] blocks activation of NF-kB by IL-1. Overexpression of SOD [84] or GSH peroxidase [81, 211] abolished NF-kB activation by preventing degradation of IkB after stimulation with TNF-a. The precise mechanism(s) through which oxidants and reductants influence activation of NF-kB is presently unknown; however, there is evidence that antioxidant enzyme (AOE372), a redox-sensitive thioredoxin peroxidase, regulates IkB phosphorylation [246]. 
Phosphatases The phosphatases are an important component of most signal transduction pathways, because failure to reverse kinase actions can disrupt normal cellular functions. For example, transfection of human fibroblasts with constitutively active ras (hRasV12) inhibits cell growth and ultimately results in a senescentlike phenotype [441]. Similarly, constitutive ERK activation has an inhibitory effect on cell cycle progression [442,443]. Both the serine/threonine phosphatases and the PTPs are known to be redox-sensitive [82,144,153,156,271,281, 444-449]. The mechanism of redox effects on activity is probably best understood for the PTPs. Without exception, the PTPs contain a highly conserved region of 11 amino acid residues in their catalytic domain; specifi- cally, (Ile/Val)-His-Cys-X-Ala-Gly-X-X-Arg-(Ser/Thr)- Gly, where X is a nonconserved amino acid [17]. Either oxidation or mutation of the cysteine renders these molecules inactive [17,281]. H2O2 is a potent inhibitor of PTPs. As in the case of other oxidants, H2O2 probably oxidizes the thiolate anion at the catalytic site [280]. Because formation of a phosphorylcysteine intermediate seems to be critical to PTP activity [450-452], blocking it through oxidation of the cysteine inactivates the molecules. In many cases, treatment of cells with H2O2 stimulates increases in protein phosphorylation by inhibiting phosphatase-catalyzed removal of phosphate groups. Furthermore, mitogens that increase cellular ox- idant production may stimulate phosphorylation indirectly by decreasing phosphatase activity. Additional mechanisms are involved in stimulation of pathways activated by growth factors that increase oxidant production, however, because there are known instances in which the oxidants they produce have no effect on protein phosphorylation. 
For example, TGF-b1 stimulates phosphorylation of numerous proteins and has been shown to cause a large increase in H2O2 production; however, its effects on protein phosphorylation are not blocked by catalase [453]. Furthermore, H2O2 is effective in promoting phosphorylation of phospholipase D, the PDGF receptor, and PKC-a even after pretreatment of Swiss 3T3 fibroblasts with orthovanadate to inhibit phosphatases [454]. Thus, although diminished phosphatase activity may partially account for increased phosphorylation in some cases, it cannot totally account for oxidation effects on phosphorylation in every case. SPECIFICITY In general, there is good agreement between studies on redox effects on any given gene; albeit, not all oxidizing or reducing treatments exert equivalent effects. This is clearly demonstrated in studies of pag , which encodes a protein associated with cellular proliferation. Pag protein inhibits the tyrosine kinase activity of the Abelson (abl ) protein by binding to its SH3-binding domain [455]. BSO, menadione, sodium arsenate, and diethyl maleate all stimulate pag expression, but H2O2 does not [269]. Conversely, H2O2 stimulates c-fos expression (Table 1), although 4-hydroynonenal (a product of v-6-polyunsaturated fatty acid peroxidation) not only fails to induce c-fos expression but is actually inhibitory to c-fos induction by EGF and PDGF [185]. Similarly, some oxidants such as diamide decrease hypoxia-induced signals [201], although others such as H2O2 increase them [124]. 
As might be expected, the effects of any stimu...", "citation": { "type": "PubMed", "name": "Free Radic Biol Med 2000 Feb 1 28(3) 463-99", "id": "10699758" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb33" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Results demonstrated that autophosphorylation of Lck (at Tyr394) facilitates Csk-mediated phosphorylation of Lck at its regulatory site (Tyr505). Subsequent peptide binding studies revealed that Csk can bind to a peptide corresponding to the Lck-autophosphorylation site only when it is phosphorylated. These findings suggest that autophosphorylation of Lck at Tyr394 triggers an interaction with Csk and thereby facilitates subsequent phosphorylation and inactivation of Lck.", "citation": { "type": "PubMed", "name": "Farmaco 1998 Apr 53(4) 266-72", "id": "9658584" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec71" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "The CD45 tyrosine phosphatase has been reported to activate the src family tyrosine kinases Lck and Fyn by dephosphorylating regulatory COOH-terminal tyrosine residues 505 and 528, respectively. 
", "citation": { "type": "PubMed", "name": "Mol Cell Biol 1996 Sep 16(9) 4996-5003", "id": "8756658" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec33" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "The Csk tyrosine kinase negatively regulates the Src family kinases Lck and Fyn in T cells.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2005 Mar 25(6) 2227-41", "id": "15743820" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec17" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Magicin phosphorylation is not observed in an Lck-deficient line, J.CaM1.6, indicating that Lck is the major Src family kinase for phosphorylating magicin in Jurkat cells. Employing site-directed mutagenesis along with in vitro kinase assays, we found that Y64 of magicin is phosphorylated by Lck creating a SH2-Grb2 binding motif. ", "citation": { "type": "PubMed", "name": "Biochem Biophys Res Commun 2006 Sep 29 348(3) 826-31", "id": "16899217" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebaf" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Moreover, SH-PTP1 is constitutively phosphorylated on tyrosine in the Lck-overexpressing lymphoma cell line LSTRA. SH-PTP1 is also a good substrate for recombinant Lck in vitro. 
Comparisons of the tryptic phosphopeptide maps of wild-type SH-PTP1 and deletion and point mutations establish that the two sites (Y-536 and Y-564) which are directly phosphorylated by Lck in vitro are also phosphorylated in vivo in LSTRA cells.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 1994 Mar 14(3) 1824-34", "id": "8114715" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba9" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "We show that Crry increases early TCR-dependent activation signals, including p56lck-, zeta-associated protein-70 (ZAP-70), Vav-1, Akt, and extracellular signal-regulated kinase (ERK) phosphorylation but also costimulation-dependent mitogen-activated protein kinases (MAPK), such as the stress-activated c-Jun N-terminal kinase (JNK). It is intriguing that Crry costimulus enhanced p38 MAPK activation in T helper cell type 1 (Th1) but not in Th2 cells. ", "citation": { "type": "PubMed", "name": "J Leukoc Biol 2005 Dec 78(6) 1386-96", "id": "16301324" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb67" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "T-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Tyr174 of Vav is thought to be the site of phosphorylation by Lck that regulates Vav function (18). A Vav mutant protein with a Tyr (Y) to Phe (F) substitution at position 174 (Y174F) was not phosphorylated by Lck in vitro (Fig. 
2A)", "citation": { "type": "PubMed", "name": "Science 1998 Jan 23 279(5350) 558-60", "id": "9438848" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb55" }, "experiment_context": { "species_common_name": "Rat", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Here we report a novel signaling pathway whereby RhoA can efficiently modulate Stat3 transcriptional activity by inducing its simultaneous tyrosine and serine phosphorylation. Tyrosine phosphorylation is exerted via a member of the Src family of kinases (SrcFK) and JAK2, whereas the JNK pathway mediates serine phosphorylation.", "citation": { "type": "PubMed", "name": "Mol Biol Cell 2001 Oct 12(10) 3282-94", "id": "11598209" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb4a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Evasion from apoptosis is a hallmark of cancer, and recent success using targeted therapeutics underscores the importance of identifying anti-apoptotic survival pathways. Here we utilize RNA interference (RNAi) to systematically screen the kinase and phosphatase component of the human genome.", "citation": { "type": "PubMed", "name": "Nat Cell Biol 2005 Jun 7(6) 591-600", "id": "15864305" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb1e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "The protein-tyrosine kinase Csk is one of the main down-regulators of the Src family of kinases. 
Csk may be involved in the down-regulation of T cell receptor (TCR) signaling by C-terminal tyrosine phosphorylation of Lck and Fyn; however, it is not known how Csk activity is regulated or how it targets these Src family members.", "citation": { "type": "PubMed", "name": "J Biol Chem 1996 Apr 19 271(16) 9698-703", "id": "8621646" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb16" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "The phosphatidylinositol 3-kinase (PI3K) and the tyrosine phosphatase SHP-1 are two Lck substrates that have been implicated in TCR signaling. By contrast, a truncated SHP-1 mutant lacking the Lck phosphorylation site (Tyr(564)) failed to bind p85.", "citation": { "type": "PubMed", "name": "J Biol Chem 1999 Sep 24 274(39) 27583-9", "id": "10488096" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf9" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Defects in TCR-mediated signals underlying these abnormalities have now been investigated using CD45-null T cells. No T cell proliferation was detected in response to a CD3 mAb. 
In thymocytes the p56(lck) and p59(fyn) tyrosine kinases were hyperphosphorylated, and p56(lck) was in its inactive conformation.", "citation": { "type": "PubMed", "name": "J Immunol 1997 Jun 15 158(12) 5773-82", "id": "9190928" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae5" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "In addition, CD38 ligation resulted in an elevated tyrosine kinase activity of the CD38-associated Lck and ultimate activation of interleukin-2 gene transcription. Furthermore, expression of a kinase-deficient Lck mutant suppressed interleukin-2 gene activation in a dose-dependent manner.", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Jan 21 275(3) 1685-90", "id": "10636863" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae3" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "In contrast to receptor tyrosine kinases, both CD2 and TCR lack cytoplasmic kinase domains and must activate Lck, a lipid-modified protein diffusing in the inner leaflet of the membrane. 
", "citation": { "type": "PubMed", "name": "J Cell Biol 2009 May 4 185(3) 521-34", "id": "19398758" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec76" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "2) both Fyn and Lck are capable of phosphorylating DAP12; and 3) both kinases coimmunoprecipitate with the Ly-49D/DAP12 complex in NK cells.", "citation": { "type": "PubMed", "name": "J Immunol 2006 Jun 1 176(11) 6615-23", "id": "16709819" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec75" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Modified assertion", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Nov 24 275(47) 37224-31", "id": "10978311" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec37" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2002 May 22(10) 3527-36", "id": "11971983" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec30" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "The protein-tyrosine kinase p56lck catalyzed phosphorylation of GST-Erk1 at two autophosphorylations sites, including Tyr-204, and at a novel site.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 1993 Aug 13(8) 4679-90", "id": "7687743" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec23" }, "experiment_context": { "species_common_name": "Human", 
"disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Yes and Lck are known to be enriched in rafts and may mediate the activation of Shc when Fyn is not expressed.", "citation": { "type": "PubMed", "name": "Science 1999 Aug 13 285(5430) 1028-32", "id": "10446041" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec21" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Eur J Biochem 2001 Dec 268(23) 6083-96", "id": "11733002" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec1f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Lck phosphorylated ezrin in vitro, and the major phosphotyrosine was identified as Y145", "citation": { "type": "PubMed", "name": "FEBS Lett 2003 Jan 30 535(1-3) 82-6", "id": "12560083" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebf0" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Interestingly, lck (null) CD8+ T cells expressed higher levels of CD48 when compared with wt T cells", "citation": { "type": "PubMed", "name": "J Immunol 2004 Jul 1 173(1) 174-80", "id": "15210772" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebde" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 1997 Jun 6 272(23) 14562-70", "id": 
"9169414" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebda" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "We have identified IL-7-induced activation of three cyoplasmic tyrosine kinases in T cells, Jak1, Jak3, and the src-like kinase p56lck.", "citation": { "type": "PubMed", "name": "Blood 1995 Sep 15 86(6) 2077-85", "id": "7662955" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd6" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "FEBS Lett 1999 Mar 26 447(2-3) 241-6", "id": "10214954" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd5" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Slit-2 can block the CXCL12-induced activation of the Src and Lck kinases", "citation": { "type": "PubMed", "name": "J Leukoc Biol 2007 Sep 82(3) 465-76", "id": "17565045" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebc7" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Feb 14 278(7) 5163-71", "id": "12454019" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebbb" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 
15212693", "citation": { "type": "PubMed", "name": "Eur J Immunol 2001 Apr 31(4) 1191-8", "id": "11298344" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb9" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Oncogene 2002 Apr 4 21(15) 2357-64", "id": "11948419" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb5" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Coexpression in 293T cells demonstrated that Lck kinase activity and Cbl ubiquitin ligase activity were essential for Lck ubiquitination", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2002 Mar 19 99(6) 3794-9", "id": "11904433" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eba6" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Mol Pharmacol 2002 Sep 62(3) 672-9", "id": "12181444" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb90" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 2000 Feb 4 275(5) 3603-9", "id": "10652356" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb77" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn 
kin(p(HGNC:LCK))", "summary_text": "9525940;9102067;12522270;11997497;10829062", "citation": { "type": "PubMed", "name": "BMC Bioinformatics 2004 Jun 22 5 79", "id": "15212693" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb75" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "In heterogeneous COS-1 cells, Cbl-b was phosphorylated on tyrosine residues by both Syk- (Syk/Zap-70) and Src- (Fyn/Lck) family kinases", "citation": { "type": "PubMed", "name": "Oncogene 1999 Feb 4 18(5) 1147-56", "id": "10022120" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb69" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "A synthetic peptide modeled after the putative regulatory phosphorylation site in murine p42mapk (Tyr185) was phosphorylated by p56lck with a similar Vmax, but a fivefold lower Michaelis constant (Km) than a peptide containing the Tyr394 autophosphorylation site from p56lck. 
", "citation": { "type": "PubMed", "name": "Science 1992 Feb 14 255(5046) 853-5", "id": "1311128" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb65" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 1998 Aug 7 273(32) 20487-93", "id": "9685404" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb62" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Mol Endocrinol 1995 Jan 9(1) 24-33", "id": "7539106" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb5f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Lck-mediated phosphorylation and activation of ZAP-70 was defective in Fyn-/- cells.", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2004 Oct 12 101(41) 14859-64", "id": "15465914" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb4e" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Biochemistry 2005 Nov 22 44(46) 15257-68", "id": "16285729" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb41" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Modified 
assertion", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 1994 Feb 1 91(3) 873-7", "id": "7508123" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb35" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "the src-family PTK, lck, phosphorylates PI3K but also SHP-1, an SH2 domain-containing non-receptor tyrosine phosphatase SHP-1 was proposed to regulate lck-induced PI3K phosphorylation and activity", "citation": { "type": "PubMed", "name": "Mol Immunol 2002 Jun 38(15) 1087-99", "id": "12044776" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb2c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Fyn plays an essential role by positive regulation of Lck activity.", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2004 Oct 12 101(41) 14859-64", "id": "15465914" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb28" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "naive T-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": " Overexpression of c-Cbl, a ligand of the Lck SH3 domain, depleted Lck from lipid rafts in Jurkat cells", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Feb 15 277(7) 5683-91", "id": "11741956" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb19" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "# Ariadne: Cross-linking CD44 on T cells activates the tyrosine kinase, p56 Lck (Lck), which associates with CD44 ( 14 ). 
[Regulation]", "citation": { "type": "PubMed", "name": "J Biol Chem 2001 Aug 3 276(31) 28767-73", "id": "11369760" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb17" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Kinase-active Lck complexes with and activates Fyn", "citation": { "type": "PubMed", "name": "J Biol Chem 2008 Sep 26 283(39) 26409-22", "id": "18660530" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb10" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "Fibroblasts", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "We first observed that in the absence of Jak3, both Lck and Syk had the capacity to phosphorylate Stat3 and Stat5a.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2000 Jun 20(12) 4371-80", "id": "10825200" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb0c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Blood 2002 Feb 1 99(3) 957-65", "id": "11806999" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb0a" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Aug 22 278(34) 31972-9", "id": "12783885" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaff" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn 
kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "J Biol Chem 1998 Jun 19 273(25) 15765-72", "id": "9624175" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaf1" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "In T cells, CD4/Lck-dependent tyrosine phosphorylation on Shc was markedly diminished when Y317 was mutated, suggesting a preference of Lck for the Y317 site.", "citation": { "type": "PubMed", "name": "Eur J Immunol 1998 Aug 28(8) 2265-75", "id": "9710204" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaef" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "the association of APS with Vav3 in turn enhanced the Lck-mediated phosphorylation of Vav3.", "citation": { "type": "PubMed", "name": "Oncogene 2002 Oct 31 21(50) 7720-9", "id": "12400014" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eae6" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Luciferase reporter gene assay indicated that Lck induces NFkappaB-dependent urokinase type plasminogen activator (uPA) promoter activity in presence of H/R.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Dec 26 278(52) 52598-612", "id": "14534291" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eade" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "Lnk is tyrosine-phosphorylated by p56lck.", "citation": { "type": "PubMed", "name": "J 
Immunol 2000 May 15 164(10) 5199-206", "id": "10799879" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead6" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:LCK) actsIn kin(p(HGNC:LCK))", "summary_text": "PhosphoElm data from PMID 15212693", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2001 Jun 5 98(12) 6587-92", "id": "11381116" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eacf" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:FOXO3)", "relation": "decreases", "target": "bp(GOBP:\"CD8-positive, alpha-beta T cell proliferation\")", "directed": false, "label": "p(HGNC:FOXO3) decreases bp(GOBP:\"CD8-positive, alpha-beta T cell proliferation\")", "metadata": { "casual": false, "createdBy": "edwardsanders", "edgeId": "52d177f3bf21ca0758e0b198", "evidences": [ { "bel_statement": "p(HGNC:FOXO3) decreases bp(GOBP:\"CD8-positive, alpha-beta T cell proliferation\")", "summary_text": "\"These data suggested that FOXO3 downregulates the accumulation of CD8 T cells in tissue specific fashion during an acute LCMV [lymphocytic choriomeningitis virus] infection.\" (p. 
3)", "citation": { "type": "Other", "name": "Sullivan JA, Kim EH, Plisch EH, \"FOXO3 regulates CD8 T cell memory by T cell-intrinsic mechanisms,\" PLoS Pathog, 2012, 8:1002533.", "id": "22359505" }, "metadata": { "created_by": "edwardsanders", "id": "52d177f3bf21ca0758e0b130" }, "experiment_context": { "species_common_name": "mouse", "disease": "Viral infection", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:IL15)", "relation": "directlyIncreases", "target": "cat(p(MGI:Il2rg))", "directed": false, "label": "p(HGNC:IL15) directlyIncreases cat(p(MGI:Il2rg))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051486", "evidences": [ { "bel_statement": "p(HGNC:IL15) directlyIncreases cat(p(MGI:Il2rg))", "summary_text": "IL-7 and IL-15 were identified as the cytokines responsible for CD8+ cytotoxic T cell lineage specification in vivo. Additionally, we found that small numbers of aberrant CD8+ T cells expressing Runx3d could arise without γc signaling, but these cells were developmentally arrested before expressing cytotoxic lineage genes. Thus, γc-transduced cytokine signals are required for cytotoxic lineage specification", "citation": { "type": "Other", "name": "", "id": "23109710" }, "metadata": { "created_by": "ilyayudkevichstudent", "id": "5305896f89e3620d90b7de09" }, "experiment_context": { "species_common_name": "mouse", "disease": "", "cell": "", "tissue": "cd8+ t cells" } }, { "bel_statement": "p(HGNC:IL15) directlyIncreases cat(p(MGI:Il2rg))", "summary_text": "IL-15 utilizes ... 
the common cytokine receptor γ-chain (CD132) for signal transduction in lymphocytes", "citation": { "type": "Other", "name": "", "id": "20335267" }, "metadata": { "created_by": "ilyayudkevichstudent", "id": "52fadfeb89e3620e1c996675" }, "experiment_context": { "species_common_name": "human", "disease": "", "cell": "", "tissue": "lung" } }, { "bel_statement": "p(HGNC:IL15) directlyIncreases cat(p(MGI:Il2rg))", "summary_text": "This Network edge has no supporting evidence. Please add real evidence to this edge prior to deleting.", "citation": { "type": "PubMed", "name": "", "id": "0" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb31" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:IDO2)", "relation": "negativeCorrelation", "target": "p(HGNC:IDO1)", "directed": false, "label": "p(HGNC:IDO2) negativeCorrelation p(HGNC:IDO1)", "metadata": { "casual": false, "createdBy": "csauco", "edgeId": "548b0b2d89e3620fe090276c", "evidences": [ { "bel_statement": "p(HGNC:IDO2) negativeCorrelation p(HGNC:IDO1)", "summary_text": "Heme-binding-mediated negative regulation of the tryptophan metabolic enzyme indoleamine 2,3-dioxygenase 1 (IDO1) by IDO2 .// hIDO2 plays a novel role as a negative regulator of hIDO1 by competing for heme-binding with hIDO1", "citation": { "type": "Other", "name": "Lee YK1, Lee HB1, Shin DM2, Kang MJ3, Yi EC3, Noh S4, Lee J4, Lee C4, Min CK5, Choi EY1", "id": "25394548" }, "metadata": { "created_by": "mirymg96", "id": "548b3a6989e3620fe091fff0" }, "experiment_context": { "species_common_name": "human", "disease": "cancer and immunological disorders", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:IDO2) negativeCorrelation p(HGNC:IDO1)", "summary_text": "These results demonstrate that hIDO2 plays a novel role as a negative regulator of hIDO1 by competing for heme-binding with hIDO1, and provide information useful for development of therapeutic 
strategies to control cancer and immunological disorders that target IDO molecules.", "citation": { "type": "Other", "name": "", "id": "25394548" }, "metadata": { "created_by": "csauco", "id": "548b0b2d89e3620fe09026ff" }, "experiment_context": { "species_common_name": "human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:CCR5)", "relation": "increases", "target": "path(SDIS:\"T-cell migration\")", "directed": false, "label": "p(HGNC:CCR5) increases path(SDIS:\"T-cell migration\")", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051495", "evidences": [ { "bel_statement": "p(HGNC:CCR5) increases path(SDIS:\"T-cell migration\")", "summary_text": "Most importantly, CCR5 deficiency resulted in decreased recruitment of memory T cells expressing key effector molecules and impaired control of virus replication during the initial stages of a secondary response. ", "citation": { "type": "PubMed", "name": "Immunity 2008 Jul 29(1) 101-13", "id": "18617426" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec4c" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "t-cell", "tissue": "" } } ] } }, { "source": "p(HGNC:CCL3)", "relation": "directlyIncreases", "target": "cat(p(HGNC:CCR5))", "directed": false, "label": "p(HGNC:CCL3) directlyIncreases cat(p(HGNC:CCR5))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051472", "evidences": [ { "bel_statement": "p(HGNC:CCL3) directlyIncreases cat(p(HGNC:CCR5))", "summary_text": "Numerous studies have shown that immature human and mouse blood- and bone marrow-derived DC subsets express a panel of inflammatory chemokine receptors (CCR1-6,8,9, CXCR3,4, CX3CR1) [Table 1 and reviewed in (1-5)]. 
[Table 1 Chemokine receptors expressed by DC and the functional outcome of receptor ligation}]", "citation": { "type": "PubMed", "name": "Clin Lab Med 2008 Sep 28(3) 375-84, v", "id": "19028258" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec01" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "dendritic cell", "tissue": "" } }, { "bel_statement": "p(HGNC:CCL3) directlyIncreases cat(p(HGNC:CCR5))", "summary_text": "CL3 (previously known as MIP-1α) is a ligand for the chemokine receptors CCR1 and CCR5, and we previously showed that expression of CCR5 by lung CD8+ T cells increases with spirometrically-defined COPD severity", "citation": { "type": "Other", "name": "", "id": "23374856" }, "metadata": { "created_by": "ilyayudkevichstudent", "id": "53057fa789e36207f81f6980" }, "experiment_context": { "species_common_name": "human", "disease": "copd", "cell": "", "tissue": "cd8+ t cells" } }, { "bel_statement": "p(HGNC:CCL3) directlyIncreases cat(p(HGNC:CCR5))", "summary_text": "The CCR5 ligands CCL3 and CCL4 are produced by Th cells and DCs after their antigen-specific interaction, thereby creating a chemokine micromilieu which recruits naïve CCR5-expressing CTLs.", "citation": { "type": "Other", "name": "", "id": "22566821" }, "metadata": { "created_by": "ilyayudkevichstudent", "id": "52f9947a89e3620ba81606ae" }, "experiment_context": { "species_common_name": "human", "disease": "", "cell": "cytotoxic t cells", "tissue": "" } } ] } }, { "source": "complex(SCOMP:\"T Cell Receptor Complex\")", "relation": "actsIn", "target": "cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "directed": false, "label": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "metadata": { "casual": false, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c3405146f", "evidences": [ { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor 
Complex\"))", "summary_text": "Jurkat transfectants overexpressing Chat-H show a marked increase in interleukin-2 production after costimulation of T cell receptor and CD28. The degree of JNK activation is enhanced substantially in the Chat-H transfectants upon costimulation. WE found that Chat-H forms a complex with Pyk2H and enhances its tyrosine 402 phosphorylation, an up-regulator of the JNK pathway. The Src homology-2 domain mutant of Chat-H loses this signal modulating activity.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Feb 21 278(8) 6012-7", "id": "12486027" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eafc" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "Indeed, our present findings that in vivo CD28 costimulation induced both TCR? down-regulation and CD69 up-regulation supports the importance of Lck binding to CD28 for costimulatory function, because both TCR? down-regulation and CD69 up-regulation require Lck activation (18, 19, 25), and we found that signaling of both functions required an intact Lck binding motif in the CD28 cytosolic tail. In CTLA-4-deficient mice, disease induction requires CD28 enhancement of TCR signaling by autoreactive TCR specificities with presumably high affinity for self-ligands, because disease induction is delayed by in vivo expression of transgenic TCRs with low affinity for self ligands (26, 27). Thus, we think that the importance of an intact Lck binding motif in the CD28 cytosolic tail for disease induction in CTLA-4-deficient mice reflects the fact that, by increasing the residency time of Lck in the immunological synapse, CD28 costimulation specifically increases the intensity and duration of in vivo TCR signaling by T cells with autoreactive TCR specificities. 
", "citation": { "type": "PubMed", "name": "Proc Natl Acad Sci U S A 2007 Aug 21 104(34) 13756-61", "id": "17702861" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec6e" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "T-cell", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "We show that Crry increases early TCR-dependent activation signals, including p56lck-, zeta-associated protein-70 (ZAP-70), Vav-1, Akt, and extracellular signal-regulated kinase (ERK) phosphorylation but also costimulation-dependent mitogen-activated protein kinases (MAPK), such as the stress-activated c-Jun N-terminal kinase (JNK). It is intriguing that Crry costimulus enhanced p38 MAPK activation in T helper cell type 1 (Th1) but not in Th2 cells. ", "citation": { "type": "PubMed", "name": "J Leukoc Biol 2005 Dec 78(6) 1386-96", "id": "16301324" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebdd" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "T-cell", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "On stimulation of the cells through their T cell antigen receptor, the phosphotyrosine content of LMPTP-B declined rapidly. In co-transfected COS cells, Lck and Fyn caused phosphorylation of LMPTP, whereas Csk, Zap, and Jak2 did not. Most of the phosphate was located at Tyr-131, and some was also located at Tyr-132. Incubation of wild-type LMPTP with Lck and adenosine 5'-O-(thiotriphosphate) caused a 2-fold increase in the activity of LMPTP. 
Site-directed mutagenesis showed that Tyr-131 is important for the catalytic activity of LMPTP, and that thiophosphorylation of Tyr-131, and to a lesser degree Tyr-132, is responsible for the activation.", "citation": { "type": "PubMed", "name": "J Biol Chem 1997 Feb 28 272(9) 5371-4", "id": "9038134" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb0" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "Conclusion. The molecular basis of {gamma}{delta} TCR recognition has remained an enigma, as has the immunobiology of {gamma}{delta} T cells. Many characteristics of {gamma}{delta} T cells suggest that they participate early in the immune response, similar to other members of the innate immune system. These features include direct recognition of antigen and immediate effector outcomes such as cytokine release (30) and cytotoxicity (31).", "citation": { "type": "PubMed", "name": "Science 2005 Apr 8 308(5719) 227-31", "id": "15821084" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb76" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "It is well established that T lymphocytes undergo homeostatic proliferation in lymphopenic environment. The homeostatic proliferation requires recognition of the major histocompatibility complex on the host. Recent studies have demonstrated that costimulation-mediated CD28, 4-1BB, and CD40 is not required for T cell homeostatic proliferation. It has been suggested that homeostatic proliferation is costimulation independent. 
Here, we report that T cells from mice with a targeted mutation of CD24 have a remarkably reduced rate of proliferation when adoptively transferred into syngeneic lymphopenic hosts.", "citation": { "type": "PubMed", "name": "J Exp Med 2004 Oct 18 200(8) 1083-9", "id": "15477346" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb53" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "lymphocyte", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "It is well established that T lymphocytes undergo homeostatic proliferation in lymphopenic environment. The homeostatic proliferation requires recognition of the major histocompatibility complex on the host. Recent studies have demonstrated that costimulation-mediated CD28, 4-1BB, and CD40 is not required for T cell homeostatic proliferation. It has been suggested that homeostatic proliferation is costimulation independent. Here, we report that T cells from mice with a targeted mutation of CD24 have a remarkably reduced rate of proliferation when adoptively transferred into syngeneic lymphopenic hosts.", "citation": { "type": "PubMed", "name": "J Exp Med 2004 Oct 18 200(8) 1083-9", "id": "15477346" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb48" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "Overexpression of an SH2 domain-defective Shb causes diminished phosphorylation of SLP-76 and Vav and consequently decreased activation of c-Jun kinase upon T cell receptor (TCR) stimulation. 
-- [bcd] from FFT figure 5C: The precipitated proteins were resolved on SDS/PAGE and blotted for phosphotyrosine (4G10)", "citation": { "type": "PubMed", "name": "Eur J Biochem 2002 Jul 269(13) 3279-88", "id": "12084069" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec3b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "recognition of the MHC class II heterodimer-antigen complex by the T-cell receptor and the accessory protein CD4 of T lymphocytes leads to the generation of an immune response", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2001 Oct 21(19) 6495-506", "id": "11533238" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebd0" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "T-cell", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "We have previously demonstrated that a signal via TSA-1/Sca-2 inhibits T cell receptor (TCR)-mediated T cell activation and apoptosis.", "citation": { "type": "PubMed", "name": "J Biol Chem 1998 May 15 273(20) 12301-6", "id": "9575182" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebb2" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "In contrast, DCs incubated with aluminum/OVA activated CD4(+) T cells to secrete IL-4 and IL-5 as well as IFN-gamma.", "citation": { "type": "PubMed", "name": "Vaccine 2007 Jun 6 25(23) 4575-85", "id": "17485153" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ebad" }, 
"experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "T-cell", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "Pleckstrin-2 expressed in Jurkat T cells bound to the cellular membrane and enhanced actin-dependent spreading only after stimulation of the T-cell antigen receptor or the integrin alpha4beta1.", "citation": { "type": "PubMed", "name": "Blood 2007 Feb 1 109(3) 1147-55", "id": "17008542" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb8c" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "K9/vIRF-1 and K10.5/K10.6/vIRF-3. In addition to the latently expressed vIRFs discussed above, KSHV encodes two other homologs of these proteins (Fig. 1). vIRF-1, encoded by K9, transforms cells in culture, is tumorigenic in nude mice, and inhibits apoptosis induced by Sendai virus infection, IFN-{alpha}, IFN-ß, TNF-{alpha}, TCR/CD3 cross-linking, and p53", "citation": { "type": "PubMed", "name": "Microbiol Mol Biol Rev 2003 Jun 67(2) 175-212, table of contents", "id": "12794189" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb7f" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "HIP-55 (SH3P7 or mAbp1), an actin-binding adaptor protein, interacts with and is tyrosine phosphorylated by ZAP-70, which is a crucial proximal protein tyrosine kinase for TCR signaling. 
HIP-55 knockout T cells displayed defective T-cell proliferation, decreased cytokine production, and decreased up-regulation of the activation markers induced by TCR stimulation.", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2005 Aug 25(16) 6869-78", "id": "16055701" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb7e" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "An increase in the surface density of activating ligand (immobilized anti-TcR mAb) enhanced both secretion of IFN and secretion of granules.", "citation": { "type": "PubMed", "name": "Cell Immunol 1989 Nov 124(1) 64-76", "id": "2478302" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb5b" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "To better understand the contribution of the P-I region to PLC-gamma1 activation, we mapped the PLC-gamma1-binding site within the region, and created a SLP-76 mutant that fails to bind SH3(PLC), but is fully functional, mediating TCR-induced phosphorylation of PLC-gamma1 at tyrosine 783, calcium flux, and nuclear factor of activated T cells activation.", "citation": { "type": "PubMed", "name": "J Biol Chem 2005 Mar 4 280(9) 8364-70", "id": "15623534" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb47" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "The interaction of anti-TSA with FcgammaRIIB resulted in an inhibition 
of the ability of the FcgammaRIIB to cross-link and/or aggregate soluble anti-CD3 or soluble anti-Cbeta T-cell receptor (TCR), leading to an inhibition of induction of expression of CD25 and CD69, interleukin (IL)-2 production and proliferation of naive T cells.", "citation": { "type": "PubMed", "name": "Immunology 2001 Sep 104(1) 28-36", "id": "11576217" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb15" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "This redistribution brings VHR into the vicinity of the triggered TCRs, where VHR is phosphorylated at Tyr138 by ZAP-70. We found that this phosphorylation is required for the function of VHR as an inhibitor of the Erk2 and Jnk MAPKs", "citation": { "type": "PubMed", "name": "Nat Immunol 2003 Jan 4(1) 44-8", "id": "12447358" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ead0" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "t-cell", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "HIP-55 interacted with ZAP-70, a critical protein-tyrosine kinase in TCR signaling, and this interaction was induced by TCR signaling.", "citation": { "type": "PubMed", "name": "J Biol Chem 2003 Dec 26 278(52) 52195-202", "id": "14557276" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb99" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "An increase in the surface density of activating ligand (immobilized anti-TcR mAb) enhanced both secretion of IFN and 
secretion of granules.", "citation": { "type": "PubMed", "name": "Cell Immunol 1989 Nov 124(1) 64-76", "id": "2478302" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb95" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "Hef1 is tyrosine-phosphorylated following beta-1-integrin and/or T cell receptor stimulation and is thus considered to be important for immunological reactions", "citation": { "type": "PubMed", "name": "J Biol Chem 2002 Apr 26 277(17) 14933-41", "id": "11827972" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb8e" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "T cell receptor signaling increased expression of the protein arginine methyltransferase PRMT1, which in turn methylated the nuclear factor of activated T cells (NFAT) cofactor protein, NIP45.", "citation": { "type": "PubMed", "name": "Mol Cell 2004 Aug 27 15(4) 559-71", "id": "15327772" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eb32" }, "experiment_context": { "species_common_name": "Mouse", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "We have previously demonstrated that a signal via TSA-1/Sca-2 inhibits T cell receptor (TCR)-mediated T cell activation and apoptosis.", "citation": { "type": "PubMed", "name": "J Biol Chem 1998 May 15 273(20) 12301-6", "id": "9575182" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eafb" }, "experiment_context": { "species_common_name": "Human", 
"disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "Using RNA interference and overexpression experiments, the HIP-55-HPK1 complex was found to negatively regulate nuclear factor of activated T cell (NFAT) activation by the T cell antigen receptor.", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Apr 9 279(15) 15550-60", "id": "14729663" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eaea" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "complex(SCOMP:\"T Cell Receptor Complex\") actsIn cat(complex(SCOMP:\"T Cell Receptor Complex\"))", "summary_text": "In fact, GILZ overexpression inhibits TCR-activated NF-kappaB nuclear translocation, interleukin-2 production, FasL upregulation, and the consequent activation-induced apoptosis", "citation": { "type": "PubMed", "name": "Mol Cell Biol 2002 Nov 22(22) 7929-41", "id": "12391160" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405eadb" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "a(CHEBI:acrolein)", "relation": "increases", "target": "bp(GOBP:\"CD8-positive, alpha-beta T cell proliferation\")", "directed": false, "label": "a(CHEBI:acrolein) increases bp(GOBP:\"CD8-positive, alpha-beta T cell proliferation\")", "metadata": { "casual": false, "createdBy": "edwardsanders", "edgeId": "52e8181cbf21ca0b1807399f", "evidences": [ { "bel_statement": "a(CHEBI:acrolein) increases bp(GOBP:\"CD8-positive, alpha-beta T cell proliferation\")", "summary_text": "\"Acrolein exposure induces a time-dependent increase in the number of CD8+ cells in the lungs of wild-type mice.\"", "citation": { "type": "Other", "name": "Borchers MT, Wesselkamper SC, Harris NL, \"CD8+ T cells contribute to macrophage 
accumulation and airspace enlargement following repeated irritant exposures,\" Exp Mol Pathol, 2007, 83:301-10.", "id": "17950725" }, "metadata": { "created_by": "edwardsanders", "id": "52e8181cbf21ca0b1807392e" }, "experiment_context": { "species_common_name": "mouse", "disease": "", "cell": "", "tissue": "Lung" } } ] } }, { "source": "p(HGNC:IL2RG)", "relation": "increases", "target": "bp(GOBP:\"T cell activation\")", "directed": false, "label": "p(HGNC:IL2RG) increases bp(GOBP:\"T cell activation\")", "metadata": { "casual": false, "createdBy": "mberra", "edgeId": "5477834189e36203806fca55", "evidences": [ { "bel_statement": "p(HGNC:IL2RG) increases bp(GOBP:\"T cell activation\")", "summary_text": "Thus, sγc expression is a naturally occurring immunomodulator that regulates γc cytokine signaling and controls T cell activation and differentiation.", "citation": { "type": "Other", "name": "Immunity. 2014 Jun 19;40(6):910-23. doi: 10.1016/j.immuni.2014.04.020. Epub 2014 Jun 5.", "id": "24909888" }, "metadata": { "created_by": "mberra", "id": "5477834189e36203806fc9e5" }, "experiment_context": { "species_common_name": "mouse", "disease": "", "cell": "", "tissue": "" } } ] } }, { "source": "p(HGNC:CXCL16)", "relation": "directlyIncreases", "target": "cat(p(HGNC:CXCR6))", "directed": false, "label": "p(HGNC:CXCL16) directlyIncreases cat(p(HGNC:CXCR6))", "metadata": { "casual": true, "createdBy": "selventa", "edgeId": "524b3517d3fbfd4c34051478", "evidences": [ { "bel_statement": "p(HGNC:CXCL16) directlyIncreases cat(p(HGNC:CXCR6))", "summary_text": "Interleukin-22 (IL-22) plays a critical role in mucosal defense, although the molecular mechanisms that ensure IL-22 tissue distribution remain poorly understood. We show that the CXCL16-CXCR6 chemokine-chemokine receptor axis regulated group 3 innate lymphoid cell (ILC3) diversity and function. 
CXCL16 was constitutively expressed by CX3CR1(+) intestinal dendritic cells (DCs) and coexpressed with IL-23 after Citrobacter rodentium infection. Intestinal ILC3s expressed CXCR6 and its ablation generated a selective loss of the NKp46(+) ILC3 subset, a depletion of intestinal IL-22, and the inability to control C. rodentium infection. CD4(+) ILC3s were unaffected by CXCR6 deficiency and remained clustered within lymphoid follicles. In contrast, the lamina propria of Cxcr6(-/-) mice was devoid of ILC3s. The loss of ILC3-dependent IL-22 epithelial stimulation reduced antimicrobial peptide expression that explained the sensitivity of Cxcr6(-/-) mice to C. rodentium. Our results delineate a critical CXCL16-CXCR6 crosstalk that coordinates the intestinal topography of IL-22 secretion required for mucosal defense.", "citation": { "type": "Other", "name": "Immunity. 2014 Nov 20;41(5):776-88. doi: 10.1016/j.immuni.2014.10.007. Epub 2014 Nov 6", "id": "25456160" }, "metadata": { "created_by": "alejandroferreiromorales.cr", "id": "548c384c89e3620fe0a1ec79" }, "experiment_context": { "species_common_name": "human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCL16) directlyIncreases cat(p(HGNC:CXCR6))", "summary_text": "CXCR6, the receptor for CXCL16", "citation": { "type": "PubMed", "name": "J Biol Chem 2004 Jan 30 279(5) 3188-96", "id": "14625285" }, "metadata": { "created_by": "selventa", "id": "524b351fd3fbfd4c3405ec26" }, "experiment_context": { "species_common_name": "Human", "disease": "", "cell": "", "tissue": "" } }, { "bel_statement": "p(HGNC:CXCL16) directlyIncreases cat(p(HGNC:CXCR6))", "summary_text": "We have found that expression of CXCR6 is greatly increased on BAL T cells compared with blood T cells and that there are very high levels of CXCL16 in both the normal and inflamed lung which is predominantly located in AM.", "citation": { "type": "Other", "name": "", "id": "16393323" }, "metadata": { "created_by": 
"ilyayudkevichstudent", "id": "52fadeec89e3620e1c9964c6" }, "experiment_context": { "species_common_name": "human", "disease": "", "cell": "", "tissue": "lung" } } ] } }, { "source": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\")", "relation": "increases", "target": "p(HGNC:IFNG)", "directed": false, "label": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:IFNG)", "metadata": { "casual": false, "createdBy": "ganna.androsova", "edgeId": "5538c4a689e362097c008f8b", "evidences": [ { "bel_statement": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:IFNG)", "summary_text": "CD8/CD28(null) cells were increased in both current- and ex-smoker COPD groups; these cells expressed significantly more interferon (IFN)-γ, OX40, 4-1BB, CTLA4, granzyme and perforin when stimulated than CD8/CD28(+) T cells.", "citation": { "type": "Other", "name": "", "id": "21910726" }, "metadata": { "created_by": "ganna.androsova", "id": "5538c4a589e362097c008f0b" }, "experiment_context": { "species_common_name": "human", "disease": "COPD", "cell": "CD8/CD28(null) T cells", "tissue": "lung" } } ] } }, { "source": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\")", "relation": "increases", "target": "p(HGNC:TNFRSF4)", "directed": false, "label": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:TNFRSF4)", "metadata": { "casual": false, "createdBy": "ganna.androsova", "edgeId": "5538c59389e362097c010f2a", "evidences": [ { "bel_statement": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:TNFRSF4)", "summary_text": "CD8/CD28(null) cells were increased in both current- and ex-smoker COPD groups; these cells expressed significantly more interferon (IFN)-γ, OX40, 4-1BB, CTLA4, granzyme and perforin when stimulated than CD8/CD28(+) T cells.", "citation": { "type": "Other", "name": "", "id": "21910726" }, "metadata": { "created_by": "ganna.androsova", "id": "5538c59389e362097c010ea8" }, 
"experiment_context": { "species_common_name": "human", "disease": "COPD", "cell": "CD8/CD28(null) cells", "tissue": "lung" } } ] } }, { "source": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\")", "relation": "increases", "target": "p(HGNC:CTLA4)", "directed": false, "label": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:CTLA4)", "metadata": { "casual": false, "createdBy": "ganna.androsova", "edgeId": "5538dd3389e362097c020e86", "evidences": [ { "bel_statement": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:CTLA4)", "summary_text": "CD8/CD28(null) cells were increased in both current- and ex-smoker COPD groups; these cells expressed significantly more interferon (IFN)-γ, OX40, 4-1BB, CTLA4, granzyme and perforin when stimulated than CD8/CD28(+) T cells.", "citation": { "type": "Other", "name": "", "id": "21910726" }, "metadata": { "created_by": "ganna.androsova", "id": "5538dd3289e362097c020e02" }, "experiment_context": { "species_common_name": "human", "disease": "COPD", "cell": "CD8/CD28(null) cells", "tissue": "lung" } } ] } }, { "source": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\")", "relation": "increases", "target": "p(HGNC:GZMA)", "directed": false, "label": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:GZMA)", "metadata": { "casual": false, "createdBy": "ganna.androsova", "edgeId": "5538ddd889e362097c02512f", "evidences": [ { "bel_statement": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:GZMA)", "summary_text": "CD8/CD28(null) cells were increased in both current- and ex-smoker COPD groups; these cells expressed significantly more interferon (IFN)-γ, OX40, 4-1BB, CTLA4, granzyme and perforin when stimulated than CD8/CD28(+) T cells.", "citation": { "type": "Other", "name": "", "id": "21910726" }, "metadata": { "created_by": "ganna.androsova", "id": "5538ddd889e362097c0250a9" }, "experiment_context": { "species_common_name": 
"human", "disease": "COPD", "cell": "CD8/CD28(null) cells", "tissue": "lung" } } ] } }, { "source": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\")", "relation": "increases", "target": "p(HGNC:TNF)", "directed": false, "label": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:TNF)", "metadata": { "casual": false, "createdBy": "ganna.androsova", "edgeId": "5538e40e89e362097c02f11e", "evidences": [ { "bel_statement": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:TNF)", "summary_text": "There was an increase in intracellular CD8(+) T cell Th1 proinflammatory cytokines in some COPD groups in the peripheral blood and in CD8(+) T cell tumour necrosis factor (TNF)-alpha in some COPD groups and smoker controls in BAL and BB.", "citation": { "type": "Other", "name": "", "id": "17614970" }, "metadata": { "created_by": "ganna.androsova", "id": "5538e40e89e362097c02f092" }, "experiment_context": { "species_common_name": "human", "disease": "COPD", "cell": "CD8(+) T cell", "tissue": "lung" } } ] } }, { "source": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\")", "relation": "increases", "target": "p(HGNC:PRF1)", "directed": false, "label": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:PRF1)", "metadata": { "casual": false, "createdBy": "ganna.androsova", "edgeId": "5538decb89e362097c025643", "evidences": [ { "bel_statement": "path(MESHD:\"Pulmonary Disease, Chronic Obstructive\") increases p(HGNC:PRF1)", "summary_text": "CD8/CD28(null) cells were increased in both current- and ex-smoker COPD groups; these cells expressed significantly more interferon (IFN)-γ, OX40, 4-1BB, CTLA4, granzyme and perforin when stimulated than CD8/CD28(+) T cells.", "citation": { "type": "Other", "name": "", "id": "21910726" }, "metadata": { "created_by": "ganna.androsova", "id": "5538decb89e362097c0255bb" }, "experiment_context": { "species_common_name": "human", "disease": "COPD", "cell": "CD8/CD28(null) 
cells", "tissue": "lung" } } ] } } ] } } pybel-0.12.1/src/pybel/testing/resources/bel/isolated.bel000066400000000000000000000025001334645200200233250ustar00rootroot00000000000000################################################################################## # Document Properties Section SET DOCUMENT Name = "PyBEL Test Isolated Nodes" SET DOCUMENT Description = "Tests the effect of using isolated nodes in IO" SET DOCUMENT Version = "0.1.0" SET DOCUMENT Copyright = "Copyright (c) Charles Tapley Hoyt. All Rights Reserved." SET DOCUMENT Authors = "Charles Tapley Hoyt" SET DOCUMENT Licenses = "WTF License" SET DOCUMENT ContactInfo = "charles.hoyt@scai.fraunhofer.de" ################################################################################## # Definitions Section DEFINE NAMESPACE HGNC AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=hgnc-human-genes.belns" DEFINE NAMESPACE MESHD AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=mesh-diseases.belns" ################################################################################## # Statements Section ################################################################################## SET Citation = {"PubMed","That one article from last week","123455"} SET Evidence = "These are mostly made up" #: Test that there's an isolated node that makes it path(MESHD:Achlorhydria) #: Test an isolated node that gets some extra stuff induced complex(p(HGNC:ADGRB1), p(HGNC:ADGRB2)) pybel-0.12.1/src/pybel/testing/resources/bel/misordered.bel000066400000000000000000000025641334645200200236700ustar00rootroot00000000000000################################################################################## # Document Properties Section SET DOCUMENT Name = "PyBEL Test Citation Clearing" SET DOCUMENT Description = "Made for testing PyBEL parsing without citation clearance" SET DOCUMENT Version = "1.0.0" SET DOCUMENT Copyright = "Copyright (c) Charles Tapley Hoyt. 
All Rights Reserved." SET DOCUMENT Authors = "Charles Tapley Hoyt" SET DOCUMENT Licenses = "WTF License" SET DOCUMENT ContactInfo = "charles.hoyt@scai.fraunhofer.de" ################################################################################## # Definitions Section DEFINE NAMESPACE HGNC AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=hgnc-human-genes.belns" DEFINE ANNOTATION TESTAN1 AS LIST {"1","2","3"} ################################################################################## # Statements Section SET STATEMENT_GROUP = "Group 1" SET TESTAN1 = "1" SET Citation = {"PubMed","That one article from last week","123455"} SET Evidence = "Evidence 1" p(HGNC:AKT1) -> p(HGNC:EGFR) UNSET ALL SET Evidence = "Evidence 1" SET TESTAN1 = "1" SET Citation = {"PubMed","That one article from last week","123455"} p(HGNC:EGFR) -| p(HGNC:FADD) UNSET ALL SET TESTAN1 = "1" SET Evidence = "Evidence 1" SET Citation = {"PubMed","That one article from last week","123455"} p(HGNC:EGFR) =| p(HGNC:CASP8) pybel-0.12.1/src/pybel/testing/resources/bel/slushy.bel000066400000000000000000000072011334645200200230530ustar00rootroot00000000000000SET DOCUMENT Name = "Worst. BEL Document. Ever." 
SET DOCUMENT Description = "This document outlines all of the evil and awful work that is possible during BEL curation" SET DOCUMENT Version = "0.0" SET DOCUMENT Authors = "Charles Tapley Hoyt" SET DOCUMENT Licenses = "WTF License" # SET DOCUMENT ContactInfo = "charles.hoyt@scai.fraunhofer.de" # Missing Contact Info is required # SET DOCUMENT InvalidMetadata = "very invalid, indeed" DEFINE NAMESPACE CHEBI AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=chebi.belns" DEFINE NAMESPACE HGNC AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=hgnc-human-genes.belns" DEFINE NAMESPACE MESHD AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=mesh-diseases.belns" DEFINE NAMESPACE GO AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=go-biological-process.belns" DEFINE NAMESPACE dbSNP AS PATTERN "rs[0-9]+$" DEFINE ANNOTATION CellLine AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=cell-line.belanno" DEFINE ANNOTATION TextLocation AS LIST {"Abstract","Results","Legend","Review"} DEFINE ANNOTATION Disease AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=mesh-diseases.belanno" #: Name doesn't match annotation file Keyword (LexicographyWarning, not implemented yet) DEFINE ANNOTATION Specieses AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=species-taxonomy-id.belanno" DEFINE ANNOTATION PowerLevel AS PATTERN "[0-9]+$" # MissingAnnotationKeyWarning UNSET STATEMENT_GROUP # MissingAnnotationKeyWarning UNSET Citation SET STATEMENT_GROUP = "Group 1" # InvalidCitationException SET Citation = {"PubMed"} # InvalidCitationType SET Citation = {"Pubmed", "Incomplete", "1234"} # InvalidPubMedIdentifierWarning SET Citation = {"PubMed", "Fake Name", "Fake Reference"} # MissingCitationException p(HGNC:AKT1) -- p(HGNC:AKT2) SET Citation = 
{"PubMed","Trends in molecular medicine","12928037"} # MissingAnnotationKeyWarning UNSET Evidence # MissingAnnotationKeyWarning UNSET PowerLevel # MissingSupportWarning p(HGNC:AKT1) -- p(HGNC:AKT2) SET Evidence = "This is definitely real evidence" # Naked name (NakedNameWarning) biologicalProcess("response to oxidative stress") increases biologicalProcess(GO:necrosis) # UndefinedNamespaceWarning p(UNDEFINED:"YFG") -- p(HGNC:AKT1) # Missing name (MissingNamespaceNameWarning) biologicalProcess(GO:"maybe response to oxidative stress") increases biologicalProcess(GO:necrosis) # UndefinedAnnotationWarning SET UNDEFINED_ANNOTATION = "Nope." # MissingAnnotationKeyWarning UNSET TextLocation # IllegalAnnotationValueWarning SET TextLocation = "Nope" # MissingAnnotationRegexWarning SET PowerLevel = "Nine Thousand" # MissingNamespaceRegexWarning g(dbSNP:"rs123123-A") eq g(HGNC:TP53) # MalformedTranslocationWarning tloc(p(HGNC:AKT1)) -- p(HGNC:AKT2) # PlaceholderAminoAcidWarning p(HGNC:AKT1, sub(G, 1, X)) -- p(HGNC:AKT2) # NestedRelationWarning p(HGNC:AKT1) -> (p(HGNC:AKT2) -> biologicalProcess(GO:"response to oxidative stress")) # InvalidFunctionSemantic bp(HGNC:AKT1) -> p(HGNC:AKT2) # Forgot quotes (ParseException) # SET Disease = Atherosclerosis # Mixed up arguments (ParseException) p(HGNC:TP53,sub(Q,R,248)) directlyDecreases transcriptionalActivity(proteinAbundance(HGNC:TP53)) UNSET STATEMENT_GROUP ######## The following statements have no errors ############# SET STATEMENT_GROUP = "Group 2" SET Citation = {"PubMed","That one article from last week","123455"} SET Evidence = "Evidence 1" p(HGNC:AKT1) -> p(HGNC:EGFR) pybel-0.12.1/src/pybel/testing/resources/bel/test_bel.bel000066400000000000000000000034021334645200200233240ustar00rootroot00000000000000################################################################################## # Document Properties Section SET DOCUMENT Name = "PyBEL Test Simple" SET DOCUMENT Description = "Made for testing PyBEL parsing" SET DOCUMENT 
Version = "1.6.0" SET DOCUMENT Copyright = "Copyright (c) Charles Tapley Hoyt. All Rights Reserved." SET DOCUMENT Authors = "Charles Tapley Hoyt" SET DOCUMENT Licenses = "WTF License" SET DOCUMENT ContactInfo = "charles.hoyt@scai.fraunhofer.de" SET DOCUMENT Project = "PyBEL Testing" ################################################################################## # Definitions Section DEFINE NAMESPACE CHEBI AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=chebi.belns" DEFINE NAMESPACE HGNC AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=hgnc-human-genes.belns" DEFINE ANNOTATION Species AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=species-taxonomy-id.belanno" DEFINE ANNOTATION CellLine AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=cell-line.belanno" ################################################################################## # Statements Section SET STATEMENT_GROUP = "Group 1" SET Citation = {"PubMed","That one article from last week","123455","2012-01-31","Example Author|Example Author2"} SET Species = "9606" SET Evidence = "Evidence 1 \ w extra notes" p(HGNC:AKT1) -> p(HGNC:EGFR) SET Evidence = "Evidence 2" SET CellLine = "10B9 cell" p(HGNC:EGFR) -| p(HGNC:FADD) p(HGNC:EGFR) =| p(HGNC:CASP8) SET Citation = {"PubMed","That other article from last week","123456"} SET Species = "10116" SET Evidence = "Evidence 3" p(HGNC:FADD) -> p(HGNC:CASP8) p(HGNC:AKT1) -- p(HGNC:CASP8) pybel-0.12.1/src/pybel/testing/resources/bel/thorough.bel000066400000000000000000000143301334645200200233640ustar00rootroot00000000000000################################################################################## # Document Properties Section SET DOCUMENT Name = "PyBEL Test Thorough" SET DOCUMENT Description = "Statements made up to contain many conceivable variants of nodes from BEL" SET DOCUMENT Version = "1.0.0" SET DOCUMENT 
Copyright = "Copyright (c) Charles Tapley Hoyt. All Rights Reserved." SET DOCUMENT Authors = "Charles Tapley Hoyt" SET DOCUMENT Licenses = "WTF License" SET DOCUMENT ContactInfo = "charles.hoyt@scai.fraunhofer.de" ################################################################################## # Definitions Section DEFINE NAMESPACE CHEBI AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=chebi.belns" DEFINE NAMESPACE HGNC AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=hgnc-human-genes.belns" DEFINE NAMESPACE GOBP AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=go-biological-process.belns" DEFINE NAMESPACE GOCC AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=go-cellular-component.belns" DEFINE NAMESPACE MESHD AS URL "https://owncloud.scai.fraunhofer.de/index.php/s/JsfpQvkdx3Y5EMx/download?path=mesh-diseases.belns" DEFINE NAMESPACE dbSNP AS PATTERN "rs[0-9]*" DEFINE NAMESPACE TESTNS2 AS URL "https://raw.githubusercontent.com/pybel/pybel/develop/tests/belns/test_ns_2.belns" DEFINE ANNOTATION TESTAN1 AS LIST {"1","2","3"} DEFINE ANNOTATION TESTAN2 AS LIST {"1","2","3"} DEFINE ANNOTATION TestRegex AS PATTERN "[0-9]+" ################################################################################## # Statements Section ################################################################################## SET Citation = {"PubMed","That one article from last week","123455"} SET Evidence = "These are mostly made up" SET TESTAN1 = {"1", "2"} SET TestRegex = "9000" a(CHEBI:"oxygen atom") -> geneAbundance(HGNC:AKT1,gmod(M)) UNSET {TESTAN1, TestRegex} g(HGNC:AKT1, loc(GOCC:intracellular)) -| abundance(CHEBI:"oxygen atom", loc(GOCC:intracellular)) g(HGNC:AKT1, var(p.Phe508del)) =| p(HGNC:AKT1) g(HGNC:AKT1,sub(G,308,A)) cnc g(fus(HGNC:TMPRSS2, c.1_79, HGNC:ERG, c.312_5034)) g(HGNC:AKT1,sub(G,308,A),loc(GOCC:intracellular)) -> 
g(HGNC:AKT1, var(p.Phe508del), sub(G,308,A), var(c.1521_1523delCTT)) m(HGNC:MIR21) => g(HGNC:BCR, fus(HGNC:JAK2, 1875, 2626)) g(HGNC:CFTR, var(c.1521_1523delCTT)) -> deg(p(HGNC:AKT1)) g(HGNC:CFTR, var(g.117199646_117199648delCTT)) -> g(HGNC:CFTR, var(c.1521_1523delCTT)) microRNAAbundance(HGNC:MIR21) -| p(HGNC:AKT1, pmod(TESTNS2:PhosRes, Ser, 473)) m(HGNC:MIR21,loc(GOCC:intracellular)) -| p(HGNC:AKT1, pmod(Ph, Ser, 473)) m(HGNC:MIR21,var(p.Phe508del)) -| p(HGNC:AKT1, pmod(Ph, S, 473)) m(HGNC:MIR21,var(p.Phe508del),loc(GOCC:intracellular)) -> p(HGNC:AKT1, var(p.C40*)) p(HGNC:AKT1, loc(GOCC:intracellular)) =| p(HGNC:AKT1,sub(A,127,Y),pmod(Ph, Ser),loc(GOCC:intracellular)) g(HGNC:CHCHD4, fusion(HGNC:AIFM1)) -> p(fus(HGNC:TMPRSS2, p.1_79, HGNC:ERG, p.312_5034)) p(HGNC:AKT1, var(p.Arg1851*)) -> p(HGNC:BCR, fus(HGNC:JAK2, 1875, 2626)) p(HGNC:AKT1, trunc(40)) -> p(HGNC:CHCHD4, fusion(HGNC:AIFM1)) p(HGNC:CFTR, var(=)) -> surf(p(HGNC:EGFR)) -> p(HGNC:MIA, frag(?_*)) p(HGNC:CFTR, var(?)) -> pathology(MESHD:Adenocarcinoma) p(HGNC:MIA, frag(5_20)) -> sec(complex(GOCC:"interleukin-23 complex")) p(HGNC:MIA, frag(1_?)) -> tloc(p(HGNC:EGFR), GOCC:"cell surface", GOCC:endosome) deg(p(HGNC:AKT1)) -> p(HGNC:MIA, frag(?)) p(HGNC:CFTR, var(p.Phe508del)) -- p(HGNC:MIA, frag(?, "55kD")) p(HGNC:AKT1) -> p(HGNC:CFTR, var(p.Gly576Ala)) r(HGNC:AKT1) -> tloc(p(HGNC:EGFR), fromLoc(GOCC:"cell surface"), toLoc(GOCC:endosome)) r(HGNC:AKT1, var(p.Phe508del), var(c.1521_1523delCTT)) => r(fus(HGNC:TMPRSS2, r.1_79, HGNC:ERG, r.312_5034)) r(fus(HGNC:TMPRSS2, ?, HGNC:ERG, ?)) -> complexAbundance(proteinAbundance(HGNC:HBP1),geneAbundance(HGNC:NCF1)) r(HGNC:BCR, fus(HGNC:JAK2, 1875, 2626)) -- p(HGNC:EGFR) r(HGNC:CHCHD4, fusion(HGNC:AIFM1)) -> complex(p(HGNC:FOS), p(HGNC:JUN)) act(p(HGNC:AKT1), ma(kin)) -> r(HGNC:CFTR, var(r.1521_1523delcuu)) act(p(HGNC:AKT1)) -> r(HGNC:CFTR, var(r.1653_1655delcuu)) complex(TESTNS2:"AP-1 Complex") -> p(HGNC:HRAS, pmod(Palm)) composite(p(HGNC:IL6), 
complex(GOCC:"interleukin-23 complex")) -| bp(GOBP:"cell cycle arrest") act(p(HGNC:AKT1), ma(catalyticActivity)) -> deg(p(HGNC:EGFR)) kin(p(HGNC:AKT1)) -> sec(p(HGNC:EGFR)) ################################################################################################################ SET Citation = {"PubMed","That one article from last week #2","123456"} SET Evidence = "These were all explicitly stated in the BEL 2.0 Specification" composite(p(HGNC:CASP8),p(HGNC:FADD),a(TESTNS2:"Abeta_42")) -> bp(GOBP:"neuron apoptotic process") pep(p(TESTNS2:"CAPN Family", location(GOCC:intracellular))) -| reaction(reactants(p(HGNC:CDK5R1)),products(p(HGNC:CDK5))) proteinAbundance(HGNC:CAT, location(GOCC:intracellular)) directlyDecreases abundance(CHEBI:"hydrogen peroxide") g(HGNC:CAT, location(GOCC:intracellular)) directlyDecreases abundance(CHEBI:"hydrogen peroxide") act(p(HGNC:HMGCR), ma(cat)) rateLimitingStepOf bp(GOBP:"cholesterol biosynthetic process") g(HGNC:APP,sub(G,275341,C)) cnc path(MESHD:"Alzheimer Disease") pep(complex(p(HGNC:F3),p(HGNC:F7))) regulates pep(p(HGNC:F9)) p(HGNC:CAT) -| (a(CHEBI:"hydrogen peroxide") -> bp(GOBP:"apoptotic process")) p(HGNC:CAT) -| (a(CHEBI:"hydrogen peroxide") -> bp(GOBP:"apoptotic process")) kin(p(TESTNS2:"GSK3 Family")) neg p(HGNC:MAPT,pmod(P)) p(HGNC:GSK3B, pmod(P, S, 9)) pos act(p(HGNC:GSK3B), ma(kin)) g(HGNC:AKT1) orthologous g(TESTNS2:"AKT1 ortholog") g(HGNC:AKT1) :> r(HGNC:AKT1) r(HGNC:AKT1) >> p(HGNC:AKT1) p(TESTNS2:PRKC) hasMembers list(p(HGNC:PRKCA), p(HGNC:PRKCB), p(HGNC:PRKCD), p(HGNC:PRKCE)) pathology(MESHD:Psoriasis) isA pathology(MESHD:"Skin Diseases") rxn(reactants(a(CHEBI:"(3S)-3-hydroxy-3-methylglutaryl-CoA"),a(CHEBI:NADPH), a(CHEBI:hydron)),products(a(CHEBI:mevalonate), a(CHEBI:"NADP(+)"))) subProcessOf bp(GOBP:"cholesterol biosynthetic process") a(CHEBI:"nitric oxide") increases surf(complex(p(HGNC:ITGAV),p(HGNC:ITGB3))) # Test that the equivalentTo relation works g(HGNC:ARRDC2) eq g(HGNC:ARRDC3) g(HGNC:CFTR, 
var(c.1521_1523delCTT)) -- g(dbSNP:rs123456) pybel-0.12.1/src/pybel/testing/resources/belanno/000077500000000000000000000000001334645200200217145ustar00rootroot00000000000000pybel-0.12.1/src/pybel/testing/resources/belanno/cell-line.belanno000066400000000000000000000025731334645200200251270ustar00rootroot00000000000000[AnnotationDefinition] Keyword=CellLine TypeString=list DescriptionString=Cell Line Ontology (CLO) and Experimental Factor Ontology (EFO) Cell Lines UsageString=Use this annotation to indicate the cell line context of a statement. Use to annotate a statement that was demonstrated in a given cell line. VersionString=20150611 CreatedDateTime=2015-06-11T19:57:45 [Author] NameString=OpenBEL CopyrightString=Copyright (c) 2015, OpenBEL Project. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=info@openbel.org [Citation] NameString=Cell Line Ontology (CLO) DescriptionString=The Cell Line Ontology (CLO) is a community-driven ontology that is developed to standardize and integrate cell line information and support computer-assisted reasoning. PublishedVersionString=2.1.63 PublishedDate=2015-04-25 ReferenceURL=http://www.clo-ontology.org/ NameString=Experimental Factor Ontology (EFO) DescriptionString=The Experimental Factor Ontology (EFO) provides a systematic description of many experimental variables available in EBI databases. 
PublishedVersionString=2.60 PublishedDate=2015-05-15 ReferenceURL=http://www.ebi.ac.uk/efo/ [Processing] CaseSensitiveFlag=yes DelimiterString=| CacheableFlag=yes [Values] 10B9 cell|CLO_0001031 mouse x rat hybridoma cell line cell|CLO_0000498 olfactory neurosphere cell line|EFO_0005705 1321N1 cell|CLO_0001072 pybel-0.12.1/src/pybel/testing/resources/belanno/confidence-1.0.0.belanno000066400000000000000000000012701334645200200260030ustar00rootroot00000000000000[AnnotationDefinition] Keyword=Confidence TypeString=list DescriptionString=Confidence annotations for statements UsageString=A curator can use this annotation to indicate the confidence in which they have for the correctness of their statements VersionString=20170430 CreatedDateTime=2017-01-22T12:00:00 [Author] NameString=Charles Tapley Hoyt CopyrightString=Charles Tapley Hoyt (c) 2017. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=charles.hoyt@scai.fraunhofer.de [Citation] NameString=Confidence [Processing] CaseSensitiveFlag=no DelimiterString=| CacheableFlag=yes [Values] Wrong| Very Low| Low| Medium| High| Very High| Axiomatic| pybel-0.12.1/src/pybel/testing/resources/belanno/mesh-diseases.belanno000066400000000000000000000017271334645200200260150ustar00rootroot00000000000000[AnnotationDefinition] Keyword=MeSHDisease TypeString=list DescriptionString=Disease terms from the [C] branch of Medical Subject Headings (MeSH). UsageString=Use to annotate a statement demonstrated in the context of a specific disease. VersionString=20150611 CreatedDateTime=2015-06-11T19:57:45 [Author] NameString=OpenBEL CopyrightString=Copyright (c) 2015, OpenBEL Project. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=info@openbel.org [Citation] NameString=MeSH DescriptionString=MeSH (Medical Subject Headings) is a controlled vocabulary thesaurus created, maintained, and provided by the U.S. National Library of Medicine. 
PublishedVersionString=2015 ReferenceURL=http://www.nlm.nih.gov/mesh/meshhome.html [Processing] CaseSensitiveFlag=yes DelimiterString=| CacheableFlag=yes [Values] 22q11 Deletion Syndrome|D058165 46, XX Disorders of Sex Development|D058489 46, XX Testicular Disorders of Sex Development|D058531 pybel-0.12.1/src/pybel/testing/resources/belanno/species-taxonomy-id.belanno000066400000000000000000000015241334645200200271570ustar00rootroot00000000000000[AnnotationDefinition] Keyword=Species TypeString=list DescriptionString=The annotation values for species provided by NCBI Taxonomy Identifiers. UsageString=Use this annotation to indicate the species context for a statement. VersionString=20120202 CreatedDateTime=2012-02-02T12:00:00 [Author] NameString=OpenBEL CopyrightString=OpenBEL (c) 2013, OpenBEL Project. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=belframework@selventa.com [Citation] NameString=NCBI Taxonomy Ids DescriptionString=NCBI maintains species taxonomies PublishedVersionString=20120202 PublishedDate=2011-02-02 ReferenceURL=http://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/ [Processing] CaseSensitiveFlag=no DelimiterString=| CacheableFlag=yes [Values] 9606|Homo sapiens 10090|Mus musculus 10116|Rattus norvegicus pybel-0.12.1/src/pybel/testing/resources/belanno/test_an_1.belanno000066400000000000000000000013031334645200200251260ustar00rootroot00000000000000[AnnotationDefinition] Keyword=TESTAN1 TypeString=list NameString=Test Annotations 1 for PyBEL DomainString=TestAn1 SpeciesString=all DescriptionString=Test Annotations 1 for PyBEL to make a subset of useful BEL VersionString=1.0.0 CreatedDateTime=2016-09-17T20:50:00 [Author] NameString=Charles Tapley Hoyt CopyrightString=Copyright (c) Charles Tapley Hoyt. All Rights Reserved. 
ContactInfoString=charles.hoyt@scai.fraunhofer.de [Citation] NameString=Test Annotations 1 for PyBEL DescriptionString=Test Annotations 1 for PyBEL to make a subset of useful BEL [Processing] CaseSensitiveFlag=no DelimiterString=| CacheableFlag=yes [Values] TestAnnot1|O TestAnnot2|O TestAnnot3|O TestAnnot4|O TestAnnot5|O pybel-0.12.1/src/pybel/testing/resources/belns/000077500000000000000000000000001334645200200214015ustar00rootroot00000000000000pybel-0.12.1/src/pybel/testing/resources/belns/chebi.belns000066400000000000000000000021551334645200200235030ustar00rootroot00000000000000[Namespace] NameString=Chemicals of Biological Interest (Names) Keyword=CHEBI DomainString=Chemical SpeciesString=all DescriptionString=Chemical Entities of Biological Interest (ChEBI) unique names. The values used are the ChEBI ASCII Names. These names may be used to specify abundances. VersionString=20150611 CreatedDateTime=2015-06-11T19:51:16 QueryValueURL=http://www.ebi.ac.uk/chebi/searchFreeText.do?searchString=[VALUE] [Author] NameString=OpenBEL CopyrightString=Copyright (c) 2015, OpenBEL Project. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=info@openbel.org [Citation] NameString=ChEBI DescriptionString=Chemical Entities of Biological Interest (ChEBI) is a freely available dictionary of molecular entities focused on .small. chemical compounds. 
PublishedVersionString=127 PublishedDate=2015-06-01 ReferenceURL=http://www.ebi.ac.uk/chebi/ [Processing] CaseSensitiveFlag=yes DelimiterString=| CacheableFlag=yes [Values] nitric oxide|A (3S)-3-hydroxy-3-methylglutaryl-CoA|A NADPH|A hydrogen peroxide|A hydron|A mevalonate|A oxygen atom|A NADP(+)|A sialic acid|A pybel-0.12.1/src/pybel/testing/resources/belns/disease-ontology.belns000066400000000000000000000023051334645200200257130ustar00rootroot00000000000000[Namespace] NameString=Disease Ontology Names Keyword=DO DomainString=Biological Process SpeciesString=all DescriptionString=Disease names from the Disease Ontology (DO). These names may be used to specify pathologies. VersionString=20150611 CreatedDateTime=2015-06-11T19:51:16 [Author] NameString=OpenBEL CopyrightString=Copyright (c) 2015, OpenBEL Project. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=info@openbel.org [Citation] NameString=Disease Ontology (DO) DescriptionString=The Disease Ontology has been developed as a standardized ontology for human disease with the purpose of providing the biomedical community with consistent, reusable and sustainable descriptions of human disease terms, phenotype characteristics and related medical vocabulary disease concepts through collaborative efforts of researchers at Northwestern University, Center for Genetic Medicine and the University of Maryland School of Medicine, Institute for Genome Sciences. 
PublishedVersionString=2015-06-11 PublishedDate=2015-06-11 ReferenceURL=http://disease-ontology.org/ [Processing] CaseSensitiveFlag=yes DelimiterString=| CacheableFlag=yes [Values] Alzheimer's disease|O pybel-0.12.1/src/pybel/testing/resources/belns/go-biological-process.belns000066400000000000000000000022731334645200200266150ustar00rootroot00000000000000[Namespace] NameString=GO Biological Process Names Keyword=GOBP DomainString=Biological Process SpeciesString=all DescriptionString=The Gene Ontology (GO) names for biological process. These names may be used to represent biological processes. VersionString=20150611 CreatedDateTime=2015-06-11T19:51:23 QueryValueURL=http://amigo.geneontology.org/cgi-bin/amigo/search.cgi?search_query=[VALUE]&search_constraint=term&exact_match=1&action=new-search [Author] NameString=OpenBEL CopyrightString=Copyright (c) 2015, OpenBEL Project. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=info@openbel.org [Citation] NameString=The Gene Ontology Consortium DescriptionString=The Gene Ontology project is a major bioinformatics initiative with the aim of standardizing the representation of gene and gene product attributes across species and databases. PublishedVersionString=2015-05-30 PublishedDate=2015-05-29 ReferenceURL=http://www.geneontology.org/ [Processing] CaseSensitiveFlag=yes DelimiterString=| CacheableFlag=yes [Values] cholesterol biosynthetic process|B cell cycle arrest|B neuron apoptotic process|B apoptotic process|B response to oxidative stress|B pybel-0.12.1/src/pybel/testing/resources/belns/go-cellular-component.belns000066400000000000000000000022601334645200200266340ustar00rootroot00000000000000[Namespace] NameString=GO Cellular Components (Names) Keyword=GOCC DomainString=Cell Components and Complexes SpeciesString=all DescriptionString=Gene Ontology (GO) cellular component names. These terms may be used to represent cellular components as well as complex abundances. 
VersionString=20150611 CreatedDateTime=2015-06-11T19:51:15 QueryValueURL=http://amigo.geneontology.org/cgi-bin/amigo/search.cgi?search_query=[VALUE]&search_constraint=term&exact_match=1&action=new-search [Author] NameString=OpenBEL CopyrightString=Copyright (c) 2015, OpenBEL Project. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=info@openbel.org [Citation] NameString=The Gene Ontology Consortium DescriptionString=The Gene Ontology project is a major bioinformatics initiative with the aim of standardizing the representation of gene and gene product attributes across species and databases. PublishedVersionString=2015-05-30 PublishedDate=2015-05-29 ReferenceURL=http://www.geneontology.org [Processing] CaseSensitiveFlag=yes DelimiterString=| CacheableFlag=yes [Values] interleukin-23 complex|C intracellular|A endosome|A cell surface|A extracellular space|Apybel-0.12.1/src/pybel/testing/resources/belns/hgnc-human-genes.belns000066400000000000000000000030641334645200200255550ustar00rootroot00000000000000[Namespace] NameString=HGNC Approved Gene Symbols Keyword=HGNC DomainString=Gene and Gene Products SpeciesString=9606 DescriptionString=HUGO Gene Nomenclature Committee (HGNC) approved gene symbols. These symbols may be used to specify human gene, RNA, microRNA and protein abundances. Single character encoding following each value specify which abundance types are valid. VersionString=20150611 CreatedDateTime=2015-06-11T19:51:19 QueryValueURL=http://www.genenames.org/data/hgnc_data.php?match=[VALUE] [Author] NameString=OpenBEL CopyrightString=Copyright (c) 2015, OpenBEL Project. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=info@openbel.org [Citation] NameString=HUGO Gene Nomenclature Committee at the European Bioinformatics Institute DescriptionString=The HGNC approved gene symbols for human. Each symbol is unique and each gene is only given one approved gene symbol. 
PublishedVersionString=Thu, 11 Jun 2015 06:00:09 PublishedDate=2015-06-11 ReferenceURL=http://www.genenames.org/ [Processing] CaseSensitiveFlag=yes DelimiterString=| CacheableFlag=yes [Values] ADGRB1|GPR ADGRB2|GPR AIFM1|GPR AKT1|GPR AKT2|GRP APP|GPR ARRDC2|GPR ARRDC3|GPR BCR|GPR CASP8|GPR CAT|GPR CDK5|GPR CDK5R1|GPR CFTR|GPR CHCHD4|GPR EGFR|GPR ERG|GPR F3|GPR F7|GPR F9|GPR FADD|GPR FOS|GPR GSK3B|GPR HBP1|GPR HMGCR|GPR HRAS|GPR IL6|GPR ITGAV|GPR ITGB3|GPR JAK2|GPR JUN|GPR MAPT|GPR MHS2|G MIA|GPR MIATNB|GR MIR21|GPR NCF1|GPR PRKCA|GPR PRKCB|GPR PRKCD|GPR PRKCE|GPR TMPRSS2|GPR TP53|GPR CD33|GPR PTPN6|GPR PTPN11|GPR SYK|GRP TYROBP|GPR TREM2|GPR pybel-0.12.1/src/pybel/testing/resources/belns/mesh-diseases.belns000066400000000000000000000020301334645200200251530ustar00rootroot00000000000000[Namespace] NameString=MeSH Diseases (Names) Keyword=MESHD DomainString=Biological Process SpeciesString=all DescriptionString=Medical Subject Headings (MeSH) from the Diseases [C] branch. These headings may be used to specify pathologies. VersionString=20150611 CreatedDateTime=2015-06-11T19:51:19 QueryValueURL=http://www.nlm.nih.gov/cgi/mesh/2013/MB_cgi?mode=&term=[VALUE]&field=entry [Author] NameString=OpenBEL CopyrightString=Copyright (c) 2015, OpenBEL Project. This work is licensed under a Creative Commons Attribution 3.0 Unported License. ContactInfoString=info@openbel.org [Citation] NameString=MeSH DescriptionString=MeSH (Medical Subject Headings) is a controlled vocabulary thesaurus created, maintained, and provided by the U.S. National Library of Medicine. 
PublishedVersionString=2015 PublishedDate=2015-06-11 ReferenceURL=http://www.nlm.nih.gov/mesh/meshhome.html [Processing] CaseSensitiveFlag=yes DelimiterString=| CacheableFlag=yes [Values] Achlorhydria|O Adenocarcinoma|O Skin Diseases|O Psoriasis|O Alzheimer Disease|O pybel-0.12.1/src/pybel/testing/resources/belns/test_nocache.belns000066400000000000000000000011071334645200200250640ustar00rootroot00000000000000[Namespace] Keyword=TESTNS3 NameString=Test Namespace 3 For PyBEL DomainString=TestNs1 SpeciesString=all DescriptionString=This namespace is for the tests that no caching happens VersionString=1.0.0 CreatedDateTime=2016-09-17T20:50:00 [Author] NameString=Charles Tapley Hoyt CopyrightString=Copyright (c) Charles Tapley Hoyt. All Rights Reserved. ContactInfoString=charles.hoyt@scai.fraunhofer.de [Citation] NameString=Test Namespace 3 for PyBEL DescriptionString=Test Namespace 3 for PyBEL [Processing] CaseSensitiveFlag=no DelimiterString=| CacheableFlag=no [Values] PhosRes|A pybel-0.12.1/src/pybel/testing/resources/belns/test_ns_1.belns000066400000000000000000000012401334645200200243220ustar00rootroot00000000000000[Namespace] Keyword=TESTNS1 NameString=Test Namespace 1 for PyBEL DomainString=TestNs1 SpeciesString=all DescriptionString=Test Namespace 1 for PyBEL to make a subset of useful BEL VersionString=1.0.0 CreatedDateTime=2016-09-17T20:50:00 [Author] NameString=Charles Tapley Hoyt CopyrightString=Copyright (c) Charles Tapley Hoyt. All Rights Reserved. 
ContactInfoString=charles.hoyt@scai.fraunhofer.de [Citation] NameString=Test Namespace 1 for PyBEL DescriptionString=Test Namespace 1 for PyBEL to make a subset of useful BEL [Processing] CaseSensitiveFlag=no DelimiterString=| CacheableFlag=yes [Values] TestValue1|O TestValue2|O TestValue3|O TestValue4|O TestValue5|O pybel-0.12.1/src/pybel/testing/resources/belns/test_ns_1_updated.belns000066400000000000000000000013031334645200200260300ustar00rootroot00000000000000[Namespace] Keyword=TESTNS1 NameString=Test Namespace 1 for PyBEL DomainString=TestNs1 SpeciesString=all DescriptionString=Test Namespace 1 for PyBEL to make a subset of useful BEL VersionString=1.1.0 CreatedDateTime=2016-10-20T17:15:00 [Author] NameString=Charles Tapley Hoyt CopyrightString=Copyright (c) Charles Tapley Hoyt. All Rights Reserved. ContactInfoString=charles.hoyt@scai.fraunhofer.de [Citation] NameString=Test Namespace 1 for PyBEL DescriptionString=Test Namespace 1 for PyBEL to make a subset of useful BEL [Processing] CaseSensitiveFlag=no DelimiterString=| CacheableFlag=yes [Values] ImprovedTestValue1|O TestValue2|O TestValue3|O ImprovedTestValue4|O TestValue5|O AdditionalValue6|O pybel-0.12.1/src/pybel/testing/resources/belns/test_ns_2.belns000066400000000000000000000012701334645200200243260ustar00rootroot00000000000000[Namespace] Keyword=TESTNS2 NameString=Test Namespace 2 for PyBEL DomainString=TestNs1 SpeciesString=all DescriptionString=Test Namespace 1 for PyBEL to make a subset of useful BEL VersionString=1.0.0 CreatedDateTime=2016-09-17T20:50:00 [Author] NameString=Charles Tapley Hoyt CopyrightString=Copyright (c) Charles Tapley Hoyt. All Rights Reserved. 
ContactInfoString=charles.hoyt@scai.fraunhofer.de [Citation] NameString=Test Namespace 1 for PyBEL DescriptionString=Test Namespace 1 for PyBEL to make a subset of useful BEL [Processing] CaseSensitiveFlag=no DelimiterString=| CacheableFlag=yes [Values] PhosRes| AP-1 Complex|CA Abeta_42|A CAPN Family|P GSK3 Family|P AKT1 ortholog|GRP PRKC|P pybel-0.12.1/src/pybel/testing/resources/belns/test_ns_empty.belns000066400000000000000000000011471334645200200253260ustar00rootroot00000000000000[Namespace] Keyword=TESTNSEMPTY NameString=Test Empty Namespace for PyBEL DomainString=TestNs1 SpeciesString=all DescriptionString=Test Namespace 1 for PyBEL to make a subset of useful BEL VersionString=1.0.0 CreatedDateTime=2016-09-17T20:50:00 [Author] NameString=Charles Tapley Hoyt CopyrightString=Copyright (c) Charles Tapley Hoyt. All Rights Reserved. ContactInfoString=charles.hoyt@scai.fraunhofer.de [Citation] NameString=Test Namespace 1 for PyBEL DescriptionString=Test Namespace 1 for PyBEL to make a subset of useful BEL [Processing] CaseSensitiveFlag=no DelimiterString=| CacheableFlag=yes [Values] pybel-0.12.1/src/pybel/testing/utils.py000066400000000000000000000043721334645200200200040ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Utilities for PyBEL testing.""" from uuid import uuid4 from requests.compat import urlparse from ..constants import BEL_DEFAULT_NAMESPACE, FRAUNHOFER_RESOURCES from ..manager.models import Namespace, NamespaceEntry from ..struct.summary import get_annotation_values_by_annotation from ..struct.summary.node_summary import get_names def get_uri_name(url): """Get the file name from the end of the URL. Only useful for PyBEL's testing though since it looks specifically if the file is from the weird owncloud resources distributed by Fraunhofer. 
:type url: str :rtype: str """ url_parsed = urlparse(url) if url.startswith(FRAUNHOFER_RESOURCES): return url_parsed.query.split('=')[-1] else: url_parts = url_parsed.path.split('/') return url_parts[-1] def n(): """Return a UUID string for testing. :rtype: str """ return str(uuid4())[:15] def make_dummy_namespaces(manager, graph): """Make dummy namespaces for the test. :type manager: pybel.manager.Manager :type graph: pybel.BELGraph """ for keyword, names in get_names(graph).items(): if keyword == BEL_DEFAULT_NAMESPACE: continue if keyword in graph.namespace_url and graph.namespace_url[keyword] in graph.uncached_namespaces: continue graph.namespace_url[keyword] = url = n() namespace = Namespace(keyword=keyword, url=url) manager.session.add(namespace) for name in names: entry = NamespaceEntry(name=name, namespace=namespace) manager.session.add(entry) manager.session.commit() def make_dummy_annotations(manager, graph): """Make dummy annotations for the test. :param pybel.manager.Manager manager: :param pybel.BELGraph graph: """ annotation_names = get_annotation_values_by_annotation(graph) for keyword, names in annotation_names.items(): graph.annotation_url[keyword] = url = n() namespace = Namespace(keyword=keyword, url=url, is_annotation=True) manager.session.add(namespace) for name in names: entry = NamespaceEntry(name=name, namespace=namespace) manager.session.add(entry) manager.session.commit() pybel-0.12.1/src/pybel/tokens.py000066400000000000000000000121751334645200200164720ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module helps handle node data dictionaries.""" from .constants import ( FRAGMENT, FRAGMENT_DESCRIPTION, FRAGMENT_START, FRAGMENT_STOP, FUNCTION, FUSION, FUSION_MISSING, FUSION_REFERENCE, FUSION_START, FUSION_STOP, GMOD, HGVS, IDENTIFIER, KIND, MEMBERS, MODIFIER, NAME, NAMESPACE, PARTNER_3P, PARTNER_5P, PMOD, PMOD_CODE, PMOD_POSITION, PRODUCTS, RANGE_3P, RANGE_5P, REACTANTS, REACTION, TARGET, VARIANTS, ) from .dsl import ( 
BaseAbundance, BaseEntity, CentralDogma, FUNC_TO_DSL, FUNC_TO_FUSION_DSL, FUNC_TO_LIST_DSL, FusionBase, Reaction, Variant, fragment, fusion_range, gmod, hgvs, missing_fusion_range, pmod, ) __all__ = [ 'parse_result_to_dsl', ] def parse_result_to_dsl(tokens): """Convert a ParseResult to a PyBEL DSL object. :type tokens: dict or pyparsing.ParseResults :rtype: BaseEntity """ if MODIFIER in tokens: return parse_result_to_dsl(tokens[TARGET]) elif REACTION == tokens[FUNCTION]: return _reaction_po_to_dict(tokens) elif VARIANTS in tokens: return _variant_po_to_dict(tokens) elif MEMBERS in tokens: return _list_po_to_dict(tokens) elif FUSION in tokens: return _fusion_to_dsl(tokens) return _simple_po_to_dict(tokens) def _fusion_to_dsl(tokens): """Convert a PyParsing data dictionary to a PyBEL fusion data dictionary. :param tokens: A PyParsing data dictionary representing a fusion :type tokens: ParseResult :rtype: FusionBase """ func = tokens[FUNCTION] fusion_dsl = FUNC_TO_FUSION_DSL[func] member_dsl = FUNC_TO_DSL[func] partner_5p = member_dsl( namespace=tokens[FUSION][PARTNER_5P][NAMESPACE], name=tokens[FUSION][PARTNER_5P][NAME] ) partner_3p = member_dsl( namespace=tokens[FUSION][PARTNER_3P][NAMESPACE], name=tokens[FUSION][PARTNER_3P][NAME] ) range_5p = _fusion_range_to_dsl(tokens[FUSION][RANGE_5P]) range_3p = _fusion_range_to_dsl(tokens[FUSION][RANGE_3P]) return fusion_dsl( partner_5p=partner_5p, partner_3p=partner_3p, range_5p=range_5p, range_3p=range_3p, ) def _fusion_range_to_dsl(tokens): """Convert a PyParsing data dictionary into a PyBEL. :type tokens: ParseResult :rtype: pybel.dsl.FusionRangeBase """ if FUSION_MISSING in tokens: return missing_fusion_range() return fusion_range( reference=tokens[FUSION_REFERENCE], start=tokens[FUSION_START], stop=tokens[FUSION_STOP] ) def _simple_po_to_dict(tokens): """Convert a simple named entity to a DSL object. 
:type tokens: ParseResult :rtype: BaseAbundance """ dsl = FUNC_TO_DSL.get(tokens[FUNCTION]) if dsl is None: raise ValueError('invalid tokens: {}'.format(tokens)) return dsl( namespace=tokens[NAMESPACE], name=tokens[NAME], ) def _variant_po_to_dict(tokens): """Convert a PyParsing data dictionary to a central dogma abundance (i.e., Protein, RNA, miRNA, Gene). :type tokens: ParseResult :rtype: CentralDogma """ dsl = FUNC_TO_DSL.get(tokens[FUNCTION]) if dsl is None: raise ValueError('invalid tokens: {}'.format(tokens)) return dsl( namespace=tokens[NAMESPACE], name=tokens[NAME], variants=[ _variant_to_dsl_helper(variant_tokens) for variant_tokens in tokens[VARIANTS] ], ) def _variant_to_dsl_helper(tokens): """Convert variant tokens to DSL objects. :type tokens: ParseResult :rtype: Variant """ kind = tokens[KIND] if kind == HGVS: return hgvs(tokens[IDENTIFIER]) if kind == GMOD: return gmod( name=tokens[IDENTIFIER][NAME], namespace=tokens[IDENTIFIER][NAMESPACE], ) if kind == PMOD: return pmod( name=tokens[IDENTIFIER][NAME], namespace=tokens[IDENTIFIER][NAMESPACE], code=tokens.get(PMOD_CODE), position=tokens.get(PMOD_POSITION), ) if kind == FRAGMENT: start, stop = tokens.get(FRAGMENT_START), tokens.get(FRAGMENT_STOP) return fragment( start=start, stop=stop, description=tokens.get(FRAGMENT_DESCRIPTION) ) raise ValueError('invalid fragment kind: {}'.format(kind)) def _reaction_po_to_dict(tokens): """Convert a reaction parse object to a DSL. :type tokens: ParseResult :rtype: Reaction """ return Reaction( reactants=_reaction_part_po_to_dict(tokens[REACTANTS]), products=_reaction_part_po_to_dict(tokens[PRODUCTS]), ) def _reaction_part_po_to_dict(tokens): """Convert a PyParsing result to a reaction. :type tokens: ParseResult :rtype: list[BaseAbundance] """ return [parse_result_to_dsl(token) for token in tokens] def _list_po_to_dict(tokens): """Convert a list parse object to a node. 
:param tokens: PyParsing ParseObject :rtype: ListAbundance """ func = tokens[FUNCTION] dsl = FUNC_TO_LIST_DSL[func] members = [parse_result_to_dsl(token) for token in tokens[MEMBERS]] return dsl(members) pybel-0.12.1/src/pybel/utils.py000066400000000000000000000243051334645200200163250ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Utilities for PyBEL.""" import hashlib import json import logging from collections import Iterable, MutableMapping, defaultdict from datetime import datetime from six import string_types from six.moves.cPickle import dumps from .constants import ( ACTIVITY, CITATION, CITATION_REFERENCE, CITATION_TYPE, DEGRADATION, EFFECT, EVIDENCE, FROM_LOC, IDENTIFIER, LOCATION, MODIFIER, NAME, NAMESPACE, OBJECT, RELATION, SUBJECT, TO_LOC, TRANSLOCATION, VERSION, ) log = logging.getLogger(__name__) def expand_dict(flat_dict, sep='_'): """Expand a flattened dictionary. :param dict flat_dict: a nested dictionary that has been flattened so the keys are composite :param str sep: the separator between concatenated keys :rtype: dict """ res = {} rdict = defaultdict(list) for flat_key, value in flat_dict.items(): key = flat_key.split(sep, 1) if 1 == len(key): res[key[0]] = value else: rdict[key[0]].append((key[1:], value)) for k, v in rdict.items(): res[k] = expand_dict({ik: iv for (ik,), iv in v}) return res def flatten_dict(data, parent_key='', sep='_'): """Flatten a nested dictionary. :param data: A nested dictionary :type data: dict or MutableMapping :param str parent_key: The parent's key. This is a value for tail recursion, so don't set it yourself. :param str sep: The separator used between dictionary levels :rtype: dict .. 
seealso:: http://stackoverflow.com/a/6027615 """ items = {} for key, value in data.items(): # prepend the parent key key = parent_key + sep + key if parent_key else key if isinstance(value, (dict, MutableMapping)): items.update(flatten_dict(value, key, sep=sep)) elif isinstance(value, (set, list)): items[key] = ','.join(value) else: items[key] = value return items def get_version(): """Get the current PyBEL version. :return: The current PyBEL version :rtype: str """ return VERSION def tokenize_version(version_string): """Tokenize a version string to a tuple. Truncates qualifiers like ``-dev``. :param str version_string: A version string :return: A tuple representing the version string :rtype: tuple >>> tokenize_version('0.1.2-dev') (0, 1, 2) """ before_dash = version_string.split('-')[0] version_tuple = before_dash.split('.')[:3] # take only the first 3 in case there's an extension like -dev.0 return tuple(map(int, version_tuple)) def ensure_quotes(s): """Quote a string that isn't solely alphanumeric. :type s: str :rtype: str """ return '"{}"'.format(s) if not s.isalnum() else s CREATION_DATE_FMT = '%Y-%m-%dT%H:%M:%S' PUBLISHED_DATE_FMT = '%Y-%m-%d' PUBLISHED_DATE_FMT_2 = '%d:%m:%Y %H:%M' DATE_VERSION_FMT = '%Y%m%d' def valid_date(s): """Check that a string represents a valid date in ISO 8601 format YYYY-MM-DD. :type s: str :rtype: bool """ try: datetime.strptime(s, PUBLISHED_DATE_FMT) return True except ValueError: return False def valid_date_version(s): """Check that the string is a valid date versions string. :type s: str :rtype: bool """ try: datetime.strptime(s, DATE_VERSION_FMT) return True except ValueError: return False def parse_datetime(s): """Try to parse a datetime object from a standard datetime format or date format. 
:param str s: A string representing a date or datetime :return: A parsed date object :rtype: datetime.date """ for fmt in (CREATION_DATE_FMT, PUBLISHED_DATE_FMT, PUBLISHED_DATE_FMT_2): try: dt = datetime.strptime(s, fmt) except ValueError: pass else: return dt raise ValueError('Incorrect datetime format for {}'.format(s)) def _hash_tuple(t): return hashlib.sha512(dumps(t)).hexdigest() def _get_citation_tuple(data): citation = data.get(CITATION) if citation is None: return None, None return '{type}:{reference}'.format(type=citation[CITATION_TYPE], reference=citation[CITATION_REFERENCE]) def _get_edge_tuple(u, v, data): """Convert an edge to a consistent tuple. :param BaseEntity u: The source BEL node :param BaseEntity v: The target BEL node :param dict data: The edge's data dictionary :return: A tuple that can be hashed representing this edge. Makes no promises to its structure. """ return ( u.as_bel(), v.as_bel(), _get_citation_tuple(data), data.get(EVIDENCE), canonicalize_edge(data), ) def hash_edge(u, v, data): """Convert an edge tuple to a SHA512 hash. :param BaseEntity u: The source BEL node :param BaseEntity v: The target BEL node :param dict data: The edge's data dictionary :return: A hashed version of the edge tuple using md5 hash of the binary pickle dump of u, v, and the json dump of d :rtype: str """ edge_tuple = _get_edge_tuple(u, v, data) return _hash_tuple(edge_tuple) def subdict_matches(target, query, partial_match=True): """Checks if all the keys in the query dict are in the target dict, and that their values match 1. Checks that all keys in the query dict are in the target dict 2. Matches the values of the keys in the query dict a. If the value is a string, then must match exactly b. If the value is a set/list/tuple, then will match any of them c. 
If the value is a dict, then recursively check if that subdict matches :param dict target: The dictionary to search :param dict query: A query dict with keys to match :param bool partial_match: Should the query values be used as partial or exact matches? Defaults to :code:`True`. :return: if all keys in b are in target_dict and their values match :rtype: bool """ for k, v in query.items(): if k not in target: return False elif not isinstance(v, (int, string_types, dict, Iterable)): raise ValueError('invalid value: {}'.format(v)) elif isinstance(v, (int, string_types)) and target[k] != v: return False elif isinstance(v, dict): if partial_match: if not isinstance(target[k], dict): return False elif not subdict_matches(target[k], v, partial_match): return False elif not partial_match and target[k] != v: return False elif isinstance(v, Iterable) and target[k] not in v: return False return True def hash_dump(data): """Hash an arbitrary JSON dictionary by dumping it in sorted order, encoding it in UTF-8, then hashing the bytes. :param data: An arbitrary JSON-serializable object :type data: dict or list or tuple :rtype: str """ return hashlib.sha512(json.dumps(data, sort_keys=True).encode('utf-8')).hexdigest() def hash_citation(type, reference): """Create a hash for a type/reference pair of a citation. :param str type: The corresponding citation type :param str reference: The citation reference :rtype: str """ s = u'{type}:{reference}'.format(type=type, reference=reference) return hashlib.sha512(s.encode('utf8')).hexdigest() def hash_evidence(text, type, reference): """Create a hash for an evidence and its citation. 
:param str text: The evidence text :param str type: The corresponding citation type :param str reference: The citation reference :rtype: str """ s = u'{type}:{reference}:{text}'.format(type=type, reference=reference, text=text) return hashlib.sha512(s.encode('utf8')).hexdigest() def canonicalize_edge(data): """Canonicalize the edge to a tuple based on the relation, subject modifications, and object modifications. :param dict data: A PyBEL edge data dictionary :return: A 3-tuple that's specific for the edge (relation, subject, object) :rtype: tuple """ return ( data[RELATION], _canonicalize_edge_modifications(data.get(SUBJECT)), _canonicalize_edge_modifications(data.get(OBJECT)), ) def _canonicalize_edge_modifications(data): """Return the SUBJECT or OBJECT entry of a PyBEL edge data dictionary as a canonical tuple. :param dict data: A PyBEL edge data dictionary :rtype: tuple """ if data is None: return modifier = data.get(MODIFIER) location = data.get(LOCATION) effect = data.get(EFFECT) if modifier is None and location is None: return result = [] if modifier == ACTIVITY: if effect: effect_name = effect.get(NAME) effect_identifier = effect.get(IDENTIFIER) t = ( ACTIVITY, effect[NAMESPACE], effect_name or effect_identifier, ) else: t = (ACTIVITY,) result.append(t) elif modifier == DEGRADATION: t = (DEGRADATION,) result.append(t) elif modifier == TRANSLOCATION: if effect: from_loc_name = effect[FROM_LOC].get(NAME) from_loc_identifier = effect[FROM_LOC].get(IDENTIFIER) to_loc_name = effect[TO_LOC].get(NAME) to_loc_identifier = effect[TO_LOC].get(IDENTIFIER) t = ( TRANSLOCATION, data[EFFECT][FROM_LOC][NAMESPACE], from_loc_name or from_loc_identifier, data[EFFECT][TO_LOC][NAMESPACE], to_loc_name or to_loc_identifier, ) else: t = (TRANSLOCATION,) result.append(t) if location: location_name = location.get(NAME) location_identifier = location.get(IDENTIFIER) t = ( LOCATION, location[NAMESPACE], location_name or location_identifier, ) result.append(t) if not result: raise 
ValueError('Invalid data: {}'.format(data)) return tuple(result) def get_corresponding_pickle_path(path): """Get the same path with a pickle extension. :param str path: A path to a BEL file. :rtype: str """ return '{path}.pickle'.format(path=path) pybel-0.12.1/tests/000077500000000000000000000000001334645200200140475ustar00rootroot00000000000000pybel-0.12.1/tests/__init__.py000066400000000000000000000000671334645200200161630ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for :mod:`pybel`.""" pybel-0.12.1/tests/constant_helper.py000066400000000000000000000634331334645200200176220ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Constants for PyBEL tests.""" import logging from pybel.constants import * from pybel.dsl import * from pybel.dsl.namespaces import hgnc log = logging.getLogger(__name__) expected_test_simple_metadata = { METADATA_NAME: "PyBEL Test Simple", METADATA_DESCRIPTION: "Made for testing PyBEL parsing", METADATA_VERSION: "1.6.0", METADATA_COPYRIGHT: "Copyright (c) Charles Tapley Hoyt. All Rights Reserved.", METADATA_AUTHORS: "Charles Tapley Hoyt", METADATA_LICENSES: "WTF License", METADATA_CONTACT: "charles.hoyt@scai.fraunhofer.de", METADATA_PROJECT: 'PyBEL Testing', } expected_test_thorough_metadata = { METADATA_NAME: "PyBEL Test Thorough", METADATA_DESCRIPTION: "Statements made up to contain many conceivable variants of nodes from BEL", METADATA_VERSION: "1.0.0", METADATA_COPYRIGHT: "Copyright (c) Charles Tapley Hoyt. 
All Rights Reserved.", METADATA_AUTHORS: "Charles Tapley Hoyt", METADATA_LICENSES: "WTF License", METADATA_CONTACT: "charles.hoyt@scai.fraunhofer.de" } citation_1 = { CITATION_TYPE: 'PubMed', CITATION_NAME: 'That one article from last week', CITATION_REFERENCE: '123455' } citation_2 = { CITATION_TYPE: 'PubMed', CITATION_NAME: 'That one article from last week #2', CITATION_REFERENCE: '123456' } evidence_1 = "Evidence 1" dummy_evidence = 'These are mostly made up' akt1 = hgnc(name='AKT1') egfr = hgnc(name='EGFR') fadd = hgnc(name='FADD') casp8 = hgnc(name='CASP8') mia = hgnc(name='MIA') il6 = protein('HGNC', 'IL6') adgrb1 = protein(namespace='HGNC', name='ADGRB1') adgrb2 = protein(namespace='HGNC', name='ADGRB2') adgrb_complex = complex_abundance([adgrb1, adgrb2]) achlorhydria = pathology(namespace='MESHD', name='Achlorhydria') akt1_rna = akt1.get_rna() akt1_gene = akt1_rna.get_gene() akt_methylated = akt1_gene.with_variants(gmod('Me')) akt1_phe_508_del = akt1_gene.with_variants(hgvs('p.Phe508del')) cftr = hgnc('CFTR') cftr_protein_unspecified_variant = cftr.with_variants(hgvs_unspecified()) cftr_protein_phe_508_del = cftr.with_variants(hgvs('p.Phe508del')) adenocarcinoma = pathology('MESHD', 'Adenocarcinoma') interleukin_23_complex = named_complex_abundance('GOCC', 'interleukin-23 complex') oxygen_atom = abundance(namespace='CHEBI', name='oxygen atom') hydrogen_peroxide = abundance('CHEBI', 'hydrogen peroxide') tmprss2_gene = gene('HGNC', 'TMPRSS2') tmprss2_erg_gene_fusion = gene_fusion( partner_5p=tmprss2_gene, range_5p=fusion_range('c', 1, 79), partner_3p=gene('HGNC', 'ERG'), range_3p=fusion_range('c', 312, 5034) ) bcr_jak2_gene_fusion = gene_fusion( partner_5p=gene('HGNC', 'BCR'), range_5p=fusion_range('c', '?', 1875), partner_3p=gene('HGNC', 'JAK2'), range_3p=fusion_range('c', 2626, '?') ) chchd4_aifm1_gene_fusion = gene_fusion( partner_5p=gene('HGNC', 'CHCHD4'), partner_3p=gene('HGNC', 'AIFM1') ) tmprss2_erg_protein_fusion = protein_fusion( 
partner_5p=protein('HGNC', 'TMPRSS2'), range_5p=fusion_range('p', 1, 79), partner_3p=protein('HGNC', 'ERG'), range_3p=fusion_range('p', 312, 5034) ) bcr_jak2_protein_fusion = protein_fusion( partner_5p=protein('HGNC', 'BCR'), range_5p=fusion_range('p', '?', 1875), partner_3p=protein('HGNC', 'JAK2'), range_3p=fusion_range('p', 2626, '?') ) chchd4_aifm1_protein_fusion = protein_fusion( protein('HGNC', 'CHCHD4'), protein('HGNC', 'AIFM1') ) bcr_jak2_rna_fusion = rna_fusion( partner_5p=rna('HGNC', 'BCR'), range_5p=fusion_range('r', '?', 1875), partner_3p=rna('HGNC', 'JAK2'), range_3p=fusion_range('r', 2626, '?') ) chchd4_aifm1_rna_fusion = rna_fusion( partner_5p=rna('HGNC', 'CHCHD4'), partner_3p=rna('HGNC', 'AIFM1') ) tmprss2_erg_rna_fusion = rna_fusion( partner_5p=rna('HGNC', 'TMPRSS2'), range_5p=fusion_range('r', 1, 79), partner_3p=rna('HGNC', 'ERG'), range_3p=fusion_range('r', 312, 5034) ) tmprss2_erg_rna_fusion_unspecified = rna_fusion( partner_5p=rna('HGNC', 'TMPRSS2'), partner_3p=rna('HGNC', 'ERG') ) BEL_THOROUGH_NODES = { oxygen_atom, tmprss2_erg_rna_fusion, tmprss2_erg_rna_fusion_unspecified, akt_methylated, bcr_jak2_rna_fusion, chchd4_aifm1_rna_fusion, akt1_gene, akt1_phe_508_del, akt1, gene('HGNC', 'AKT1', variants=hgvs('c.308G>A')), tmprss2_erg_gene_fusion, gene('HGNC', 'AKT1', variants=[hgvs('c.1521_1523delCTT'), hgvs('c.308G>A'), hgvs('p.Phe508del')]), mirna('HGNC', 'MIR21'), bcr_jak2_gene_fusion, gene('HGNC', 'CFTR', variants=hgvs('c.1521_1523delCTT')), gene('HGNC', 'CFTR'), gene('HGNC', 'CFTR', variants=hgvs('g.117199646_117199648delCTT')), gene('HGNC', 'CFTR', variants=hgvs('c.1521_1523delCTT')), protein('HGNC', 'AKT1', variants=pmod('Ph', 'Ser', 473)), mirna('HGNC', 'MIR21', variants=hgvs('p.Phe508del')), protein('HGNC', 'AKT1', variants=hgvs('p.C40*')), protein('HGNC', 'AKT1', variants=[hgvs('p.Ala127Tyr'), pmod('Ph', 'Ser')]), chchd4_aifm1_gene_fusion, tmprss2_erg_protein_fusion, protein('HGNC', 'AKT1', variants=hgvs('p.Arg1851*')), 
bcr_jak2_protein_fusion, protein('HGNC', 'AKT1', variants=hgvs('p.40*')), chchd4_aifm1_protein_fusion, protein('HGNC', 'CFTR', variants=hgvs_reference()), cftr, egfr, cftr_protein_unspecified_variant, adenocarcinoma, cftr_protein_phe_508_del, protein('HGNC', 'MIA', variants=fragment(5, 20)), mia, interleukin_23_complex, protein('HGNC', 'MIA', variants=fragment(1, '?')), protein('HGNC', 'MIA', variants=fragment()), protein('HGNC', 'MIA', variants=fragment(description='55kD')), protein('HGNC', 'CFTR', variants=hgvs('p.Gly576Ala')), akt1_rna, rna('HGNC', 'AKT1', variants=[hgvs('c.1521_1523delCTT'), hgvs('p.Phe508del')]), gene('HGNC', 'NCF1'), complex_abundance([ gene('HGNC', 'NCF1'), protein('HGNC', 'HBP1') ]), protein('HGNC', 'HBP1'), complex_abundance([protein('HGNC', 'FOS'), protein('HGNC', 'JUN')]), protein('HGNC', 'FOS'), protein('HGNC', 'JUN'), rna('HGNC', 'CFTR', variants=hgvs('r.1521_1523delcuu')), rna('HGNC', 'CFTR'), rna('HGNC', 'CFTR', variants=hgvs('r.1653_1655delcuu')), composite_abundance([ interleukin_23_complex, il6 ]), il6, bioprocess('GOBP', 'cell cycle arrest'), hydrogen_peroxide, protein('HGNC', 'CAT'), gene('HGNC', 'CAT'), protein('HGNC', 'HMGCR'), bioprocess('GOBP', 'cholesterol biosynthetic process'), gene('HGNC', 'APP', variants=hgvs('c.275341G>C')), gene('HGNC', 'APP'), pathology('MESHD', 'Alzheimer Disease'), complex_abundance([protein('HGNC', 'F3'), protein('HGNC', 'F7')]), protein('HGNC', 'F3'), protein('HGNC', 'F7'), protein('HGNC', 'F9'), protein('HGNC', 'GSK3B', variants=pmod('Ph', 'Ser', 9)), protein('HGNC', 'GSK3B'), pathology('MESHD', 'Psoriasis'), pathology('MESHD', 'Skin Diseases'), reaction( reactants=[ abundance('CHEBI', '(3S)-3-hydroxy-3-methylglutaryl-CoA'), abundance('CHEBI', 'NADPH'), abundance('CHEBI', 'hydron') ], products=[ abundance('CHEBI', 'NADP(+)'), abundance('CHEBI', 'mevalonate') ] ), abundance('CHEBI', '(3S)-3-hydroxy-3-methylglutaryl-CoA'), abundance('CHEBI', 'NADPH'), abundance('CHEBI', 'hydron'), 
abundance('CHEBI', 'mevalonate'), abundance('CHEBI', 'NADP(+)'), abundance('CHEBI', 'nitric oxide'), complex_abundance([ protein('HGNC', 'ITGAV'), protein('HGNC', 'ITGB3') ]), protein('HGNC', 'ITGAV'), protein('HGNC', 'ITGB3'), protein('HGNC', 'FADD'), abundance('TESTNS2', 'Abeta_42'), protein('TESTNS2', 'GSK3 Family'), protein('HGNC', 'PRKCA'), protein('HGNC', 'CDK5'), protein('HGNC', 'CASP8'), protein('HGNC', 'AKT1', variants=pmod(namespace='TESTNS2', name='PhosRes', code='Ser', position=473)), protein('HGNC', 'HRAS', variants=pmod('Palm')), bioprocess('GOBP', 'apoptotic process'), composite_abundance([ abundance('TESTNS2', 'Abeta_42'), protein('HGNC', 'CASP8'), protein('HGNC', 'FADD') ]), reaction( reactants=[protein('HGNC', 'CDK5R1')], products=[protein('HGNC', 'CDK5')], ), protein('HGNC', 'PRKCB'), named_complex_abundance('TESTNS2', 'AP-1 Complex'), protein('HGNC', 'PRKCE'), protein('HGNC', 'PRKCD'), protein('TESTNS2', 'CAPN Family'), gene('TESTNS2', 'AKT1 ortholog'), protein('HGNC', 'HRAS'), protein('HGNC', 'CDK5R1'), protein('TESTNS2', 'PRKC'), bioprocess('GOBP', 'neuron apoptotic process'), protein('HGNC', 'MAPT', variants=pmod('Ph')), protein('HGNC', 'MAPT'), gene('HGNC', 'ARRDC2'), gene('HGNC', 'ARRDC3'), gene('dbSNP', 'rs123456') } BEL_THOROUGH_EDGES = [ (gene('HGNC', 'AKT1', variants=hgvs('p.Phe508del')), akt1, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: DIRECTLY_DECREASES, }), (akt1, protein('HGNC', 'AKT1', variants=pmod('Ph', 'Ser', 473)), { RELATION: HAS_VARIANT, }), (akt1, protein('HGNC', 'AKT1', variants=hgvs('p.C40*')), { RELATION: HAS_VARIANT, }), (akt1, protein('HGNC', 'AKT1', variants=[hgvs('p.Ala127Tyr'), pmod('Ph', 'Ser')]), { RELATION: HAS_VARIANT, }), (akt1, protein('HGNC', 'AKT1', variants=[hgvs('p.Ala127Tyr'), pmod('Ph', 'Ser')]), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: DIRECTLY_DECREASES, SUBJECT: {LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'}}, OBJECT: {LOCATION: {NAMESPACE: 'GOCC', NAME: 
'intracellular'}}, }), (akt1, protein('HGNC', 'AKT1', variants=hgvs('p.Arg1851*')), { RELATION: HAS_VARIANT, }), (akt1, protein('HGNC', 'AKT1', variants=hgvs('p.40*')), { RELATION: HAS_VARIANT, }), (akt1, protein('HGNC', 'MIA', variants=fragment()), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, SUBJECT: {MODIFIER: DEGRADATION}, }), (akt1, protein('HGNC', 'CFTR', variants=hgvs('p.Gly576Ala')), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, }), (akt1, rna('HGNC', 'CFTR', variants=hgvs('r.1521_1523delcuu')), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, SUBJECT: {MODIFIER: ACTIVITY, EFFECT: {NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'kin'}}, }), (akt1, rna('HGNC', 'CFTR', variants=hgvs('r.1653_1655delcuu')), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, SUBJECT: {MODIFIER: ACTIVITY}, }), (akt1, egfr, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, SUBJECT: { MODIFIER: ACTIVITY, EFFECT: { NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'cat' } }, OBJECT: {MODIFIER: DEGRADATION}, }), (akt1, egfr, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, SUBJECT: { MODIFIER: ACTIVITY, EFFECT: {NAME: 'kin', NAMESPACE: BEL_DEFAULT_NAMESPACE} }, OBJECT: translocation( {NAMESPACE: 'GOCC', NAME: 'intracellular'}, {NAMESPACE: 'GOCC', NAME: 'extracellular space'} ), }), (gene('HGNC', 'AKT1', variants=hgvs('c.308G>A')), tmprss2_erg_gene_fusion, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: CAUSES_NO_CHANGE, }), (gene('HGNC', 'AKT1', variants=hgvs('c.308G>A')), gene('HGNC', 'AKT1', variants=[hgvs('c.1521_1523delCTT'), hgvs('c.308G>A'), hgvs('p.Phe508del')]), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, SUBJECT: {LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'}}, }), (mirna('HGNC', 'MIR21'), bcr_jak2_gene_fusion, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: DIRECTLY_INCREASES, }), (mirna('HGNC', 
'MIR21'), protein('HGNC', 'AKT1', variants=pmod('Ph', 'Ser', 473)), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: DECREASES, SUBJECT: {LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'}}, }), (mirna('HGNC', 'MIR21'), mirna('HGNC', 'MIR21', variants=hgvs('p.Phe508del')), { RELATION: HAS_VARIANT, }), (gene('HGNC', 'CFTR', variants=hgvs('c.1521_1523delCTT')), akt1, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, OBJECT: {MODIFIER: DEGRADATION}, }), (gene('HGNC', 'CFTR'), gene('HGNC', 'CFTR', variants=hgvs('c.1521_1523delCTT')), { RELATION: HAS_VARIANT, }), (gene('HGNC', 'CFTR'), gene('HGNC', 'CFTR', variants=hgvs('g.117199646_117199648delCTT')), { RELATION: HAS_VARIANT, }), (gene('HGNC', 'CFTR'), gene('HGNC', 'CFTR', variants=hgvs('c.1521_1523delCTT')), { RELATION: HAS_VARIANT, }), (gene('HGNC', 'CFTR', variants=hgvs('g.117199646_117199648delCTT')), gene('HGNC', 'CFTR', variants=hgvs('c.1521_1523delCTT')), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, }), (mirna('HGNC', 'MIR21', variants=hgvs('p.Phe508del')), protein('HGNC', 'AKT1', variants=hgvs('p.C40*')), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, SUBJECT: {LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'}}, }), (chchd4_aifm1_gene_fusion, tmprss2_erg_protein_fusion, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, }), (protein('HGNC', 'AKT1', variants=hgvs('p.Arg1851*')), bcr_jak2_protein_fusion, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, }), (protein('HGNC', 'AKT1', variants=hgvs('p.40*')), chchd4_aifm1_protein_fusion, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, }), (protein('HGNC', 'CFTR', variants=hgvs_reference()), egfr, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, OBJECT: { MODIFIER: TRANSLOCATION, EFFECT: { FROM_LOC: {NAMESPACE: 'GOCC', NAME: 'intracellular'}, TO_LOC: {NAMESPACE: 'GOCC', NAME: 'cell surface'} } }, 
}), (cftr, protein('HGNC', 'CFTR', variants=hgvs('=')), { RELATION: HAS_VARIANT, }), (cftr, protein('HGNC', 'CFTR', variants=hgvs('?')), { RELATION: HAS_VARIANT, }), (cftr, protein('HGNC', 'CFTR', variants=hgvs('p.Phe508del')), { RELATION: HAS_VARIANT, }), (cftr, protein('HGNC', 'CFTR', variants=hgvs('p.Gly576Ala')), { RELATION: HAS_VARIANT, }), (mia, protein('HGNC', 'MIA', variants=fragment(5, 20)), { RELATION: HAS_VARIANT, }), (mia, protein('HGNC', 'MIA', variants=fragment(1, '?')), { RELATION: HAS_VARIANT, }), (mia, protein('HGNC', 'MIA', variants=fragment()), { RELATION: HAS_VARIANT, }), (mia, protein('HGNC', 'MIA', variants=fragment(description='55kD')), { RELATION: HAS_VARIANT, }), (akt1_rna, rna('HGNC', 'AKT1', variants=[hgvs('c.1521_1523delCTT'), hgvs('p.Phe508del')]), { RELATION: HAS_VARIANT, }), (akt1_rna, akt1, { RELATION: TRANSLATED_TO, }), (gene('HGNC', 'APP'), gene('HGNC', 'APP', variants=hgvs('c.275341G>C')), { RELATION: HAS_VARIANT, }), (complex_abundance([protein('HGNC', 'F3'), protein('HGNC', 'F7')]), protein('HGNC', 'F3'), { RELATION: HAS_COMPONENT, }), (complex_abundance([protein('HGNC', 'F3'), protein('HGNC', 'F7')]), protein('HGNC', 'F7'), { RELATION: HAS_COMPONENT, }), (protein('HGNC', 'GSK3B'), protein('HGNC', 'GSK3B', variants=pmod('Ph', 'Ser', 9)), { RELATION: HAS_VARIANT, }), (pathology('MESHD', 'Psoriasis'), pathology('MESHD', 'Skin Diseases'), { RELATION: IS_A, }), (complex_abundance([gene('HGNC', 'NCF1'), protein('HGNC', 'HBP1')]), protein('HGNC', 'HBP1'), { RELATION: HAS_COMPONENT, }), (complex_abundance([gene('HGNC', 'NCF1'), protein('HGNC', 'HBP1')]), gene('HGNC', 'NCF1'), { RELATION: HAS_COMPONENT, }), (complex_abundance([protein('HGNC', 'FOS'), protein('HGNC', 'JUN')]), protein('HGNC', 'FOS'), { RELATION: HAS_COMPONENT, }), (complex_abundance([protein('HGNC', 'FOS'), protein('HGNC', 'JUN')]), protein('HGNC', 'JUN'), { RELATION: HAS_COMPONENT, }), (rna('HGNC', 'CFTR'), rna('HGNC', 'CFTR', variants=hgvs('r.1521_1523delcuu')), { 
RELATION: HAS_VARIANT, }), (rna('HGNC', 'CFTR'), rna('HGNC', 'CFTR', variants=hgvs('r.1653_1655delcuu')), { RELATION: HAS_VARIANT, }), (composite_abundance([interleukin_23_complex, il6]), il6, { RELATION: HAS_COMPONENT, }), (composite_abundance([interleukin_23_complex, il6]), interleukin_23_complex, { RELATION: HAS_COMPONENT, }), (protein('HGNC', 'CFTR', variants=hgvs('?')), pathology('MESHD', 'Adenocarcinoma'), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, }), (rna('HGNC', 'AKT1', variants=[hgvs('c.1521_1523delCTT'), hgvs('p.Phe508del')]), tmprss2_erg_rna_fusion, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: DIRECTLY_INCREASES, }), (rna_fusion(rna('HGNC', 'TMPRSS2'), rna('HGNC', 'ERG')), complex_abundance([gene('HGNC', 'NCF1'), protein('HGNC', 'HBP1')]), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, }), (protein('HGNC', 'MIA', variants=fragment(5, 20)), named_complex_abundance('GOCC', 'interleukin-23 complex'), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, OBJECT: { MODIFIER: TRANSLOCATION, EFFECT: { FROM_LOC: {NAMESPACE: 'GOCC', NAME: 'intracellular'}, TO_LOC: {NAMESPACE: 'GOCC', NAME: 'extracellular space'} } }, }), (protein('HGNC', 'MIA', variants=fragment(1, '?')), egfr, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, OBJECT: { MODIFIER: TRANSLOCATION, EFFECT: { FROM_LOC: {NAMESPACE: 'GOCC', NAME: 'cell surface'}, TO_LOC: {NAMESPACE: 'GOCC', NAME: 'endosome'} } }, }), (akt1_rna, egfr, { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, OBJECT: { MODIFIER: TRANSLOCATION, EFFECT: { FROM_LOC: {NAMESPACE: 'GOCC', NAME: 'cell surface'}, TO_LOC: {NAMESPACE: 'GOCC', NAME: 'endosome'} } }, }), (rna_fusion(rna('HGNC', 'CHCHD4'), rna('HGNC', 'AIFM1'), ), complex_abundance([protein('HGNC', 'FOS'), protein('HGNC', 'JUN')]), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: INCREASES, }), (composite_abundance([interleukin_23_complex, 
il6]), bioprocess('GOBP', 'cell cycle arrest'), { EVIDENCE: dummy_evidence, CITATION: citation_1, RELATION: DECREASES, }), (protein('HGNC', 'CAT'), hydrogen_peroxide, { EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification', CITATION: citation_2, RELATION: DIRECTLY_DECREASES, SUBJECT: {LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'}}, }), (gene('HGNC', 'CAT'), hydrogen_peroxide, { EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification', CITATION: citation_2, RELATION: DIRECTLY_DECREASES, SUBJECT: {LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'}}, }), (protein('HGNC', 'HMGCR'), bioprocess('GOBP', 'cholesterol biosynthetic process'), { EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification', CITATION: citation_2, RELATION: RATE_LIMITING_STEP_OF, SUBJECT: {MODIFIER: ACTIVITY, EFFECT: {NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'cat'}}, }), (gene('HGNC', 'APP', variants=hgvs('c.275341G>C')), pathology('MESHD', 'Alzheimer Disease'), { EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification', CITATION: citation_2, RELATION: CAUSES_NO_CHANGE, }), (complex_abundance([protein('HGNC', 'F3'), protein('HGNC', 'F7')]), protein('HGNC', 'F9'), { EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification', CITATION: citation_2, RELATION: REGULATES, SUBJECT: {MODIFIER: ACTIVITY, EFFECT: {NAME: 'pep', NAMESPACE: BEL_DEFAULT_NAMESPACE}}, OBJECT: {MODIFIER: ACTIVITY, EFFECT: {NAME: 'pep', NAMESPACE: BEL_DEFAULT_NAMESPACE}}, }), (protein('HGNC', 'GSK3B', variants=pmod('Ph', 'Ser', 9)), protein('HGNC', 'GSK3B'), { EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification', CITATION: citation_2, RELATION: POSITIVE_CORRELATION, OBJECT: {MODIFIER: ACTIVITY, EFFECT: {NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'kin'}}, }), (protein('HGNC', 'GSK3B'), protein('HGNC', 'GSK3B', variants=pmod('Ph', 'Ser', 9)), { EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification', 
CITATION: citation_2, RELATION: POSITIVE_CORRELATION, SUBJECT: {MODIFIER: ACTIVITY, EFFECT: {NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'kin'}}, }), (reaction( reactants=( abundance('CHEBI', '(3S)-3-hydroxy-3-methylglutaryl-CoA'), abundance('CHEBI', 'NADPH'), abundance('CHEBI', 'hydron') ), products=( abundance('CHEBI', 'NADP(+)'), abundance('CHEBI', 'mevalonate') ) ), abundance('CHEBI', '(3S)-3-hydroxy-3-methylglutaryl-CoA'), { RELATION: HAS_REACTANT, }), (reaction( reactants=( abundance('CHEBI', '(3S)-3-hydroxy-3-methylglutaryl-CoA'), abundance('CHEBI', 'NADPH'), abundance('CHEBI', 'hydron') ), products=( abundance('CHEBI', 'NADP(+)'), abundance('CHEBI', 'mevalonate') ) ), abundance('CHEBI', 'NADPH'), { RELATION: HAS_REACTANT, }), ( reaction( reactants=( abundance('CHEBI', '(3S)-3-hydroxy-3-methylglutaryl-CoA'), abundance('CHEBI', 'NADPH'), abundance('CHEBI', 'hydron') ), products=( abundance('CHEBI', 'NADP(+)'), abundance('CHEBI', 'mevalonate') ) ), abundance('CHEBI', 'hydron'), { RELATION: HAS_REACTANT, }), (reaction( reactants=( abundance('CHEBI', '(3S)-3-hydroxy-3-methylglutaryl-CoA'), abundance('CHEBI', 'NADPH'), abundance('CHEBI', 'hydron') ), products=( abundance('CHEBI', 'NADP(+)'), abundance('CHEBI', 'mevalonate')) ), abundance('CHEBI', 'mevalonate'), { RELATION: HAS_PRODUCT, }), (reaction( reactants=( abundance('CHEBI', '(3S)-3-hydroxy-3-methylglutaryl-CoA'), abundance('CHEBI', 'NADPH'), abundance('CHEBI', 'hydron') ), products=( abundance('CHEBI', 'NADP(+)'), abundance('CHEBI', 'mevalonate') ) ), abundance('CHEBI', 'NADP(+)'), { RELATION: HAS_PRODUCT, }), (reaction( reactants=( abundance('CHEBI', '(3S)-3-hydroxy-3-methylglutaryl-CoA'), abundance('CHEBI', 'NADPH'), abundance('CHEBI', 'hydron') ), products=( abundance('CHEBI', 'NADP(+)'), abundance('CHEBI', 'mevalonate') ) ), bioprocess('GOBP', 'cholesterol biosynthetic process'), { EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification', CITATION: citation_2, RELATION: SUBPROCESS_OF, }), 
(abundance('CHEBI', 'nitric oxide'), complex_abundance([protein('HGNC', 'ITGAV'), protein('HGNC', 'ITGB3')]), { EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification', CITATION: citation_2, RELATION: INCREASES, OBJECT: { MODIFIER: TRANSLOCATION, EFFECT: { FROM_LOC: {NAMESPACE: 'GOCC', NAME: 'intracellular'}, TO_LOC: {NAMESPACE: 'GOCC', NAME: 'cell surface'} } }, }), (complex_abundance([protein('HGNC', 'ITGAV'), protein('HGNC', 'ITGB3')]), protein('HGNC', 'ITGAV'), { RELATION: HAS_COMPONENT, }), (complex_abundance([protein('HGNC', 'ITGAV'), protein('HGNC', 'ITGB3')]), protein('HGNC', 'ITGB3'), { RELATION: HAS_COMPONENT, }), (gene('HGNC', 'ARRDC2'), gene('HGNC', 'ARRDC3'), { RELATION: EQUIVALENT_TO, }), (gene('HGNC', 'ARRDC3'), gene('HGNC', 'ARRDC2'), { RELATION: EQUIVALENT_TO, CITATION: citation_2, EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification' }), (gene('dbSNP', 'rs123456'), gene('HGNC', 'CFTR', variants=hgvs('c.1521_1523delCTT')), { RELATION: ASSOCIATION, CITATION: citation_2, EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification' }), (gene('HGNC', 'CFTR', variants=hgvs('c.1521_1523delCTT')), gene('dbSNP', 'rs123456'), { RELATION: ASSOCIATION, CITATION: citation_2, EVIDENCE: 'These were all explicitly stated in the BEL 2.0 Specification' }), ] pybel-0.12.1/tests/constants.py000066400000000000000000000423251334645200200164430ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Constants for PyBEL tests.""" import logging import unittest from json import dumps from pybel import BELGraph from pybel.constants import ( ANNOTATIONS, ASSOCIATION, CITATION, CITATION_NAME, CITATION_REFERENCE, CITATION_TYPE, DECREASES, DIRECTLY_DECREASES, EVIDENCE, INCREASES, METADATA_AUTHORS, METADATA_DESCRIPTION, METADATA_LICENSES, METADATA_NAME, METADATA_VERSION, OPENBEL_ANNOTATION_RESOURCES, OPENBEL_NAMESPACE_RESOURCES, RELATION, ) from pybel.dsl import BaseEntity, complex_abundance, pathology, protein from 
pybel.dsl.namespaces import hgnc from pybel.parser.exc import ( BELSyntaxError, IllegalAnnotationValueWarning, InvalidCitationLengthException, InvalidCitationType, InvalidFunctionSemantic, InvalidPubMedIdentifierWarning, MalformedTranslocationWarning, MissingAnnotationKeyWarning, MissingAnnotationRegexWarning, MissingCitationException, MissingMetadataException, MissingNamespaceNameWarning, MissingNamespaceRegexWarning, MissingSupportWarning, NakedNameWarning, NestedRelationWarning, PlaceholderAminoAcidWarning, UndefinedAnnotationWarning, UndefinedNamespaceWarning, VersionFormatWarning, ) from pybel.parser.parse_bel import BELParser from pybel.utils import subdict_matches from tests.constant_helper import ( BEL_THOROUGH_EDGES, BEL_THOROUGH_NODES, citation_1, evidence_1, expected_test_simple_metadata, expected_test_thorough_metadata, ) log = logging.getLogger(__name__) test_citation_dict = { CITATION_TYPE: 'PubMed', CITATION_NAME: 'TestName', CITATION_REFERENCE: '1235813' } SET_CITATION_TEST = 'SET Citation = {{"{type}","{name}","{reference}"}}'.format(**test_citation_dict) test_evidence_text = 'I read it on Twitter' test_set_evidence = 'SET Evidence = "{}"'.format(test_evidence_text) HGNC_KEYWORD = 'HGNC' HGNC_URL = OPENBEL_NAMESPACE_RESOURCES + 'hgnc-human-genes.belns' MESH_DISEASES_KEYWORD = 'MeSHDisease' MESH_DISEASES_URL = OPENBEL_ANNOTATION_RESOURCES + "mesh-diseases.belanno" akt1 = hgnc(name='AKT1') egfr = hgnc(name='EGFR') fadd = hgnc(name='FADD') casp8 = hgnc(name='CASP8') def update_provenance(control_parser): """Put a default evidence and citation in a BEL parser. :param pybel.parser.parse_control.ControlParser control_parser: """ control_parser.citation.update(test_citation_dict) control_parser.evidence = test_evidence_text def assert_has_node(self, node, graph, **kwargs): """Check if a node with the given properties is contained within a graph. 
:param self: A Test Case :type self: unittest.TestCase :param node: :param graph: :type graph: BELGraph :param kwargs: """ self.assertIsInstance(node, BaseEntity) self.assertIn( node, graph, msg='{} not found in graph. Other nodes:\n{}'.format(node.as_bel(), '\n'.join( n.as_bel() for n in graph )), ) if kwargs: missing = set(kwargs) - set(graph.nodes[node]) self.assertFalse(missing, msg="Missing {} in node data".format(', '.join(sorted(missing)))) self.assertTrue(all(kwarg in graph.nodes[node] for kwarg in kwargs), msg="Missing kwarg in node data") self.assertEqual(kwargs, {k: graph.nodes[node][k] for k in kwargs}, msg="Wrong values in node data") def any_dict_matches(dict_of_dicts, query_dict): """ :param dict_of_dicts: :param query_dict: :return: """ return any( query_dict == sd for sd in dict_of_dicts.values() ) def any_subdict_matches(dict_of_dicts, query_dict): """Checks if dictionary target_dict matches one of the subdictionaries of a :param dict[any,dict] dict_of_dicts: dictionary of dictionaries :param dict query_dict: dictionary :return: if dictionary target_dict matches one of the subdictionaries of a :rtype: bool """ return any( subdict_matches(sub_dict, query_dict) for sub_dict in dict_of_dicts.values() ) def assert_has_edge(self, u, v, graph, permissive=True, **kwargs): """A helper function for checking if an edge with the given properties is contained within a graph :param unittest.TestCase self: A TestCase :param u: source node :type u: BaseEntity or tuple :param v: target node :type v: BaseEntity or tuple :param BELGraph graph: underlying graph """ self.assertIsInstance(u, BaseEntity) self.assertIsInstance(v, BaseEntity) self.assertTrue( graph.has_edge(u, v), msg='Edge ({}, {}) not in graph. 
Other edges:\n{}'.format(u, v, '\n'.join( '{} {} {}'.format(u.as_bel(), d[RELATION], v.as_bel()) for u, v, d in graph.edges(data=True) )) ) if not kwargs: return if permissive: matches = any_subdict_matches(graph[u][v], kwargs) else: matches = any_dict_matches(graph[u][v], kwargs) msg = 'No edge ({}, {}) with correct properties. expected:\n {}\nbut got:\n{}'.format( u, v, dumps(kwargs, indent=2, sort_keys=True), str(graph[u][v]) ) self.assertTrue(matches, msg=msg) class TestGraphMixin(unittest.TestCase): """A test case with additional functions for testing graphs.""" def assert_has_node(self, g, n, **kwargs): """Help assert node membership. :param g: Graph :param n: Node :param kwargs: """ assert_has_node(self, n, g, **kwargs) def assert_has_edge(self, g, u, v, **kwargs): """Help assert edge membership. :param g: Graph :param u: Source node :param v: Target node :param kwargs: """ assert_has_edge(self, u, v, g, **kwargs) class TestTokenParserBase(unittest.TestCase): """A test case that has a BEL parser available.""" @classmethod def setUpClass(cls): """Build a BEL graph and BEL parser that persist through the class.""" cls.graph = BELGraph() cls.parser = BELParser( cls.graph, autostreamline=False, disallow_unqualified_translocations=True, ) def setUp(self): """Clear the parser at the beginning of each test.""" self.parser.clear() def assert_has_node(self, member, **kwargs): """Assert that this test case's graph has the given node. :type member: tuple or BaseEntity """ assert_has_node(self, member, self.graph, **kwargs) def assert_has_edge(self, u, v, **kwargs): """Assert that this test case's graph has the given edge. 
:param u: source node :type u: BaseEntity or tuple :param v: target node :type v: BaseEntity or tuple """ assert_has_edge(self, u, v, self.graph, **kwargs) def add_default_provenance(self): """Add a default citation and evidence to the parser.""" update_provenance(self.parser.control_parser) def help_check_hgnc(test_case, namespace_dict): """Assert that the namespace dictionary is correct. :param unittest.TestCase test_case: :param namespace_dict: :return: """ test_case.assertIn(HGNC_KEYWORD, namespace_dict) test_case.assertIn('MHS2', namespace_dict[HGNC_KEYWORD]) test_case.assertEqual(set('G'), set(namespace_dict[HGNC_KEYWORD]['MHS2'])) test_case.assertIn('MIATNB', namespace_dict[HGNC_KEYWORD]) test_case.assertEqual(set('GR'), set(namespace_dict[HGNC_KEYWORD]['MIATNB'])) test_case.assertIn('MIA', namespace_dict[HGNC_KEYWORD]) test_case.assertEqual(set('GRP'), set(namespace_dict[HGNC_KEYWORD]['MIA'])) class BelReconstitutionMixin(TestGraphMixin): """A test case that has checks for properly loading several BEL Scripts.""" def bel_simple_reconstituted(self, graph, check_metadata=True): """Check that test_bel.bel was loaded properly. 
:param BELGraph graph: A BEL grpah :param bool check_metadata: Check the graph's document section is correct """ self.assertIsNotNone(graph) self.assertIsInstance(graph, BELGraph) if check_metadata: self.assertIsNotNone(graph.document) self.assertEqual(expected_test_simple_metadata[METADATA_NAME], graph.name) self.assertEqual(expected_test_simple_metadata[METADATA_VERSION], graph.version) self.assertEqual(4, graph.number_of_nodes()) # FIXME this should work, but is getting 8 for the upgrade function # self.assertEqual(6, graph.number_of_edges(), # msg='Edges:\n{}'.format('\n'.join(map(str, graph.edges(keys=True, data=True))))) for node in graph: self.assertIsInstance(node, BaseEntity) self.assertIn(akt1, graph) self.assertIn(egfr, graph) self.assertIn(fadd, graph) self.assertIn(casp8, graph) bel_simple_citation_1 = { CITATION_NAME: "That one article from last week", CITATION_REFERENCE: "123455", CITATION_TYPE: "PubMed" } bel_simple_citation_2 = { CITATION_NAME: "That other article from last week", CITATION_REFERENCE: "123456", CITATION_TYPE: "PubMed" } evidence_1_extra = "Evidence 1 w extra notes" evidence_2 = 'Evidence 2' evidence_3 = 'Evidence 3' assert_has_edge(self, akt1, egfr, graph, **{ RELATION: INCREASES, CITATION: bel_simple_citation_1, EVIDENCE: evidence_1_extra, ANNOTATIONS: { 'Species': {'9606': True} } }) assert_has_edge(self, egfr, fadd, graph, **{ RELATION: DECREASES, ANNOTATIONS: { 'Species': {'9606': True}, 'CellLine': {'10B9 cell': True} }, CITATION: bel_simple_citation_1, EVIDENCE: evidence_2 }) assert_has_edge(self, egfr, casp8, graph, **{ RELATION: DIRECTLY_DECREASES, ANNOTATIONS: { 'Species': {'9606': True}, 'CellLine': {'10B9 cell': True} }, CITATION: bel_simple_citation_1, EVIDENCE: evidence_2, }) assert_has_edge(self, fadd, casp8, graph, **{ RELATION: INCREASES, ANNOTATIONS: { 'Species': {'10116': True} }, CITATION: bel_simple_citation_2, EVIDENCE: evidence_3, }) assert_has_edge(self, akt1, casp8, graph, **{ RELATION: ASSOCIATION, 
ANNOTATIONS: { 'Species': {'10116': True} }, CITATION: bel_simple_citation_2, EVIDENCE: evidence_3, }) assert_has_edge(self, casp8, akt1, graph, **{ RELATION: ASSOCIATION, ANNOTATIONS: { 'Species': {'10116': True} }, CITATION: bel_simple_citation_2, EVIDENCE: evidence_3, }) def bel_thorough_reconstituted(self, graph, check_metadata=True, check_warnings=True, check_provenance=True, check_citation_name=True): """Check that thorough.bel was loaded properly. :param BELGraph graph: A BEL graph :param bool check_metadata: Check the graph's document section is correct :param bool check_warnings: Check the graph produced the expected warnings :param bool check_provenance: Check the graph's definition section is correct :param bool check_citation_name: Check that the names in the citations get reconstituted. This isn't strictly necessary since this data can be looked up """ self.assertIsNotNone(graph) self.assertIsInstance(graph, BELGraph) if check_warnings: self.assertEqual(0, len(graph.warnings), msg='Document warnings:\n{}'.format('\n'.join(map(str, graph.warnings)))) if check_metadata: self.assertLessEqual(set(expected_test_thorough_metadata), set(graph.document)) self.assertEqual(expected_test_thorough_metadata[METADATA_NAME], graph.name) self.assertEqual(expected_test_thorough_metadata[METADATA_VERSION], graph.version) self.assertEqual(expected_test_thorough_metadata[METADATA_DESCRIPTION], graph.description) if check_provenance: self.assertEqual({'CHEBI', 'HGNC', 'GOBP', 'GOCC', 'MESHD', 'TESTNS2'}, set(graph.namespace_url)) self.assertEqual({'dbSNP'}, set(graph.namespace_pattern)) self.assertEqual({'TESTAN1', 'TESTAN2'}, set(graph.annotation_list)) self.assertEqual({'TestRegex'}, set(graph.annotation_pattern)) for node in graph: self.assertIsInstance(node, BaseEntity) self.assertEqual(set(BEL_THOROUGH_NODES), set(graph), msg='Nodes not equal') # FIXME # self.assertEqual(set((u, v) for u, v, _ in e), set(g.edges())) self.assertLess(0, graph.number_of_edges()) for u, 
v, data in BEL_THOROUGH_EDGES: if not check_citation_name and CITATION in data and CITATION_NAME in data[CITATION]: data[CITATION] = data[CITATION].copy() del data[CITATION][CITATION_NAME] assert_has_edge(self, u, v, graph, permissive=True, **data) def bel_slushy_reconstituted(self, graph, check_metadata=True, check_warnings=True): """Check that slushy.bel was loaded properly. :param BELGraph graph: A BEL graph :param bool check_metadata: Check the graph's document section is correct :param bool check_warnings: Check the graph produced the expected warnings """ self.assertIsNotNone(graph) self.assertIsInstance(graph, BELGraph) if check_metadata: self.assertIsNotNone(graph.document) self.assertIsInstance(graph.document, dict) expected_test_slushy_metadata = { METADATA_NAME: "Worst. BEL Document. Ever.", METADATA_DESCRIPTION: "This document outlines all of the evil and awful work that is possible during BEL curation", METADATA_VERSION: "0.0", METADATA_AUTHORS: "Charles Tapley Hoyt", METADATA_LICENSES: "WTF License", } self.assertEqual(expected_test_slushy_metadata[METADATA_NAME], graph.name) self.assertEqual(expected_test_slushy_metadata[METADATA_VERSION], graph.version) self.assertEqual(expected_test_slushy_metadata[METADATA_DESCRIPTION], graph.description) if check_warnings: expected_warnings = [ (0, MissingMetadataException), (3, VersionFormatWarning), (26, MissingAnnotationKeyWarning), (29, MissingAnnotationKeyWarning), (34, InvalidCitationLengthException), (37, InvalidCitationType), (40, InvalidPubMedIdentifierWarning), (43, MissingCitationException), (48, MissingAnnotationKeyWarning), (51, MissingAnnotationKeyWarning), (54, MissingSupportWarning), (59, NakedNameWarning), (62, UndefinedNamespaceWarning), (65, MissingNamespaceNameWarning), (68, UndefinedAnnotationWarning), (71, MissingAnnotationKeyWarning), (74, IllegalAnnotationValueWarning), (77, MissingAnnotationRegexWarning), (80, MissingNamespaceRegexWarning), (83, MalformedTranslocationWarning), (86, 
PlaceholderAminoAcidWarning), (89, NestedRelationWarning), (92, InvalidFunctionSemantic), # (95, Exception), (98, BELSyntaxError), ] for (el, ew), (l, _, w, _) in zip(expected_warnings, graph.warnings): self.assertEqual(el, l, msg="Expected different error on line {}. Check line {}".format(el, l)) self.assertIsInstance(w, ew, msg='Line: {}'.format(el)) for node in graph: self.assertIsInstance(node, BaseEntity) self.assertIn(akt1, graph) self.assertIn(egfr, graph) self.assertEqual(2, graph.number_of_nodes()) self.assertEqual(1, graph.number_of_edges()) assert_has_edge(self, akt1, egfr, graph, **{ RELATION: INCREASES, CITATION: citation_1, EVIDENCE: evidence_1, }) def bel_isolated_reconstituted(self, graph): """Run the isolated node test. :type graph: BELGraph """ self.assertIsNotNone(graph) self.assertIsInstance(graph, BELGraph) adgrb1 = protein(namespace='HGNC', name='ADGRB1') adgrb2 = protein(namespace='HGNC', name='ADGRB2') adgrb_complex = complex_abundance([adgrb1, adgrb2]) achlorhydria = pathology(namespace='MESHD', name='Achlorhydria') for node in graph: self.assertIsInstance(node, BaseEntity) self.assertIn(adgrb1, graph) self.assertIn(adgrb2, graph) self.assertIn(adgrb_complex, graph) self.assertIn(achlorhydria, graph) assert_has_edge(self, adgrb_complex, adgrb1, graph) assert_has_edge(self, adgrb_complex, adgrb2, graph) pybel-0.12.1/tests/test_canonicalization.py000066400000000000000000000274451334645200200210210ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for canonicalization functions.""" import unittest from pybel import BELGraph from pybel.canonicalize import _to_bel_lines_body, postpend_location from pybel.constants import BEL_DEFAULT_NAMESPACE, MODIFIER from pybel.dsl import ( abundance, activity, bioprocess, complex_abundance, composite_abundance, degradation, entity, extracellular, fragment, fusion_range, gene, gene_fusion, gmod, hgvs, intracellular, mirna, named_complex_abundance, pathology, pmod, protein, protein_substitution, 
reaction, rna, rna_fusion, secretion, translocation, ) from pybel.testing.utils import n from pybel.utils import canonicalize_edge class TestCanonicalize(unittest.TestCase): def test_postpend_location_failure(self): with self.assertRaises(ValueError): postpend_location('', dict(name='failure')) def test_canonicalize_variant_dsl(self): """Use the __str__ functions in the DSL to create BEL instead of external pybel.canonicalize.""" self.assertEqual('var("p.Val600Glu")', str(hgvs('p.Val600Glu'))) self.assertEqual('var("p.Val600Glu")', str(protein_substitution('Val', 600, 'Glu'))) self.assertEqual('pmod(Ph)', str(pmod('Ph'))) self.assertEqual('pmod(TEST:Ph)', str(pmod('Ph', namespace='TEST'))) self.assertEqual('pmod(TEST:Ph, Ser)', str(pmod('Ph', namespace='TEST', code='Ser'))) self.assertEqual('pmod(TEST:Ph, Ser, 5)', str(pmod('Ph', namespace='TEST', code='Ser', position=5))) self.assertEqual('pmod(GO:"protein phosphorylation", Thr, 308)', str(pmod(name='protein phosphorylation', namespace='GO', code='Thr', position=308))) self.assertEqual('frag("?")', str(fragment())) self.assertEqual('frag("672_713")', str(fragment(start=672, stop=713))) self.assertEqual('frag("?", "descr")', str(fragment(description='descr'))) self.assertEqual('frag("672_713", "descr")', str(fragment(start=672, stop=713, description='descr'))) self.assertEqual('gmod(Me)', str(gmod('Me'))) self.assertEqual('gmod(TEST:Me)', str(gmod('Me', namespace='TEST'))) self.assertEqual('gmod(GO:"DNA Methylation")', str(gmod('DNA Methylation', namespace='GO'))) def test_canonicalize_fusion_range_dsl(self): """Test canonicalization of enumerated fusion ranges.""" self.assertEqual('p.1_15', str(fusion_range('p', 1, 15))) self.assertEqual('p.*_15', str(fusion_range('p', '*', 15))) def test_abundance(self): """Test canonicalization of abundances.""" short = abundance(namespace='CHEBI', name='water') self.assertEqual('a(CHEBI:water)', str(short)) long = abundance(namespace='CHEBI', name='test name') 
self.assertEqual('a(CHEBI:"test name")', str(long)) def test_protein_reference(self): self.assertEqual('p(HGNC:AKT1)', str(protein(namespace='HGNC', name='AKT1'))) def test_gene_reference(self): node = gene(namespace='EGID', name='780') self.assertEqual('g(EGID:780)', str(node)) def test_protein_pmod(self): node = protein(name='PLCG1', namespace='HGNC', variants=[pmod(name='Ph', code='Tyr')]) self.assertEqual('p(HGNC:PLCG1, pmod(Ph, Tyr))', str(node)) def test_protein_fragment(self): node = protein(name='APP', namespace='HGNC', variants=[fragment(start=672, stop=713)]) self.assertEqual('p(HGNC:APP, frag("672_713"))', str(node)) def test_mirna_reference(self): self.assertEqual('m(HGNC:MIR1)', str(mirna(namespace='HGNC', name='MIR1'))) def test_rna_fusion_specified(self): node = rna_fusion( partner_5p=rna(namespace='HGNC', name='TMPRSS2'), range_5p=fusion_range('r', 1, 79), partner_3p=rna(namespace='HGNC', name='ERG'), range_3p=fusion_range('r', 312, 5034) ) self.assertEqual('r(fus(HGNC:TMPRSS2, "r.1_79", HGNC:ERG, "r.312_5034"))', str(node)) def test_rna_fusion_unspecified(self): node = rna_fusion( partner_5p=rna(namespace='HGNC', name='TMPRSS2'), partner_3p=rna(namespace='HGNC', name='ERG'), ) self.assertEqual('r(fus(HGNC:TMPRSS2, "?", HGNC:ERG, "?"))', str(node)) def test_gene_fusion_specified(self): node = gene_fusion( partner_5p=gene(namespace='HGNC', name='TMPRSS2'), range_5p=fusion_range('c', 1, 79), partner_3p=gene(namespace='HGNC', name='ERG'), range_3p=fusion_range('c', 312, 5034) ) self.assertEqual('g(fus(HGNC:TMPRSS2, "c.1_79", HGNC:ERG, "c.312_5034"))', str(node)) def test_pathology(self): node = pathology(namespace='DO', name='Alzheimer disease') self.assertEqual('path(DO:"Alzheimer disease")', str(node)) def test_bioprocess(self): node = bioprocess(namespace='GO', name='apoptosis') self.assertEqual('bp(GO:apoptosis)', str(node)) def test_named_complex_abundance(self): node = named_complex_abundance(namespace='SCOMP', name='Calcineurin Complex') 
self.assertEqual('complex(SCOMP:"Calcineurin Complex")', str(node)) def test_complex_abundance(self): node = complex_abundance(members=[protein(namespace='HGNC', name='FOS'), protein(namespace='HGNC', name='JUN')]) self.assertEqual('complex(p(HGNC:FOS), p(HGNC:JUN))', str(node)) def test_composite_abundance(self): node = composite_abundance(members=[ protein(namespace='HGNC', name='FOS'), protein(namespace='HGNC', name='JUN') ]) self.assertEqual('composite(p(HGNC:FOS), p(HGNC:JUN))', str(node)) def test_reaction(self): node = reaction( reactants=[abundance(namespace='CHEBI', name='A')], products=[abundance(namespace='CHEBI', name='B')] ) self.assertEqual('rxn(reactants(a(CHEBI:A)), products(a(CHEBI:B)))', str(node)) class TestCanonicalizeEdge(unittest.TestCase): """This class houses all testing for the canonicalization of edges such that the relation/modifications can be used as a second level hash""" def setUp(self): self.g = BELGraph() self.u = protein(name='u', namespace='TEST') self.v = protein(name='v', namespace='TEST') self.g.add_node_from_data(self.u) self.g.add_node_from_data(self.v) def get_data(self, k): return self.g[self.u][self.v][k] def add_edge(self, subject_modifier=None, object_modifier=None, annotations=None): key = self.g.add_increases( self.u, self.v, evidence=n(), citation=n(), subject_modifier=subject_modifier, object_modifier=object_modifier, annotations=annotations, ) return canonicalize_edge(self.get_data(key)) def test_failure(self): with self.assertRaises(ValueError): self.add_edge(subject_modifier={MODIFIER: 'nope'}) def test_canonicalize_edge_info(self): c1 = self.add_edge( annotations={ 'Species': '9606' } ) c2 = self.add_edge( annotations={ 'Species': '9606' } ) c3 = self.add_edge( subject_modifier=activity('tport'), ) c4 = self.add_edge( subject_modifier=activity('tport', namespace=BEL_DEFAULT_NAMESPACE), ) self.assertEqual(c1, c2) self.assertNotEqual(c1, c3) self.assertEqual(c3, c4) def test_subject_degradation_location(self): 
self.assertEqual( self.add_edge( subject_modifier=degradation() ), self.add_edge( subject_modifier=degradation() ) ) self.assertEqual( self.add_edge( subject_modifier=degradation(location=entity(name='somewhere', namespace='GOCC')) ), self.add_edge( subject_modifier=degradation(location=entity(name='somewhere', namespace='GOCC')) ) ) self.assertNotEqual( self.add_edge( subject_modifier=degradation() ), self.add_edge( subject_modifier=degradation(location=entity(name='somewhere', namespace='GOCC')) ) ) def test_translocation(self): self.assertEqual( self.add_edge(subject_modifier=secretion()), self.add_edge(subject_modifier=secretion()), ) self.assertEqual( self.add_edge(subject_modifier=secretion()), self.add_edge(subject_modifier=translocation(from_loc=intracellular, to_loc=extracellular)), ) class TestSerializeBEL(unittest.TestCase): def setUp(self): self.citation = n() self.evidence = n() self.url = n() self.graph = BELGraph() self.graph.namespace_url['HGNC'] = self.url def help_check_lines(self, lines): """Checks the given lines match the graph built during the tests :type lines: list[str] """ self.assertEqual(lines, list(_to_bel_lines_body(self.graph))) def test_simple(self): """Tests a scenario with a qualified edge, but no annotaitons""" self.graph.add_increases( protein(namespace='HGNC', name='YFG1'), protein(namespace='HGNC', name='YFG'), citation=self.citation, evidence=self.evidence ) self.assertEqual(2, self.graph.number_of_nodes()) self.assertEqual(1, self.graph.number_of_edges()) expected_lines = [ '#' * 80, 'SET Citation = {{"PubMed", "{}"}}'.format(self.citation), 'SET SupportingText = "{}"'.format(self.evidence), 'p(HGNC:YFG1) increases p(HGNC:YFG)', 'UNSET SupportingText', 'UNSET Citation' ] self.help_check_lines(expected_lines) def test_single_annotation(self): """Tests a scenario with a qualified edge, but no annotaitons""" a1, v1 = map(lambda _: n(), range(2)) self.graph.add_increases( protein(namespace='HGNC', name='YFG1'), 
protein(namespace='HGNC', name='YFG'), citation=self.citation, evidence=self.evidence, annotations={ a1: {v1} } ) self.assertEqual(2, self.graph.number_of_nodes()) self.assertEqual(1, self.graph.number_of_edges()) expected_lines = [ '#' * 80, 'SET Citation = {{"PubMed", "{}"}}'.format(self.citation), 'SET SupportingText = "{}"'.format(self.evidence), 'SET {} = "{}"'.format(a1, v1), 'p(HGNC:YFG1) increases p(HGNC:YFG)', 'UNSET {}'.format(a1), 'UNSET SupportingText', 'UNSET Citation' ] self.help_check_lines(expected_lines) def test_multiple_annotations(self): a1, v1, v2 = map(lambda _: n(), range(3)) v1, v2 = sorted([v1, v2]) self.graph.add_increases( protein(namespace='HGNC', name='YFG1'), protein(namespace='HGNC', name='YFG'), citation=self.citation, evidence=self.evidence, annotations={ a1: {v1, v2} } ) self.assertEqual(2, self.graph.number_of_nodes()) self.assertEqual(1, self.graph.number_of_edges()) expected_lines = [ '#' * 80, 'SET Citation = {{"PubMed", "{}"}}'.format(self.citation), 'SET SupportingText = "{}"'.format(self.evidence), 'SET {} = {{"{}", "{}"}}'.format(a1, v1, v2), 'p(HGNC:YFG1) increases p(HGNC:YFG)', 'UNSET {}'.format(a1), 'UNSET SupportingText', 'UNSET Citation', ] self.help_check_lines(expected_lines) pybel-0.12.1/tests/test_cli.py000066400000000000000000000077771334645200200162510ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for the command line interface.""" import json import logging import os import traceback import unittest from click.testing import CliRunner from pybel import Manager, cli from pybel.constants import METADATA_NAME, PYBEL_CONTEXT_TAG from pybel.io import from_json, from_path, from_pickle from pybel.manager.database_io import from_database from pybel.testing.cases import FleetingTemporaryCacheMixin from pybel.testing.constants import test_bel_simple, test_bel_thorough from pybel.testing.mocks import mock_bel_resources from tests.constants import BelReconstitutionMixin, expected_test_thorough_metadata log = 
logging.getLogger(__name__) @unittest.skip class TestCli(FleetingTemporaryCacheMixin, BelReconstitutionMixin): def setUp(self): super(TestCli, self).setUp() self.runner = CliRunner() @mock_bel_resources def test_convert(self, mock_get): """Test conversion via the CLI.""" with self.runner.isolated_filesystem(): test_csv = os.path.abspath('test.csv') test_gpickle = os.path.abspath('test.gpickle') test_canon = os.path.abspath('test.bel') args = [ 'convert', # Input '--path', test_bel_thorough, '--connection', self.connection, # Outputs '--csv', test_csv, '--pickle', test_gpickle, '--bel', test_canon, '--store', '--allow-nested' ] result = self.runner.invoke(cli.main, args) self.assertEqual(0, result.exit_code, msg='{}\n{}\n{}'.format( result.exc_info[0], result.exc_info[1], traceback.format_tb(result.exc_info[2]) )) self.assertTrue(os.path.exists(test_csv)) self.bel_thorough_reconstituted(from_pickle(test_gpickle)) self.bel_thorough_reconstituted(from_path(test_canon)) manager = Manager(connection=self.connection) self.bel_thorough_reconstituted( from_database(expected_test_thorough_metadata[METADATA_NAME], manager=manager)) @mock_bel_resources def test_convert_json(self, mock_get): with self.runner.isolated_filesystem(): test_json = os.path.abspath('test.json') args = [ 'convert', '--path', test_bel_thorough, '--json', test_json, '--connection', self.connection, '--allow-nested' ] result = self.runner.invoke(cli.main, args) self.assertEqual(0, result.exit_code, msg=result.exc_info) with open(test_json) as f: self.bel_thorough_reconstituted(from_json(json.load(f))) @unittest.skipUnless('NEO_PATH' in os.environ, 'Need environmental variable $NEO_PATH') @mock_bel_resources def test_neo4j_remote(self, mock_get): from py2neo.database.status import GraphError from py2neo import Graph test_context = 'PYBEL_TEST_CTX' neo_path = os.environ['NEO_PATH'] try: neo = Graph(neo_path) neo.data('match (n)-[r]->() where r.{}="{}" detach delete n'.format(PYBEL_CONTEXT_TAG, 
test_context)) except GraphError: self.skipTest("Can't query Neo4J ") except: self.skipTest("Can't connect to Neo4J server") else: with self.runner.isolated_filesystem(): args = [ 'convert', '--path', test_bel_simple, '--connection', self.connection, '--neo', neo_path, '--neo-context', test_context ] self.runner.invoke(cli.main, args) q = 'match (n)-[r]->() where r.{}="{}" return count(n) as count'.format(PYBEL_CONTEXT_TAG, test_context) count = neo.data(q)[0]['count'] self.assertEqual(14, count) pybel-0.12.1/tests/test_dsl.py000066400000000000000000000143161334645200200162470ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for the internal DSL.""" import unittest from pybel import BELGraph from pybel.constants import NAME from pybel.dsl import ( abundance, complex_abundance, entity, fragment, fusion_range, gene, gene_fusion, missing_fusion_range, protein, ) from pybel.testing.utils import n from pybel.utils import ensure_quotes class TestDSL(unittest.TestCase): """Tests for the internal DSL.""" def test_add_robust_node(self): """Test adding a node with both a name and identifier.""" graph = BELGraph() namespace, name, identifier = n(), n(), n() node = protein(namespace=namespace, name=name, identifier=identifier) graph.add_node_from_data(node) self.assertIn(node, graph) def test_add_identified_node(self): """Test what happens when a node with only an identifier is added to a graph.""" graph = BELGraph() namespace, identifier = n(), n() node = protein(namespace=namespace, identifier=identifier) self.assertNotIn(NAME, node) graph.add_node_from_data(node) self.assertIn(node, graph) def test_add_named_node(self): """Test adding a named node to a BEL graph.""" graph = BELGraph() namespace, name = n(), n() node = protein(namespace=namespace, name=name) graph.add_node_from_data(node) self.assertIn(node, graph) def test_missing_information(self): """Test that entity and abundance functions raise on missing name/identifier.""" with 
self.assertRaises(ValueError): entity(namespace='test') with self.assertRaises(ValueError): protein(namespace='test') def test_abundance_as_bel_quoted(self): """Test converting an abundance to BEL with a name that needs quotation.""" namespace, name = 'HGNC', 'YFG-1' node = abundance(namespace=namespace, name=name) self.assertEqual('a(HGNC:"YFG-1")', node.as_bel()) def test_abundance_as_bel(self): """Test converting an abundance to BEL with a name that does not need quotation.""" namespace, name = 'HGNC', 'YFG' node = abundance(namespace=namespace, name=name) self.assertEqual('a(HGNC:YFG)', node.as_bel()) def test_str_has_identifier(self): namespace, identifier = n(), n() node = abundance(namespace=namespace, identifier=identifier) self.assertEqual( 'a({namespace}:{identifier})'.format(namespace=namespace, identifier=ensure_quotes(identifier)), node.as_bel()) def test_str_has_both(self): namespace, identifier = n(), n() node = abundance(namespace=namespace, identifier=identifier) self.assertEqual( 'a({namespace}:{identifier})'.format(namespace=namespace, identifier=ensure_quotes(identifier)), node.as_bel()) def test_as_tuple(self): namespace, name = n(), n() node = abundance(namespace=namespace, name=name) self.assertEqual(hash(node), hash(node.as_bel())) def test_complex_with_name(self): """Test what happens with a named complex. .. 
code-block:: complex(SCOMP:"9-1-1 Complex") hasComponent p(HGNC:HUS1) complex(SCOMP:"9-1-1 Complex") hasComponent p(HGNC:RAD1) complex(SCOMP:"9-1-1 Complex") hasComponent p(HGNC:RAD9A) """ hus1 = protein(namespace='HGNC', name='HUS1') rad1 = protein(namespace='HGNC', name='RAD1') rad9a = protein(namespace='HGNC', name='RAD9A') members = [hus1, rad1, rad9a] nine_one_one = complex_abundance(members=members, namespace='SCOMP', name='9-1-1 Complex') graph = BELGraph() graph.add_node_from_data(nine_one_one) self.assertIn(nine_one_one, graph) self.assertIn(hus1, graph) self.assertIn(rad1, graph) self.assertIn(rad9a, graph) def test_gene_fusion(self): """Test serialization of a gene fusion to BEL with a explicit fusion ranges.""" dsl = gene_fusion( gene('HGNC', 'TMPRSS2'), gene('HGNC', 'ERG'), fusion_range('c', 1, 79), fusion_range('c', 312, 5034) ) self.assertEqual('g(fus(HGNC:TMPRSS2, "c.1_79", HGNC:ERG, "c.312_5034"))', dsl.as_bel()) def test_gene_fusion_missing_implicit(self): """Test serialization of a gene fusion to BEL with a implicit missing fusion ranges.""" dsl = gene_fusion( gene('HGNC', 'TMPRSS2'), gene('HGNC', 'ERG'), ) self.assertEqual('g(fus(HGNC:TMPRSS2, "?", HGNC:ERG, "?"))', dsl.as_bel()) def test_gene_fusion_missing_explicit(self): """Test serialization of a gene fusion to BEL with an explicit missing fusion ranges.""" dsl = gene_fusion( gene('HGNC', 'TMPRSS2'), gene('HGNC', 'ERG'), missing_fusion_range(), missing_fusion_range(), ) self.assertEqual('g(fus(HGNC:TMPRSS2, "?", HGNC:ERG, "?"))', dsl.as_bel()) class TestCentralDogma(unittest.TestCase): """Test functions specific for :class:`CentralDogmaAbundance`s.""" def test_get_parent(self): """Test the get_parent function in :class:`CentralDogmaAbundance`s.""" ab42 = protein(name='APP', namespace='HGNC', variants=[fragment(start=672, stop=713)]) app = ab42.get_parent() self.assertEqual('p(HGNC:APP)', app.as_bel()) self.assertEqual('p(HGNC:APP, frag("672_713"))', ab42.as_bel()) def 
test_with_variants(self): """Test the `with_variant` function in :class:`CentralDogmaAbundance`s.""" app = protein(name='APP', namespace='HGNC') ab42 = app.with_variants(fragment(start=672, stop=713)) self.assertEqual('p(HGNC:APP)', app.as_bel()) self.assertEqual('p(HGNC:APP, frag("672_713"))', ab42.as_bel()) def test_with_variants_list(self): """Test the `with_variant` function in :class:`CentralDogmaAbundance`s.""" app = protein(name='APP', namespace='HGNC') ab42 = app.with_variants([fragment(start=672, stop=713)]) self.assertEqual('p(HGNC:APP)', app.as_bel()) self.assertEqual('p(HGNC:APP, frag("672_713"))', ab42.as_bel()) if __name__ == '__main__': unittest.main() pybel-0.12.1/tests/test_io/000077500000000000000000000000001334645200200155155ustar00rootroot00000000000000pybel-0.12.1/tests/test_io/__init__.py000066400000000000000000000000721334645200200176250ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for :mod:`pybel.io`.""" pybel-0.12.1/tests/test_io/test_import.py000066400000000000000000000323151334645200200204440ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for input and output.""" import logging import os import tempfile import unittest from pathlib import Path from six import BytesIO, StringIO from pybel import ( BELGraph, from_bytes, from_json, from_json_file, from_jsons, from_lines, from_path, from_pickle, from_url, to_bel_lines, to_bytes, to_csv, to_graphml, to_gsea, to_json, to_json_file, to_jsons, to_pickle, to_sif, ) from pybel.constants import ( ANNOTATIONS, CITATION, DECREASES, DIRECTLY_DECREASES, EVIDENCE, GENE, GRAPH_PYBEL_VERSION, INCREASES, PYBEL_MINIMUM_IMPORT_VERSION, RELATION, ) from pybel.dsl import BaseEntity, gene from pybel.examples import sialic_acid_graph from pybel.io.exc import ImportVersionWarning, import_version_message_fmt from pybel.parser import BELParser from pybel.parser.exc import InvalidFunctionSemantic, MissingCitationException, MissingNamespaceRegexWarning from pybel.struct.summary import 
get_syntax_errors from pybel.testing.cases import TemporaryCacheClsMixin from pybel.testing.constants import ( test_bel_isolated, test_bel_misordered, test_bel_simple, test_bel_slushy, test_bel_thorough, ) from pybel.testing.mocks import mock_bel_resources from tests.constants import ( BelReconstitutionMixin, TestTokenParserBase, akt1, casp8, citation_1, egfr, evidence_1, fadd, test_citation_dict, test_evidence_text, test_set_evidence, ) logging.getLogger('requests').setLevel(logging.WARNING) log = logging.getLogger(__name__) testan1 = '1' class TestExampleInterchange(unittest.TestCase): """Test round-trip interchange of the sialic acid graph example.""" def help_test_equal(self, graph): """Check that a graph is equal to the sialic acid graph example. :type graph: pybel.BELGraph """ for node in graph: self.assertIsInstance(node, BaseEntity) self.assertEqual(set(sialic_acid_graph), set(graph)) self.assertEqual(set(sialic_acid_graph.edges()), set(graph.edges())) def test_example_bytes(self): """Test the round-trip through bytes.""" graph_bytes = to_bytes(sialic_acid_graph) graph = from_bytes(graph_bytes) self.help_test_equal(graph) def test_example_pickle(self): """Test the round-trip through a pickle.""" bio = BytesIO() to_pickle(sialic_acid_graph, bio) bio.seek(0) graph = from_pickle(bio) self.help_test_equal(graph) def test_thorough_json(self): """Test the round-trip through node-link JSON.""" graph_json_dict = to_json(sialic_acid_graph) graph = from_json(graph_json_dict) self.help_test_equal(graph) def test_thorough_jsons(self): """Test the round-trip through a node-link JSON string.""" graph_json_str = to_jsons(sialic_acid_graph) graph = from_jsons(graph_json_str) self.help_test_equal(graph) def test_thorough_json_file(self): """Test the round-trip through a node-link JSON file.""" sio = StringIO() to_json_file(sialic_acid_graph, sio) sio.seek(0) graph = from_json_file(sio) self.help_test_equal(graph) class TestInterchange(TemporaryCacheClsMixin, 
BelReconstitutionMixin): @classmethod def setUpClass(cls): """Set up this class with several pre-loaded BEL graphs.""" super(TestInterchange, cls).setUpClass() with mock_bel_resources: cls.thorough_graph = from_path(test_bel_thorough, manager=cls.manager, allow_nested=True) cls.slushy_graph = from_path(test_bel_slushy, manager=cls.manager, disallow_unqualified_translocations=True) cls.simple_graph = from_url(Path(test_bel_simple).as_uri(), manager=cls.manager) cls.isolated_graph = from_path(test_bel_isolated, manager=cls.manager) cls.misordered_graph = from_path(test_bel_misordered, manager=cls.manager, citation_clearing=False) def test_thorough_path(self): self.bel_thorough_reconstituted(self.thorough_graph) def test_thorough_bytes(self): graph_bytes = to_bytes(self.thorough_graph) graph = from_bytes(graph_bytes) self.bel_thorough_reconstituted(graph) def test_thorough_pickle(self): bio = BytesIO() to_pickle(self.thorough_graph, bio) bio.seek(0) graph = from_pickle(bio) self.bel_thorough_reconstituted(graph) def test_thorough_json(self): graph_json_dict = to_json(self.thorough_graph) graph = from_json(graph_json_dict) self.bel_thorough_reconstituted(graph) def test_thorough_jsons(self): graph_json_str = to_jsons(self.thorough_graph) graph = from_jsons(graph_json_str) self.bel_thorough_reconstituted(graph) def test_thorough_json_file(self): sio = StringIO() to_json_file(self.thorough_graph, sio) sio.seek(0) graph = from_json_file(sio) self.bel_thorough_reconstituted(graph) def test_thorough_graphml(self): handle, path = tempfile.mkstemp() with open(path, 'wb') as f: to_graphml(self.thorough_graph, f) os.close(handle) os.remove(path) def test_thorough_csv(self): handle, path = tempfile.mkstemp() with open(path, 'w') as f: to_csv(self.thorough_graph, f) os.close(handle) os.remove(path) def test_thorough_sif(self): handle, path = tempfile.mkstemp() with open(path, 'w') as f: to_sif(self.thorough_graph, f) os.close(handle) os.remove(path) def test_thorough_gsea(self): 
handle, path = tempfile.mkstemp() with open(path, 'w') as f: to_gsea(self.thorough_graph, f) os.close(handle) os.remove(path) def test_thorough_upgrade(self): lines = to_bel_lines(self.thorough_graph) reconstituted = from_lines(lines, manager=self.manager) self.bel_thorough_reconstituted(reconstituted, check_citation_name=False) def test_slushy(self): self.bel_slushy_reconstituted(self.slushy_graph) def test_slushy_bytes(self): graph_bytes = to_bytes(self.slushy_graph) graph = from_bytes(graph_bytes) self.bel_slushy_reconstituted(graph) def test_slushy_syntax_errors(self): syntax_errors = get_syntax_errors(self.slushy_graph) self.assertEqual(1, len(syntax_errors)) self.assertEqual(98, syntax_errors[0][0]) def test_slushy_json(self): graph_json = to_json(self.slushy_graph) graph = from_json(graph_json) self.bel_slushy_reconstituted(graph) def test_slushy_graphml(self): handle, path = tempfile.mkstemp() with open(path, 'wb') as f: to_graphml(self.slushy_graph, f) os.close(handle) os.remove(path) def test_simple_compile(self): self.bel_simple_reconstituted(self.simple_graph) def test_isolated_compile(self): self.bel_isolated_reconstituted(self.isolated_graph) def test_isolated_upgrade(self): lines = to_bel_lines(self.isolated_graph) with mock_bel_resources: reconstituted = from_lines(lines, manager=self.manager) self.bel_isolated_reconstituted(reconstituted) def test_misordered_compile(self): """Test that non-citation clearing mode works.""" self.assertEqual(4, self.misordered_graph.number_of_nodes()) self.assertEqual(3, self.misordered_graph.number_of_edges()) e1 = { RELATION: INCREASES, CITATION: citation_1, EVIDENCE: evidence_1, ANNOTATIONS: { 'TESTAN1': {testan1: True} } } self.assert_has_edge(self.misordered_graph, akt1, egfr, **e1) e2 = { RELATION: DECREASES, CITATION: citation_1, EVIDENCE: evidence_1, ANNOTATIONS: { 'TESTAN1': {testan1: True} } } self.assert_has_edge(self.misordered_graph, egfr, fadd, **e2) e3 = { RELATION: DIRECTLY_DECREASES, CITATION: 
citation_1, EVIDENCE: evidence_1, ANNOTATIONS: { 'TESTAN1': {testan1: True} } } self.assert_has_edge(self.misordered_graph, egfr, casp8, **e3) namespaces = { 'TESTNS': { "1": "GRP", "2": "GRP" } } annotations = { 'TestAnnotation1': {'A', 'B', 'C'}, 'TestAnnotation2': {'X', 'Y', 'Z'}, 'TestAnnotation3': {'D', 'E', 'F'} } class TestFull(TestTokenParserBase): @classmethod def setUpClass(cls): cls.graph = BELGraph() cls.parser = BELParser( cls.graph, namespace_dict=namespaces, annotation_dict=annotations, namespace_regex={'dbSNP': 'rs[0-9]*'} ) def test_regex_match(self): line = 'g(dbSNP:rs10234) -- g(dbSNP:rs10235)' self.add_default_provenance() self.parser.parseString(line) self.assertIn(gene('dbSNP', 'rs10234'), self.parser.graph) self.assertIn(gene('dbSNP', 'rs10235'), self.parser.graph) def test_regex_mismatch(self): line = 'g(dbSNP:10234) -- g(dbSNP:rr10235)' with self.assertRaises(MissingNamespaceRegexWarning): self.parser.parseString(line) def test_semantic_failure(self): statement = "bp(TESTNS:1) -- p(TESTNS:2)" with self.assertRaises(InvalidFunctionSemantic): self.parser.parseString(statement) def test_missing_citation(self): statements = [ test_set_evidence, 'SET TestAnnotation1 = "A"', 'SET TestAnnotation2 = "X"', 'g(TESTNS:1) -> g(TESTNS:2)' ] with self.assertRaises(MissingCitationException): self.parser.parse_lines(statements) def test_annotations(self): self.add_default_provenance() statements = [ 'SET TestAnnotation1 = "A"', 'SET TestAnnotation2 = "X"', 'g(TESTNS:1) -> g(TESTNS:2)' ] self.parser.parse_lines(statements) test_node_1 = gene(namespace='TESTNS', name='1') test_node_2 = gene(namespace='TESTNS', name='2') self.assertEqual(2, self.graph.number_of_nodes()) self.assertIn(test_node_1, self.graph) self.assertIn(test_node_2, self.graph) self.assertEqual(1, self.parser.graph.number_of_edges()) kwargs = { ANNOTATIONS: { 'TestAnnotation1': {'A': True}, 'TestAnnotation2': {'X': True}, }, EVIDENCE: test_evidence_text, CITATION: test_citation_dict } 
self.assert_has_edge(test_node_1, test_node_2, **kwargs) def test_annotations_with_list(self): self.add_default_provenance() statements = [ 'SET TestAnnotation1 = {"A","B"}', 'SET TestAnnotation2 = "X"', 'g(TESTNS:1) -> g(TESTNS:2)' ] self.parser.parse_lines(statements) test_node_1_dict = gene(namespace='TESTNS', name='1') test_node_2_dict = gene(namespace='TESTNS', name='2') self.assertEqual(2, self.parser.graph.number_of_nodes()) self.assertIn(test_node_1_dict, self.graph) self.assertIn(test_node_2_dict, self.graph) self.assertEqual(1, self.parser.graph.number_of_edges()) kwargs = { ANNOTATIONS: { 'TestAnnotation1': {'A': True, 'B': True}, 'TestAnnotation2': {'X': True} }, CITATION: test_citation_dict } self.assert_has_edge(test_node_1_dict, test_node_2_dict, **kwargs) def test_annotations_with_multilist(self): self.add_default_provenance() statements = [ 'SET TestAnnotation1 = {"A","B"}', 'SET TestAnnotation2 = "X"', 'SET TestAnnotation3 = {"D","E"}', 'g(TESTNS:1) -> g(TESTNS:2)' ] self.parser.parse_lines(statements) test_node_1_dict = gene(namespace='TESTNS', name='1') test_node_2_dict = gene(namespace='TESTNS', name='2') self.assertEqual(2, self.parser.graph.number_of_nodes()) self.assertIn(test_node_1_dict, self.graph) self.assertIn(test_node_2_dict, self.graph) self.assertEqual(1, self.parser.graph.number_of_edges()) kwargs = { ANNOTATIONS: { 'TestAnnotation1': {'A': True, 'B': True}, 'TestAnnotation2': {'X': True}, 'TestAnnotation3': {'D': True, 'E': True} }, CITATION: test_citation_dict } self.assert_has_edge(test_node_1_dict, test_node_2_dict, **kwargs) class TestRandom(unittest.TestCase): def test_import_warning(self): """Tests an error is thrown when the version is set wrong""" graph = BELGraph() # Much with stuff that would normally be set graph.graph[GRAPH_PYBEL_VERSION] = '0.0.0' graph_bytes = to_bytes(graph) with self.assertRaises(ImportVersionWarning) as cm: from_bytes(graph_bytes) self.assertEqual( import_version_message_fmt.format('0.0.0', 
PYBEL_MINIMUM_IMPORT_VERSION), str(cm.exception) ) if __name__ == '__main__': unittest.main() pybel-0.12.1/tests/test_io/test_jgif.py000066400000000000000000000115421334645200200200500ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for interchange with JGIF.""" from __future__ import unicode_literals import json import logging import unittest from pybel import from_cbn_jgif, to_jgif from pybel.constants import ( ACTIVITY, ANNOTATIONS, BEL_DEFAULT_NAMESPACE, CITATION, CITATION_REFERENCE, CITATION_TYPE, CITATION_TYPE_OTHER, CITATION_TYPE_PUBMED, DECREASES, DIRECTLY_INCREASES, EFFECT, EVIDENCE, MODIFIER, NAME, NAMESPACE, OBJECT, RELATION, ) from pybel.dsl import Abundance, BiologicalProcess, ComplexAbundance, NamedComplexAbundance, Pathology, Protein, pmod from pybel.testing.constants import test_jgif_path from tests.constants import TestGraphMixin logging.getLogger('pybel.parser').setLevel(20) calcium = Abundance('SCHEM', 'Calcium') calcineurin_complex = NamedComplexAbundance('SCOMP', 'Calcineurin Complex') foxo3 = Protein('HGNC', 'FOXO3') tcell_proliferation = BiologicalProcess('GOBP', 'CD8-positive, alpha-beta T cell proliferation') il15 = Protein('HGNC', 'IL15') il2rg = Protein('MGI', 'Il2rg') jgif_expected_nodes = { calcium, calcineurin_complex, foxo3, tcell_proliferation, il15, il2rg, Protein('HGNC', 'CXCR6'), Protein('HGNC', 'IL15RA'), BiologicalProcess('GOBP', 'lymphocyte chemotaxis'), Protein('HGNC', 'IL2RG'), Protein('HGNC', 'ZAP70'), NamedComplexAbundance('SCOMP', 'T Cell Receptor Complex'), BiologicalProcess('GOBP', 'T cell activation'), Protein('HGNC', 'CCL3'), Protein('HGNC', 'PLCG1'), Protein('HGNC', 'FASLG'), Protein('HGNC', 'IDO1'), Protein('HGNC', 'IL2'), Protein('HGNC', 'CD8A'), Protein('HGNC', 'CD8B'), Protein('HGNC', 'PLCG1'), Protein('HGNC', 'BCL2'), Protein('HGNC', 'CCR3'), Protein('HGNC', 'IL2RB'), Protein('HGNC', 'CD28'), Pathology('SDIS', 'Cytotoxic T-cell activation'), Protein('HGNC', 'FYN'), Protein('HGNC', 'CXCL16'), 
Protein('HGNC', 'CCR5'), Protein('HGNC', 'LCK'), Protein('SFAM', 'Chemokine Receptor Family'), Protein('HGNC', 'CXCL9'), Pathology('SDIS', 'T-cell migration'), Protein('HGNC', 'CXCR3'), Abundance('CHEBI', 'acrolein'), Protein('HGNC', 'IDO2'), Pathology('MESHD', 'Pulmonary Disease, Chronic Obstructive'), Protein('HGNC', 'IFNG'), Protein('HGNC', 'TNFRSF4'), Protein('HGNC', 'CTLA4'), Protein('HGNC', 'GZMA'), Protein('HGNC', 'PRF1'), Protein('HGNC', 'TNF'), Protein('SFAM', 'Chemokine Receptor Family'), ComplexAbundance([Protein('HGNC', 'CD8A'), Protein('HGNC', 'CD8B')]), ComplexAbundance([Protein('HGNC', 'CD8A'), Protein('HGNC', 'CD8B')]), Protein('HGNC', 'PLCG1', variants=pmod('Ph', 'Tyr')), Protein('EGID', '21577'), } jgif_expected_edges = [ (calcium, calcineurin_complex, { RELATION: DIRECTLY_INCREASES, EVIDENCE: 'NMDA-mediated influx of calcium led to activated of the calcium-dependent phosphatase calcineurin and the subsequent dephosphorylation and activation of the protein-tyrosine phosphatase STEP', CITATION: { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: '12483215' }, OBJECT: {MODIFIER: ACTIVITY, EFFECT: {NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'phos'}}, ANNOTATIONS: { 'Species': {'10116': True}, 'Cell': {'neuron': True} } }), (foxo3, tcell_proliferation, { RELATION: DECREASES, EVIDENCE: "\"These data suggested that FOXO3 downregulates the accumulation of CD8 T cells in tissue specific fashion during an acute LCMV [lymphocytic choriomeningitis virus] infection.\" (p. 3)", CITATION: { CITATION_TYPE: CITATION_TYPE_OTHER, CITATION_REFERENCE: "22359505" }, ANNOTATIONS: { 'Species': {'10090': True}, 'Disease': {'Viral infection': True} } }), (il15, il2rg, { RELATION: DIRECTLY_INCREASES, EVIDENCE: "IL-15 utilizes ... 
the common cytokine receptor γ-chain (CD132) for signal transduction in lymphocytes", CITATION: { CITATION_TYPE: CITATION_TYPE_OTHER, CITATION_REFERENCE: "20335267" }, OBJECT: {MODIFIER: ACTIVITY, EFFECT: {NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'cat'}}, ANNOTATIONS: { 'Tissue': {'lung': True} } }) ] class TestJgif(TestGraphMixin): """Tests data interchange of """ def test_jgif_interchange(self): """Tests data from CBN""" with open(test_jgif_path) as f: graph_jgif_dict = json.load(f) graph = from_cbn_jgif(graph_jgif_dict) self.assertEqual(jgif_expected_nodes, set(graph)) for u, v, d in jgif_expected_edges: self.assert_has_edge(graph, u, v, **d) # TODO test more thoroughly? export_jgif = to_jgif(graph) self.assertIsInstance(export_jgif, dict) if __name__ == '__main__': unittest.main() pybel-0.12.1/tests/test_manager/000077500000000000000000000000001334645200200165205ustar00rootroot00000000000000pybel-0.12.1/tests/test_manager/__init__.py000066400000000000000000000000771334645200200206350ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for :mod:`pybel.manager`.""" pybel-0.12.1/tests/test_manager/test_citation_utils.py000066400000000000000000000126651334645200200231750ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Test the manager's citation utilities.""" from __future__ import unicode_literals import os import unittest import time from pybel import BELGraph from pybel.constants import ( CITATION, CITATION_AUTHORS, CITATION_DATE, CITATION_NAME, CITATION_TYPE_PUBMED, ) from pybel.dsl import protein from pybel.manager.citation_utils import enrich_pubmed_citations, get_citations_by_pmids, sanitize_date from pybel.manager.models import Citation from pybel.testing.cases import TemporaryCacheMixin from pybel.testing.utils import n class TestSanitizeDate(unittest.TestCase): """Test sanitization of dates in various formats.""" def test_sanitize_1(self): """Test YYYY Mon DD.""" self.assertEqual('2012-12-19', sanitize_date('2012 Dec 19')) def 
test_sanitize_2(self): """Test YYYY Mon.""" self.assertEqual('2012-12-01', sanitize_date('2012 Dec')) def test_sanitize_3(self): """Test YYYY.""" self.assertEqual('2012-01-01', sanitize_date('2012')) def test_sanitize_4(self): """Test YYYY Mon-Mon.""" self.assertEqual('2012-10-01', sanitize_date('2012 Oct-Dec')) def test_sanitize_5(self): """Test YYYY Season.""" self.assertEqual('2012-03-01', sanitize_date('2012 Spring')) def test_sanitize_6(self): """Test YYYY Mon DD-DD.""" self.assertEqual('2012-12-12', sanitize_date('2012 Dec 12-15')) def test_sanitize_7(self): """Test YYYY Mon DD-Mon DD.""" self.assertEqual('2005-01-29', sanitize_date('2005 Jan 29-Feb 4')) def test_sanitize_nope(self): """Test failure.""" self.assertEqual(None, sanitize_date('2012 Early Spring')) class TestCitations(TemporaryCacheMixin): """Tests for citations.""" def setUp(self): super(TestCitations, self).setUp() self.u, self.v = (protein(n(), n()) for _ in range(2)) self.pmid = "9611787" self.graph = BELGraph() self.graph.add_increases(self.u, self.v, citation=self.pmid, evidence=n()) def test_enrich(self): """""" self.assertEqual(0, self.manager.count_citations()) get_citations_by_pmids(manager=self.manager, pmids=[self.pmid]) self.assertEqual(1, self.manager.count_citations()) c = self.manager.get_citation_by_pmid(self.pmid) self.assertIsNotNone(c) self.assertIsInstance(c, Citation) self.assertEqual(CITATION_TYPE_PUBMED, c.type) self.assertEqual(self.pmid, c.reference) def test_enrich_list(self): pmids = [ '25818332', '27003210', '26438529', '26649137', ] get_citations_by_pmids(manager=self.manager, pmids=pmids) citation = self.manager.get_or_create_citation(type=CITATION_TYPE_PUBMED, reference='25818332') self.assertIsNotNone(citation) def test_enrich_list_grouped(self): pmids = [ '25818332', '27003210', '26438529', '26649137', ] get_citations_by_pmids(manager=self.manager, pmids=pmids, group_size=2) citation = self.manager.get_citation_by_pmid('25818332') self.assertIsNotNone(citation) 
def test_enrich_overwrite(self):
    """Enrich the graph when its citation is already stored without a name or date."""
    citation = self.manager.get_or_create_citation(type=CITATION_TYPE_PUBMED, reference=self.pmid)
    self.manager.session.commit()

    # Precondition: the freshly created citation has not been enriched yet.
    self.assertIsNone(citation.date)
    self.assertIsNone(citation.name)

    enrich_pubmed_citations(manager=self.manager, graph=self.graph)

    self._assert_first_edge_citation_enriched()

def test_enrich_graph(self):
    """Enrich the graph without creating its citation beforehand."""
    enrich_pubmed_citations(manager=self.manager, graph=self.graph)

    self._assert_first_edge_citation_enriched()

@unittest.skipIf(os.environ.get('DB') == 'mysql', reason='MySQL collation is wonky')
def test_accent_duplicate(self):
    """Test that two authors, "Gomez C" and "Gómez C", are kept as distinct authors.

    Skipped when the ``DB`` environment variable is ``mysql``.
    """
    g1 = 'Gomez C'
    g2 = 'Gómez C'
    pmids = [
        '29324713',
        '29359844',
    ]
    pmid_1 = pmids[0]

    get_citations_by_pmids(manager=self.manager, pmids=pmids)

    # NOTE(review): the reason for this pause is not evident from the test itself;
    # presumably it lets the inserts settle - confirm whether it is still needed.
    time.sleep(1)

    x = self.manager.get_citation_by_pmid(pmid_1)
    self.assertIsNotNone(x)
    self.assertEqual('Martínez-Guillén JR', x.first.name)

    # Both spellings must be present in the manager's author cache...
    self.assertIn(g1, self.manager.object_cache_author)
    self.assertIn(g2, self.manager.object_cache_author)

    # ...and each lookup by name must return the author with exactly that spelling.
    a1 = self.manager.get_author_by_name(g1)
    self.assertEqual(g1, a1.name)

    a2 = self.manager.get_author_by_name(g2)
    self.assertEqual(g2, a2.name)

def _assert_first_edge_citation_enriched(self):
    """Check that the citation on the first edge of ``self.graph`` has the expected name, date and authors.

    Shared by the enrichment tests, which all expect the same values
    for the citation attached to the single edge built in ``setUp``.
    """
    _, _, data = list(self.graph.edges(data=True))[0]
    citation_dict = data[CITATION]

    self.assertIn(CITATION_NAME, citation_dict)

    self.assertIn(CITATION_DATE, citation_dict)
    self.assertEqual('1998-05-01', citation_dict[CITATION_DATE])

    self.assertIn(CITATION_AUTHORS, citation_dict)
    self.assertEqual(
        {'Lewell XQ', 'Judd DB', 'Watson SP', 'Hann MM'},
        set(citation_dict[CITATION_AUTHORS])
    )
pybel-0.12.1/tests/test_manager/test_connection.py000066400000000000000000000055271334645200200223010ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for instantiating the manager""" import os import tempfile import unittest from pybel import Manager from pybel.manager.base_manager import build_engine_session try: from unittest import mock except ImportError: import mock class TestInstantiation(unittest.TestCase): """Allows for testing with a consistent connection without changing the configuration.""" def setUp(self): """Add two class-level variables: ``mock_global_connection`` and ``mock_module_connection`` that can be used as context managers to mock the bio2bel connection getter functions.""" self.fd, self.path = tempfile.mkstemp() self.connection = 'sqlite:///' + self.path def mock_connection(): """Get the connection enclosed by this class. :rtype: str """ return self.connection self.mock_connection = mock.patch('pybel.manager.cache_manager.get_cache_connection', mock_connection) def tearDown(self): os.close(self.fd) os.remove(self.path) def test_fail_connection_none(self): """Test that a None causes a huge error.""" with self.assertRaises(ValueError): build_engine_session(None) def test_instantiate_init(self): """Test what happens when no connection is specified for the normal constructor.""" with self.mock_connection: manager = Manager() self.assertEqual(self.connection, str(manager.engine.url)) def test_instantiate_manager_positional(self): manager = Manager(self.connection) self.assertEqual(self.connection, str(manager.engine.url)) def test_instantiate_manager_positional_with_keyword(self): manager = Manager(self.connection, echo=False) self.assertEqual(self.connection, str(manager.engine.url)) def test_instantiate_manager_fail_positional(self): with self.assertRaises(ValueError): Manager(self.connection, True) def test_instantiate_manager_keyword(self): manager = Manager(connection=self.connection) self.assertEqual(self.connection, 
str(manager.engine.url)) def test_instantiate_manager_connection_fail_too_many_keyword(self): with self.assertRaises(ValueError): Manager(connection=self.connection, engine='something', session='something') def test_instantiate_manager_engine_fail_too_many_keywords(self): with self.assertRaises(ValueError): Manager(engine='something', session='something', echo=False) def test_instantiate_manager_engine_missing(self): with self.assertRaises(ValueError): Manager(engine=None, session='fake-session') def test_instantiate_manager_session_missing(self): with self.assertRaises(ValueError): Manager(engine='fake-engine', session=None) pybel-0.12.1/tests/test_manager/test_manager_definitions.py000066400000000000000000000114551334645200200241440ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os from pathlib import Path from pybel import BELGraph from pybel.constants import ANNOTATIONS, OPENBEL_ANNOTATION_RESOURCES from pybel.testing.cases import TemporaryCacheClsMixin from pybel.testing.constants import belns_dir_path, test_ns_nocache_path from pybel.testing.mocks import mock_bel_resources from tests.constants import HGNC_URL ns1 = Path(os.path.join(belns_dir_path, 'disease-ontology.belns')).as_uri() ns1_url = 'http://resources.openbel.org/belframework/20150611/namespace/disease-ontology-ids.belns' ns2 = Path(os.path.join(belns_dir_path, 'mesh-diseases.belns')).as_uri() ns2_url = 'http://resources.openbel.org/belframework/20150611/namespace/mesh-diseases.belns' CELL_LINE_URL = OPENBEL_ANNOTATION_RESOURCES + 'cell-line.belanno' CELL_LINE_KEYWORD = 'CellLine' class TestDefinitionManagers(TemporaryCacheClsMixin): def _help_check_hgnc(self, manager): """Help check the HGNC namespace was loaded properly. 
:type manager: pybel.Manager """ entry = manager.get_namespace_entry(HGNC_URL, 'MHS2') self.assertIsNotNone(entry) self.assertEqual('MHS2', entry.name) self.assertIn('G', entry.encoding) entry = manager.get_namespace_entry(HGNC_URL, 'MIATNB') self.assertIsNotNone(entry) self.assertEqual('MIATNB', entry.name) self.assertIn('G', entry.encoding) self.assertIn('R', entry.encoding) entry = manager.get_namespace_entry(HGNC_URL, 'MIA') self.assertIsNotNone(entry) self.assertEqual('MIA', entry.name) self.assertIn('G', entry.encoding) self.assertIn('P', entry.encoding) self.assertIn('R', entry.encoding) @mock_bel_resources def test_insert_namespace_persistent(self, mock_get): self.assertEqual(0, self.manager.count_namespaces()) self.assertEqual(0, self.manager.count_namespace_entries()) self.manager.get_or_create_namespace(HGNC_URL) self._help_check_hgnc(self.manager) self.manager.get_or_create_namespace(HGNC_URL) self._help_check_hgnc(self.manager) self.manager.drop_namespace_by_url(HGNC_URL) self.assertEqual(0, self.manager.count_namespaces()) self.assertEqual(0, self.manager.count_namespace_entries()) def test_insert_namespace_nocache(self): """Test that this namespace isn't cached""" self.assertEqual(0, self.manager.count_namespaces()) self.assertEqual(0, self.manager.count_namespace_entries()) self.manager.get_or_create_namespace(test_ns_nocache_path) self.assertEqual(0, self.manager.count_namespaces()) self.assertEqual(0, self.manager.count_namespace_entries()) @mock_bel_resources def test_insert_annotation(self, mock_get): self.assertEqual(0, self.manager.count_annotations()) self.assertEqual(0, self.manager.count_annotation_entries()) annotation = self.manager.get_or_create_annotation(CELL_LINE_URL) self.assertIsNotNone(annotation) self.assertEqual(CELL_LINE_URL, annotation.url) entry = self.manager.get_annotation_entry_by_name(CELL_LINE_URL, '1321N1 cell') self.assertEqual('1321N1 cell', entry.name) self.assertEqual('CLO_0001072', entry.identifier) entries = 
self.manager.get_annotation_entries_by_names(CELL_LINE_URL, ['1321N1 cell']) self.assertIsNotNone(entries) self.assertEqual(1, len(entries)) entry = entries[0] self.assertEqual('1321N1 cell', entry.name) self.assertEqual('CLO_0001072', entry.identifier) graph = BELGraph() graph.annotation_url[CELL_LINE_KEYWORD] = CELL_LINE_URL data = { ANNOTATIONS: { CELL_LINE_KEYWORD: { '1321N1 cell': True } } } annotations_iter = dict(self.manager._iter_from_annotations_dict(graph, annotations_dict=data[ANNOTATIONS])) self.assertIn(CELL_LINE_URL, annotations_iter) self.assertIn('1321N1 cell', annotations_iter[CELL_LINE_URL]) entries = self.manager._get_annotation_entries_from_data(graph, data) self.assertIsNotNone(entries) self.assertEqual(1, len(entries)) entry = entries[0] self.assertEqual('1321N1 cell', entry.name) self.assertEqual('CLO_0001072', entry.identifier) self.manager.drop_namespace_by_url(CELL_LINE_URL) self.assertEqual(0, self.manager.count_annotations()) self.assertEqual(0, self.manager.count_annotation_entries()) def test_get_annotation_entries_no_data(self): """Test that if there's no ANNOTATIONS entry in the data, it just returns none.""" graph = BELGraph() data = {} entries = self.manager._get_annotation_entries_from_data(graph, data) self.assertIsNone(entries) pybel-0.12.1/tests/test_manager/test_manager_drop.py000066400000000000000000000131761334645200200225770ustar00rootroot00000000000000# -*- coding: utf-8 -*- from pybel import BELGraph from pybel.constants import INCREASES, PROTEIN from pybel.dsl import protein from pybel.manager.models import Edge, Namespace, NamespaceEntry, Network, Node from pybel.testing.cases import TemporaryCacheMixin from pybel.testing.mocks import mock_bel_resources from pybel.testing.utils import make_dummy_annotations, make_dummy_namespaces, n from tests.constants import test_citation_dict, test_evidence_text yfg1 = protein(name='YFG1', namespace='HGNC') yfg2 = protein(name='YFG1', namespace='HGNC') class 
TestReconstituteNodeTuples(TemporaryCacheMixin): @mock_bel_resources def test_simple(self, mock): """This test checks that the network can be added and dropped""" graph = BELGraph(name='test', version='0.0.0') graph.add_increases( yfg1, yfg2, evidence=test_evidence_text, citation=test_citation_dict, annotations={ 'Disease': {'Disease1': True}, 'Cell': {'Cell1': True} } ) make_dummy_namespaces(self.manager, graph) make_dummy_annotations(self.manager, graph) network = self.manager.insert_graph(graph, store_parts=True) self.manager.drop_network_by_id(network.id) class TestCascades(TemporaryCacheMixin): def setUp(self): super(TestCascades, self).setUp() self.n1 = Node(type=PROTEIN, bel='p(HGNC:A)') self.n2 = Node(type=PROTEIN, bel='p(HGNC:B)') self.n3 = Node(type=PROTEIN, bel='p(HGNC:C)') self.e1 = Edge(source=self.n1, target=self.n2, relation=INCREASES, bel='p(HGNC:A) increases p(HGNC:B)') self.e2 = Edge(source=self.n2, target=self.n3, relation=INCREASES, bel='p(HGNC:B) increases p(HGNC:C)') self.e3 = Edge(source=self.n1, target=self.n3, relation=INCREASES, bel='p(HGNC:A) increases p(HGNC:C)') self.g1 = Network(name=n(), version=n(), edges=[self.e1, self.e2, self.e3]) self.g2 = Network(name=n(), version=n(), edges=[self.e1]) self.manager.session.add_all([self.n1, self.n2, self.n3, self.e1, self.e2, self.e3, self.g1, self.g2]) self.manager.session.commit() self.assertEqual(3, self.manager.count_nodes()) self.assertEqual(3, self.manager.count_edges()) self.assertEqual(2, self.manager.count_networks()) def test_drop_node(self): """Makes sure that when a node gets dropped, its in-edges AND out-edges also do""" self.manager.session.delete(self.n2) self.manager.session.commit() self.assertEqual(2, self.manager.count_nodes()) self.assertEqual(1, self.manager.count_edges()) self.assertEqual(2, self.manager.count_networks()) self.assertEqual(1, self.g1.edges.count()) self.assertEqual(0, self.g2.edges.count()) def test_drop_edge(self): """When an edge gets dropped, make sure 
the network doesn't have as many edges, but nodes get to stay""" self.manager.session.delete(self.e1) self.manager.session.commit() self.assertEqual(3, self.manager.count_nodes()) self.assertEqual(2, self.manager.count_edges()) self.assertEqual(2, self.manager.count_networks()) self.assertEqual(2, self.g1.edges.count()) self.assertEqual(0, self.g2.edges.count()) def test_get_orphan_edges(self): edges = [result.edge_id for result in self.manager.query_singleton_edges_from_network(self.g1)] self.assertEqual(2, len(edges)) self.assertIn(self.e2.id, edges) self.assertIn(self.e3.id, edges) def test_drop_network_1(self): """When a network gets dropped, drop all of the edges if they don't appear in other networks""" self.manager.drop_network(self.g1) self.assertEqual(3, self.manager.count_nodes()) self.assertEqual(1, self.manager.count_edges()) self.assertEqual(1, self.manager.count_networks()) self.assertEqual(1, self.g2.edges.count()) def test_drop_network_2(self): """When a network gets dropped, drop all of the edges if they don't appear in other networks""" self.manager.drop_network(self.g2) self.assertEqual(3, self.manager.count_nodes()) self.assertEqual(3, self.manager.count_edges()) self.assertEqual(1, self.manager.count_networks()) self.assertEqual(3, self.g1.edges.count()) def test_drop_all_networks(self): """When all networks are dropped, make sure all the edges and network_edge mappings are gone too""" self.manager.drop_networks() self.assertEqual(0, self.manager.count_edges()) self.assertEqual(0, self.manager.count_networks()) def test_drop_modification(self): """Don't let this happen""" def test_drop_property(self): """Don't let this happen""" def test_drop_namespace(self): keyword, url = n(), n() namespace = Namespace(keyword=keyword, url=url) self.manager.session.add(namespace) n_entries = 5 for _ in range(n_entries): entry = NamespaceEntry(name=n(), namespace=namespace) self.manager.session.add(entry) self.manager.session.commit() self.assertEqual(1, 
self.manager.count_namespaces(), msg='Should have one namespace') self.assertEqual(n_entries, self.manager.count_namespace_entries(), msg='Should have {} entries'.format(n_entries)) self.manager.drop_namespace_by_url(url) self.assertEqual(0, self.manager.count_namespaces(), msg='Should have no namespaces') self.assertEqual(0, self.manager.count_namespace_entries(), msg='Entries should have been dropped') pybel-0.12.1/tests/test_manager/test_manager_graph.py000066400000000000000000001536461334645200200227430ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for manager functions handling BEL networks.""" from __future__ import unicode_literals import unittest from collections import Counter import sqlalchemy.exc import time from sqlalchemy import not_ from pybel import BELGraph, from_database, from_path, to_database from pybel.constants import ( ABUNDANCE, BEL_DEFAULT_NAMESPACE, BIOPROCESS, CITATION_AUTHORS, CITATION_DATE, CITATION_NAME, CITATION_REFERENCE, CITATION_TYPE, CITATION_TYPE_OTHER, CITATION_TYPE_PUBMED, DECREASES, HAS_COMPONENT, HAS_PRODUCT, HAS_REACTANT, INCREASES, LOCATION, METADATA_NAME, METADATA_VERSION, PATHOLOGY, PROTEIN, RELATION, ) from pybel.dsl import ( BaseEntity, activity, complex_abundance, composite_abundance, degradation, entity, fragment, fusion_range, gene, gene_fusion, gmod, hgvs, location, named_complex_abundance, pmod, protein, protein_fusion, reaction, secretion, translocation, ) from pybel.dsl.namespaces import chebi, hgnc from pybel.examples import ras_tloc_graph, sialic_acid_graph from pybel.manager import models from pybel.manager.models import Author, Citation, Edge, Evidence, NamespaceEntry, Node, Property from pybel.testing.cases import FleetingTemporaryCacheMixin, TemporaryCacheClsMixin, TemporaryCacheMixin from pybel.testing.constants import test_bel_simple from pybel.testing.mocks import mock_bel_resources from pybel.testing.utils import make_dummy_annotations, make_dummy_namespaces, n from pybel.utils import 
hash_citation, hash_evidence from tests.constants import ( BelReconstitutionMixin, akt1, casp8, egfr, expected_test_simple_metadata, fadd, test_citation_dict, test_evidence_text, ) fos = hgnc('FOS') jun = hgnc('JUN') ap1_complex = complex_abundance([fos, jun]) egfr_dimer = complex_abundance([egfr, egfr]) yfg_data = hgnc('YFG') e2f4_data = hgnc('E2F4') bound_ap1_e2f4 = complex_abundance([ap1_complex, e2f4_data]) superoxide = chebi('superoxide') hydrogen_peroxide = chebi('hydrogen peroxide') oxygen = chebi('oxygen') superoxide_decomposition = reaction(reactants=[superoxide], products=[hydrogen_peroxide, oxygen]) def assert_unqualified_edge(self, u, v, rel): """Assert there's only one edge and get the data for it :param unittest.TestCase self: :param u: :param v: :param rel: :return: """ if isinstance(u, BaseEntity): u = u self.assertIn(u, self.graph) if isinstance(v, BaseEntity): v = v self.assertIn(v, self.graph[u]) edges = list(self.graph[u][v].values()) self.assertEqual(1, len(edges)) data = edges[0] self.assertEqual(rel, data[RELATION]) class TestNetworkCache(BelReconstitutionMixin, FleetingTemporaryCacheMixin): def test_get_network_missing(self): network = self.manager.get_most_recent_network_by_name('This network is not here') self.assertIsNone(network) def test_get_graph_missing(self): network = self.manager.get_graph_by_most_recent('This network is not here') self.assertIsNone(network) @mock_bel_resources def test_reload(self, mock_get): """Tests that a graph with the same name and version can't be added twice""" graph = sialic_acid_graph.copy() self.assertEqual('1.0.0', graph.version) to_database(graph, manager=self.manager, store_parts=False) time.sleep(1) self.assertEqual(1, self.manager.count_networks()) networks = self.manager.list_networks() self.assertEqual(1, len(networks)) network = networks[0] self.assertEqual(graph.name, network.name) self.assertEqual(graph.version, network.version) self.assertEqual(graph.description, network.description) 
reconstituted = self.manager.get_graph_by_name_version(graph.name, graph.version) self.assertIsInstance(reconstituted, BELGraph) self.assertEqual(graph.nodes(data=True), reconstituted.nodes(data=True)) # self.bel_thorough_reconstituted(reconstituted) # Test that the graph can't be added a second time with self.assertRaises(sqlalchemy.exc.IntegrityError): self.manager.insert_graph(graph, store_parts=False) self.manager.session.rollback() time.sleep(1) self.assertEqual(1, self.manager.count_networks()) graph_copy = graph.copy() graph_copy.version = '1.0.1' network_copy = self.manager.insert_graph(graph_copy, store_parts=False) time.sleep(1) # Sleep so the first graph always definitely goes in first self.assertNotEqual(network.id, network_copy.id) self.assertTrue(self.manager.has_name_version(graph_copy.name, graph_copy.version)) self.assertFalse(self.manager.has_name_version('wrong name', '0.1.2')) self.assertFalse(self.manager.has_name_version(graph_copy.name, '0.1.2')) self.assertFalse(self.manager.has_name_version('wrong name', graph_copy.version)) self.assertEqual(2, self.manager.count_networks()) self.assertEqual('1.0.1', self.manager.get_most_recent_network_by_name(graph.name).version) query_ids = {-1, network.id, network_copy.id} query_networks_result = self.manager.get_networks_by_ids(query_ids) self.assertEqual(2, len(query_networks_result)) self.assertEqual({network.id, network_copy.id}, {network.id for network in query_networks_result}) expected_versions = {'1.0.1', '1.0.0'} self.assertEqual(expected_versions, set(self.manager.get_network_versions(graph.name))) exact_name_version = from_database(graph.name, graph.version, manager=self.manager) self.assertEqual(graph.name, exact_name_version.name) self.assertEqual(graph.version, exact_name_version.version) exact_name_version = from_database(graph.name, '1.0.1', manager=self.manager) self.assertEqual(graph.name, exact_name_version.name) self.assertEqual('1.0.1', exact_name_version.version) 
most_recent_version = from_database(graph.name, manager=self.manager) self.assertEqual(graph.name, most_recent_version.name) self.assertEqual('1.0.1', exact_name_version.version) recent_networks = list(self.manager.list_recent_networks()) # just try it to see if it fails self.assertIsNotNone(recent_networks) self.assertEqual([(network.name, '1.0.1')], [(n.name, n.version) for n in recent_networks]) self.assertEqual('1.0.1', recent_networks[0].version) @mock_bel_resources def test_upload_with_tloc(self, mock_get): """Test that the RAS translocation example graph can be uploaded.""" make_dummy_namespaces(self.manager, ras_tloc_graph) to_database(ras_tloc_graph, manager=self.manager) class TestTemporaryInsertNetwork(TemporaryCacheMixin): @mock_bel_resources def test_insert_with_list_annotations(self, mock): """This test checks that graphs that contain list annotations, which aren't cached, can be loaded properly into the database.""" graph = BELGraph(name='test', version='0.0.0') graph.annotation_list['TEST'] = {'a', 'b', 'c'} graph.add_increases( fos, jun, evidence=test_evidence_text, citation=test_citation_dict, annotations={'TEST': 'a'} ) make_dummy_namespaces(self.manager, graph) self.manager.insert_graph(graph, store_parts=True) # TODO check that the database doesn't have anything for TEST in it class TestQuery(TemporaryCacheMixin): def setUp(self): super(TestQuery, self).setUp() graph = BELGraph(name='test', version='0.0.0') graph.annotation_list['TEST'] = {'a', 'b', 'c'} graph.add_increases( fos, jun, evidence=test_evidence_text, citation=test_citation_dict, annotations={ 'TEST': 'a' } ) make_dummy_namespaces(self.manager, graph) make_dummy_annotations(self.manager, graph) with mock_bel_resources: self.manager.insert_graph(graph, store_parts=True) def test_query_node_bel_1(self): rv = self.manager.query_nodes(bel='p(HGNC:FOS)') self.assertEqual(1, len(rv)) self.assertEqual(fos, rv[0].to_json()) def test_query_node_bel_2(self): rv = 
self.manager.query_nodes(bel='p(HGNC:JUN)') self.assertEqual(1, len(rv)) self.assertEqual(jun, rv[0].to_json()) def test_query_node_namespace_wildcard(self): rv = self.manager.query_nodes(namespace='HG%') self.assertEqual(2, len(rv)) self.assertTrue(any(x.to_json() == fos for x in rv)) self.assertTrue(any(x.to_json() == jun for x in rv)) def test_query_node_name_wildcard(self): rv = self.manager.query_nodes(name='%J%') self.assertEqual(1, len(rv), 1) self.assertEqual(jun, rv[0].to_json()) def test_query_node_type(self): rv = self.manager.query_nodes(type=PROTEIN) self.assertEqual(2, len(rv)) def test_query_node_type_missing(self): rv = self.manager.query_nodes(type=ABUNDANCE) self.assertEqual(0, len(rv)) def test_query_edge_by_bel(self): rv = self.manager.query_edges(bel="p(HGNC:FOS) increases p(HGNC:JUN)") self.assertEqual(1, len(rv)) def test_query_edge_by_relation_wildcard(self): # relation like, data increased_list = self.manager.query_edges(relation='increase%') self.assertEqual(1, len(increased_list)) # self.assertIn(..., increased_list) def test_query_edge_by_evidence_wildcard(self): # evidence like, data evidence_list = self.manager.search_edges_with_evidence(evidence='%3%') self.assertEqual(len(evidence_list), 0) evidence_list = self.manager.search_edges_with_evidence(evidence='%Twit%') self.assertEqual(len(evidence_list), 1) def test_query_edge_by_mixed_no_result(self): # no result empty_list = self.manager.query_edges(source='p(HGNC:FADD)', relation=DECREASES) self.assertEqual(len(empty_list), 0) def test_query_edge_by_mixed(self): # source, relation, data source_list = self.manager.query_edges(source='p(HGNC:FOS)', relation=INCREASES) self.assertEqual(len(source_list), 1) def test_query_edge_by_source_function(self): edges = self.manager.query_edges(source_function=PROTEIN) self.assertEqual(1, len(edges), msg='Wrong number of edges: {}'.format(edges)) edges = self.manager.query_edges(source_function=BIOPROCESS) self.assertEqual(0, len(edges), msg='Wrong 
number of edges: {}'.format(edges)) def test_query_edge_by_target_function(self): edges = self.manager.query_edges(target_function=PROTEIN) self.assertEqual(1, len(edges), msg='Wrong number of edges: {}'.format(edges)) edges = self.manager.query_edges(target_function=PATHOLOGY) self.assertEqual(0, len(edges), msg='Wrong number of edges: {}'.format(edges)) def test_query_citation_by_type(self): rv = self.manager.query_citations(type=CITATION_TYPE_PUBMED) self.assertEqual(1, len(rv)) self.assertTrue(rv[0].is_pubmed) self.assertFalse(rv[0].is_enriched) def test_query_citaiton_by_reference(self): rv = self.manager.query_citations(type=CITATION_TYPE_PUBMED, reference=test_citation_dict[CITATION_REFERENCE]) self.assertEqual(1, len(rv)) self.assertTrue(rv[0].is_pubmed) self.assertFalse(rv[0].is_enriched) self.assertEqual(test_citation_dict, rv[0].to_json()) @unittest.skip def test_query_by_author_wildcard(self): author_list = self.manager.query_citations(author="Example%") self.assertEqual(len(author_list), 1) @unittest.skip def test_query_by_name(self): # type, name, data name_dict_list = self.manager.query_citations(type=CITATION_TYPE_PUBMED, name="That other article from last week") self.assertEqual(len(name_dict_list), 1) # self.assertIn(..., name_dict_list) @unittest.skip def test_query_by_name_wildcard(self): # type, name like, data name_dict_list2 = self.manager.query_citations(type=CITATION_TYPE_PUBMED, name="%article from%") self.assertEqual(len(name_dict_list2), 2) # self.assertIn(..., name_dict_list2) # self.assertIn(..., name_dict_list2) class TestEnsure(TemporaryCacheMixin): def test_get_or_create_citation(self): reference = '1234AB' citation_dict = { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_NAME: 'TestCitation_basic', CITATION_REFERENCE: reference, } citation_hash = hash_citation(type=citation_dict[CITATION_TYPE], reference=citation_dict[CITATION_REFERENCE]) citation = self.manager.get_or_create_citation(**citation_dict) self.manager.session.commit() 
self.assertIsInstance(citation, Citation) self.assertEqual(citation_dict, citation.to_json()) citation_reloaded_from_reference = self.manager.get_citation_by_pmid(reference) self.assertIsNotNone(citation_reloaded_from_reference) self.assertEqual(citation_dict, citation_reloaded_from_reference.to_json()) citation_reloaded_from_dict = self.manager.get_or_create_citation(**citation_dict) self.assertIsNotNone(citation_reloaded_from_dict) self.assertEqual(citation_dict, citation_reloaded_from_dict.to_json()) citation_reloaded_from_hash = self.manager.get_citation_by_hash(citation_hash) self.assertIsNotNone(citation_reloaded_from_hash) self.assertEqual(citation_dict, citation_reloaded_from_hash.to_json()) def test_get_or_create_citation_full(self): reference = 'CD5678' citation_dict = { CITATION_TYPE: CITATION_TYPE_OTHER, CITATION_NAME: 'TestCitation_full', CITATION_REFERENCE: reference, CITATION_DATE: '2017-04-11', CITATION_AUTHORS: sorted(['Jackson M', 'Lajoie J']) } citation_hash = hash_citation(type=citation_dict[CITATION_TYPE], reference=citation_dict[CITATION_REFERENCE]) citation = self.manager.get_or_create_citation(**citation_dict) self.manager.session.commit() self.assertIsInstance(citation, Citation) self.assertEqual(citation_dict, citation.to_json()) citation_reloaded_from_reference = self.manager.get_citation_by_reference(CITATION_TYPE_OTHER, reference) self.assertIsNotNone(citation_reloaded_from_reference) self.assertEqual(citation_dict, citation_reloaded_from_reference.to_json()) citation_reloaded_from_dict = self.manager.get_or_create_citation(**citation_dict) self.assertIsNotNone(citation_reloaded_from_dict) self.assertEqual(citation_dict, citation_reloaded_from_dict.to_json()) citation_reloaded_from_hash = self.manager.get_citation_by_hash(citation_hash) self.assertIsNotNone(citation_reloaded_from_hash) self.assertEqual(citation_dict, citation_reloaded_from_hash.to_json()) full_citation_basic = { CITATION_TYPE: 'Other', CITATION_NAME: 
'TestCitation_full', CITATION_REFERENCE: 'CD5678' } citation_truncated = self.manager.get_or_create_citation(**full_citation_basic) self.assertIsNotNone(citation_truncated) self.assertEqual(citation_dict, citation_truncated.to_json()) def test_get_or_create_evidence(self): basic_citation = self.manager.get_or_create_citation(**test_citation_dict) utf8_test_evidence = u"Yes, all the information is true! This contains a unicode alpha: α" evidence_hash = hash_evidence( text=utf8_test_evidence, type=CITATION_TYPE_PUBMED, reference=test_citation_dict[CITATION_REFERENCE] ) evidence = self.manager.get_or_create_evidence(basic_citation, utf8_test_evidence) self.assertIsInstance(evidence, Evidence) self.assertIn(evidence_hash, self.manager.object_cache_evidence) # Objects cached? reloaded_evidence = self.manager.get_or_create_evidence(basic_citation, utf8_test_evidence) self.assertEqual(evidence, reloaded_evidence) def test_get_or_create_author(self): """This tests getting or creating author with unicode characters""" author_name = "Jαckson M" # Create author = self.manager.get_or_create_author(author_name) self.manager.session.commit() self.assertIsInstance(author, Author) self.assertEqual(author.name, author_name) author_from_name = self.manager.get_author_by_name(author_name) self.assertIsNotNone(author_from_name) self.assertEqual(author_name, author_from_name.name) # Get author_from_get = self.manager.get_or_create_author(author_name) self.assertEqual(author.name, author_from_get.name) self.assertEqual(author, author_from_get) class TestEdgeStore(TemporaryCacheClsMixin, BelReconstitutionMixin): """Tests that the cache can be queried.""" @classmethod def setUpClass(cls): """Set up the class with a BEL graph and its corresponding SQLAlchemy model.""" super(TestEdgeStore, cls).setUpClass() with mock_bel_resources: cls.graph = from_path(test_bel_simple, manager=cls.manager, allow_nested=True) cls.network = cls.manager.insert_graph(cls.graph, store_parts=True) def 
test_citations(self): citations = self.manager.session.query(Citation).all() self.assertEqual(2, len(citations), msg='Citations: {}'.format(citations)) citation_references = {'123455', '123456'} self.assertEqual(citation_references, { citation.reference for citation in citations }) def test_authors(self): authors = {'Example Author', 'Example Author2'} self.assertEqual(authors, { author.name for author in self.manager.session.query(Author).all() }) def test_evidences(self): evidences = self.manager.session.query(Evidence).all() self.assertEqual(3, len(evidences)) evidences_texts = {'Evidence 1 w extra notes', 'Evidence 2', 'Evidence 3'} self.assertEqual(evidences_texts, { evidence.text for evidence in evidences }) def test_nodes(self): nodes = self.manager.session.query(Node).all() self.assertEqual(4, len(nodes)) def test_edges(self): edges = self.manager.session.query(Edge).all() x = Counter((e.source.bel, e.target.bel) for e in edges) d = { (akt1.as_bel(), egfr.as_bel()): 1, (egfr.as_bel(), fadd.as_bel()): 1, (egfr.as_bel(), casp8.as_bel()): 1, (fadd.as_bel(), casp8.as_bel()): 1, (akt1.as_bel(), casp8.as_bel()): 1, # two way association (casp8.as_bel(), akt1.as_bel()): 1 # two way association } self.assertEqual(dict(x), d) network_edge_associations = self.manager.session.query(models.network_edge).filter_by( network_id=self.network.id).all() self.assertEqual( {network_edge_association.edge_id for network_edge_association in network_edge_associations}, {edge.id for edge in edges} ) def test_reconstitute(self): g2 = self.manager.get_graph_by_name_version( expected_test_simple_metadata[METADATA_NAME], expected_test_simple_metadata[METADATA_VERSION] ) self.bel_simple_reconstituted(g2) class TestAddNodeFromData(unittest.TestCase): def setUp(self): self.graph = BELGraph() def test_simple(self): self.graph.add_node_from_data(yfg_data) self.assertIn(yfg_data, self.graph) self.assertEqual(1, self.graph.number_of_nodes()) def test_single_variant(self): node_data = 
gene('HGNC', 'AKT1', variants=hgvs('p.Phe508del')) node_parent_data = node_data.get_parent() self.graph.add_node_from_data(node_data) self.assertIn(node_data, self.graph) self.assertIn(node_parent_data, self.graph) self.assertEqual(2, self.graph.number_of_nodes()) self.assertEqual(1, self.graph.number_of_edges()) def test_multiple_variants(self): node_data = gene('HGNC', 'AKT1', variants=[ hgvs('p.Phe508del'), hgvs('p.Phe509del') ]) node_parent_data = node_data.get_parent() node_parent_tuple = node_parent_data self.graph.add_node_from_data(node_data) self.assertIn(node_data, self.graph) self.assertIn(node_parent_tuple, self.graph) self.assertEqual(2, self.graph.number_of_nodes()) self.assertEqual(1, self.graph.number_of_edges()) def test_fusion(self): node_data = gene_fusion( partner_5p=gene('HGNC', 'TMPRSS2'), partner_3p=gene('HGNC', 'ERG'), range_5p=fusion_range('c', 1, 79), range_3p=fusion_range('c', 312, 5034) ) node_data = node_data self.graph.add_node_from_data(node_data) self.assertIn(node_data, self.graph) self.assertEqual(1, self.graph.number_of_nodes()) self.assertEqual(0, self.graph.number_of_edges()) def test_composite(self): il23 = named_complex_abundance('GOCC', 'interleukin-23 complex') il6 = protein('HGNC', 'IL6') node_data = composite_abundance([il23, il6]) self.graph.add_node_from_data(node_data) self.assertIn(node_data, self.graph) self.assertEqual(3, self.graph.number_of_nodes()) self.assertIn(il6, self.graph, msg='Nodes:\n'.format('\n'.join(map(str, self.graph)))) self.assertIn(il23, self.graph) self.assertEqual(2, self.graph.number_of_edges()) self.assertIn(il6, self.graph[node_data]) edges = list(self.graph[node_data][il6].values()) self.assertEqual(1, len(edges)) data = edges[0] self.assertEqual(HAS_COMPONENT, data[RELATION]) self.assertIn(il23, self.graph[node_data]) edges = list(self.graph[node_data][il23].values()) self.assertEqual(1, len(edges)) data = edges[0] self.assertEqual(HAS_COMPONENT, data[RELATION]) def test_reaction(self): 
self.graph.add_node_from_data(superoxide_decomposition) self.assertIn(superoxide_decomposition, self.graph) self.assertEqual(4, self.graph.number_of_nodes()) self.assertEqual(3, self.graph.number_of_edges()) assert_unqualified_edge(self, superoxide_decomposition, superoxide, HAS_REACTANT) assert_unqualified_edge(self, superoxide_decomposition, hydrogen_peroxide, HAS_PRODUCT) assert_unqualified_edge(self, superoxide_decomposition, oxygen, HAS_PRODUCT) def test_complex(self): node = complex_abundance(members=[fos, jun]) self.graph.add_node_from_data(node) self.assertIn(node, self.graph) self.assertEqual(3, self.graph.number_of_nodes()) self.assertEqual(2, self.graph.number_of_edges()) assert_unqualified_edge(self, node, fos, HAS_COMPONENT) assert_unqualified_edge(self, node, jun, HAS_COMPONENT) def test_dimer_complex(self): """Tests what happens if a BEL statement complex(p(X), p(X)) is added""" self.graph.add_node_from_data(egfr_dimer) self.assertIn(egfr, self.graph) self.assertIn(egfr_dimer, self.graph) self.assertEqual(2, self.graph.number_of_nodes()) self.assertEqual(1, self.graph.number_of_edges()) assert_unqualified_edge(self, egfr_dimer, egfr, HAS_COMPONENT) def test_nested_complex(self): """Checks what happens if a theoretical BEL statement `complex(p(X), complex(p(Y), p(Z)))` is added""" self.graph.add_node_from_data(bound_ap1_e2f4) self.assertIn(bound_ap1_e2f4, self.graph) self.assertEqual(5, self.graph.number_of_nodes()) self.assertIn(fos, self.graph) self.assertIn(jun, self.graph) self.assertIn(e2f4_data, self.graph) self.assertIn(ap1_complex, self.graph) self.assertEqual(4, self.graph.number_of_edges()) assert_unqualified_edge(self, ap1_complex, fos, HAS_COMPONENT) assert_unqualified_edge(self, ap1_complex, jun, HAS_COMPONENT) assert_unqualified_edge(self, bound_ap1_e2f4, ap1_complex, HAS_COMPONENT) assert_unqualified_edge(self, bound_ap1_e2f4, e2f4_data, HAS_COMPONENT) class TestReconstituteNodeTuples(TemporaryCacheMixin): """Tests the ability to go 
from PyBEL to relational database""" def help_reconstitute(self, node, number_nodes, number_edges): """Help test the round-trip conversion from PyBEL data dictionary to node model. :param BaseEntity node: PyBEL node data dictionary :param int number_nodes: :param int number_edges: """ self.assertIsInstance(node, BaseEntity) graph = BELGraph(name='test', version='0.0.0') graph.add_node_from_data(node) make_dummy_namespaces(self.manager, graph) self.manager.insert_graph(graph, store_parts=True) self.assertEqual(number_nodes, self.manager.count_nodes()) self.assertEqual(number_edges, self.manager.count_edges()) node_model = self.manager.get_or_create_node(graph, node) self.assertEqual(node.sha512, node_model.sha512) self.manager.session.commit() self.assertEqual(node, node_model.to_json()) self.assertEqual(node, self.manager.get_dsl_by_hash(node.as_sha512())) @mock_bel_resources def test_simple(self, mock): self.help_reconstitute(yfg_data, 1, 0) @mock_bel_resources def test_hgvs(self, mock): node_data = gene(namespace='HGNC', name='AKT1', variants=hgvs('p.Phe508del')) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_fragment_unspecified(self, mock): dummy_namespace = n() dummy_name = n() node_data = protein(namespace=dummy_namespace, name=dummy_name, variants=[fragment()]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_fragment_specified(self, mock): dummy_namespace = n() dummy_name = n() node_data = protein(namespace=dummy_namespace, name=dummy_name, variants=[fragment(start=5, stop=8)]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_fragment_specified_start_only(self, mock): dummy_namespace = n() dummy_name = n() node_data = protein(namespace=dummy_namespace, name=dummy_name, variants=[fragment(start=5, stop='*')]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_fragment_specified_end_only(self, mock): dummy_namespace = n() dummy_name = n() node_data = 
protein(namespace=dummy_namespace, name=dummy_name, variants=[fragment(start='*', stop=1000)]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_gmod_custom(self, mock): """Tests a gene modification that uses a non-default namespace""" dummy_namespace = 'HGNC' dummy_name = 'AKT1' dummy_mod_namespace = 'GO' dummy_mod_name = 'DNA Methylation' node_data = gene(namespace=dummy_namespace, name=dummy_name, variants=[gmod(name=dummy_mod_name, namespace=dummy_mod_namespace)]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_gmod_default(self, mock): """Tests a gene modification that uses the BEL default namespace""" dummy_namespace = n() dummy_name = n() node_data = gene(namespace=dummy_namespace, name=dummy_name, variants=[gmod('Me')]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_pmod_default_simple(self, mock): dummy_namespace = n() dummy_name = n() node_data = protein(namespace=dummy_namespace, name=dummy_name, variants=[pmod('Me')]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_pmod_custom_simple(self, mock): dummy_namespace = 'HGNC' dummy_name = 'AKT1' dummy_mod_namespace = 'GO' dummy_mod_name = 'Protein phosphorylation' node_data = protein(namespace=dummy_namespace, name=dummy_name, variants=[pmod(name=dummy_mod_name, namespace=dummy_mod_namespace)]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_pmod_default_with_residue(self, mock): dummy_namespace = n() dummy_name = n() node_data = protein(namespace=dummy_namespace, name=dummy_name, variants=[pmod('Me', code='Ser')]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_pmod_custom_with_residue(self, mock): dummy_namespace = 'HGNC' dummy_name = 'AKT1' dummy_mod_namespace = 'GO' dummy_mod_name = 'Protein phosphorylation' node_data = protein( namespace=dummy_namespace, name=dummy_name, variants=[pmod(name=dummy_mod_name, namespace=dummy_mod_namespace, code='Ser')] ) 
self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_pmod_default_full(self, mock): dummy_namespace = n() dummy_name = n() node_data = protein(namespace=dummy_namespace, name=dummy_name, variants=[pmod('Me', code='Ser', position=5)]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_pmod_custom_full(self, mock): dummy_namespace = 'HGNC' dummy_name = 'AKT1' dummy_mod_namespace = 'GO' dummy_mod_name = 'Protein phosphorylation' node_data = protein( namespace=dummy_namespace, name=dummy_name, variants=[pmod(name=dummy_mod_name, namespace=dummy_mod_namespace, code='Ser', position=5)] ) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_multiple_variants(self, mock): node_data = gene(namespace='HGNC', name='AKT1', variants=[ hgvs('p.Phe508del'), hgvs('p.Phe509del') ]) self.help_reconstitute(node_data, 2, 1) @mock_bel_resources def test_fusion_specified(self, mock): node_data = gene_fusion( gene('HGNC', 'TMPRSS2'), gene('HGNC', 'ERG'), fusion_range('c', 1, 79), fusion_range('c', 312, 5034), ) self.help_reconstitute(node_data, 1, 0) @mock_bel_resources def test_fusion_unspecified(self, mock): node_data = gene_fusion( gene('HGNC', 'TMPRSS2'), gene('HGNC', 'ERG'), ) self.help_reconstitute(node_data, 1, 0) @mock_bel_resources def test_composite(self, mock): interleukin_23_complex = named_complex_abundance('GOCC', 'interleukin-23 complex') il6 = hgnc('IL6') interleukin_23_and_il6 = composite_abundance([interleukin_23_complex, il6]) self.help_reconstitute(interleukin_23_and_il6, 3, 2) @mock_bel_resources def test_reaction(self, mock): self.help_reconstitute(superoxide_decomposition, 4, 3) @mock_bel_resources def test_complex(self, mock): self.help_reconstitute(ap1_complex, 3, 2) @mock_bel_resources def test_nested_complex(self, mock): self.help_reconstitute(bound_ap1_e2f4, 5, 4) class TestReconstituteEdges(TemporaryCacheMixin): """This class tests that edges with varying properties can be added and extracted losslessly""" def 
setUp(self): """Creates a unit test with a manager and graph""" super(TestReconstituteEdges, self).setUp() self.graph = BELGraph( name=n(), version=n() ) @mock_bel_resources def test_translocation_default(self, mock): """This test checks that a translocation gets in the database properly""" self.graph.add_increases( protein(name='F2', namespace='HGNC'), protein(name='EDN1', namespace='HGNC'), evidence='In endothelial cells, ET-1 secretion is detectable under basal conditions, whereas thrombin induces its secretion.', citation='10473669', subject_modifier=secretion() ) make_dummy_namespaces(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) edge = network.edges.first() self.assertEqual(2, edge.properties.count()) @mock_bel_resources def test_subject_translocation_custom_to_loc(self, mock): self.graph.add_increases( protein(name='F2', namespace='HGNC'), protein(name='EDN1', namespace='HGNC'), evidence='In endothelial cells, ET-1 secretion is detectable under basal conditions, whereas thrombin induces its secretion.', citation='10473669', subject_modifier=translocation( from_loc=entity(namespace='TEST', name='A'), to_loc=entity(namespace='GOCC', name='extracellular space'), ) ) make_dummy_namespaces(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) edge = network.edges.first() self.assertEqual(2, edge.properties.count()) @mock_bel_resources def test_subject_activity_default(self, mock): p1_name = n() p2_name = n() self.graph.add_increases( protein(name=p1_name, namespace='HGNC'), protein(name=p2_name, namespace='HGNC'), evidence=n(), citation=n(), subject_modifier=activity('kin') ) make_dummy_namespaces(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, 
network.nodes.count()) self.assertEqual(1, network.edges.count()) kin_list = self.manager.session.query(NamespaceEntry).filter(NamespaceEntry.name == 'kin').all() self.assertEqual(1, len(kin_list)) kin = list(kin_list)[0] self.assertEqual('kin', kin.name) effects = self.manager.session.query(Property).join(NamespaceEntry).filter(Property.effect == kin) self.assertEqual(1, effects.count()) @mock_bel_resources def test_subject_activity_custom(self, mock): p1_name = n() p2_name = n() dummy_activity_namespace = n() dummy_activity_name = n() self.graph.add_increases( protein(name=p1_name, namespace='HGNC'), protein(name=p2_name, namespace='HGNC'), evidence=n(), citation=n(), subject_modifier=activity(name=dummy_activity_name, namespace=dummy_activity_namespace) ) make_dummy_namespaces(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) kin_list = self.manager.session.query(NamespaceEntry).filter(NamespaceEntry.name == dummy_activity_name).all() self.assertEqual(1, len(kin_list)) kin = list(kin_list)[0] self.assertEqual(dummy_activity_name, kin.name) effects = self.manager.session.query(Property).join(NamespaceEntry).filter(Property.effect == kin) self.assertEqual(1, effects.count()) @mock_bel_resources def test_object_activity_default(self, mock): p1_name = n() p2_name = n() self.graph.add_increases( protein(name=p1_name, namespace='HGNC'), protein(name=p2_name, namespace='HGNC'), evidence=n(), citation=n(), object_modifier=activity('kin') ) make_dummy_namespaces(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) kin_list = self.manager.session.query(NamespaceEntry).filter(NamespaceEntry.name == 'kin').all() self.assertEqual(1, len(kin_list)) kin = list(kin_list)[0] self.assertEqual('kin', kin.name) effects = 
self.manager.session.query(Property).join(NamespaceEntry).filter(Property.effect == kin) self.assertEqual(1, effects.count()) @mock_bel_resources def test_object_activity_custom(self, mock): p1_name = n() p2_name = n() dummy_activity_namespace = n() dummy_activity_name = n() self.graph.add_increases( protein(name=p1_name, namespace='HGNC'), protein(name=p2_name, namespace='HGNC'), evidence=n(), citation=n(), object_modifier=activity(name=dummy_activity_name, namespace=dummy_activity_namespace) ) make_dummy_namespaces(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) kin_list = self.manager.session.query(NamespaceEntry).filter(NamespaceEntry.name == dummy_activity_name).all() self.assertEqual(1, len(kin_list)) kin = list(kin_list)[0] self.assertEqual(dummy_activity_name, kin.name) effects = self.manager.session.query(Property).join(NamespaceEntry).filter(Property.effect == kin) self.assertEqual(1, effects.count()) def test_subject_degradation(self): self.graph.add_association( protein(name='YFG', namespace='HGNC'), protein(name='YFG2', namespace='HGNC'), evidence=n(), citation=n(), subject_modifier=degradation(), ) make_dummy_namespaces(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) edge = network.edges.first() self.assertEqual(1, edge.properties.count()) def test_object_degradation(self): self.graph.add_association( protein(name='YFG', namespace='HGNC'), protein(name='YFG2', namespace='HGNC'), evidence=n(), citation=n(), object_modifier=degradation(), ) make_dummy_namespaces(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) edge = network.edges.first() self.assertEqual(1, 
edge.properties.count()) def test_subject_location(self): self.graph.add_association( protein(name='YFG', namespace='HGNC'), protein(name='YFG2', namespace='HGNC'), evidence=n(), citation=n(), subject_modifier=location(entity(namespace='GO', name='nucleus', identifier='GO:0005634')) ) make_dummy_namespaces(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) edge = network.edges.first() self.assertEqual(1, edge.properties.count()) def test_mixed_1(self): """Test mixed having location and something else.""" self.graph.add_increases( protein(namespace='HGNC', name='CDC42'), protein(namespace='HGNC', name='PAK2'), evidence="""Summary: PAK proteins, a family of serine/threonine p21-activating kinases, include PAK1, PAK2, PAK3 and PAK4. PAK proteins are critical effectors that link Rho GTPases to cytoskeleton reorganization and nuclear signaling. They serve as targets for the small GTP binding proteins Cdc42 and Rac and have been implicated in a wide range of biological activities. PAK4 interacts specifically with the GTP-bound form of Cdc42Hs and weakly activates the JNK family of MAP kinases. PAK4 is a mediator of filopodia formation and may play a role in the reorganization of the actin cytoskeleton. 
Multiple alternatively spliced transcript variants encoding distinct isoforms have been found for this gene.""", citation={CITATION_TYPE: "Online Resource", CITATION_REFERENCE: "PAK4 Hs ENTREZ Gene Summary"}, annotations={'Species': '9606'}, subject_modifier=activity('gtp'), object_modifier=activity('kin'), ) make_dummy_namespaces(self.manager, self.graph) make_dummy_annotations(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) edge = network.edges.first() self.assertEqual(2, edge.properties.count()) subject = edge.properties.filter(Property.is_subject).one() self.assertTrue(subject.is_subject) self.assertEqual('gtp', subject.effect.name) self.assertIsNotNone(subject.effect.namespace) self.assertEqual(BEL_DEFAULT_NAMESPACE, subject.effect.namespace.keyword) object = edge.properties.filter(not_(Property.is_subject)).one() self.assertFalse(object.is_subject) self.assertEqual('kin', object.effect.name) self.assertIsNotNone(object.effect.namespace) self.assertEqual(BEL_DEFAULT_NAMESPACE, object.effect.namespace.keyword) def test_mixed_2(self): """Tests both subject and object activity with location information as well.""" self.graph.add_directly_increases( protein(namespace='HGNC', name='HDAC4'), protein(namespace='HGNC', name='MEF2A'), citation='10487761', evidence=""""In the nucleus, HDAC4 associates with the myocyte enhancer factor MEF2A. 
Binding of HDAC4 to MEF2A results in the repression of MEF2A transcriptional activation, a function that requires the deacetylase domain of HDAC4.""", annotations={'Species': '9606'}, subject_modifier=activity('cat', location=entity(namespace='GOCC', name='nucleus')), object_modifier=activity('tscript', location=entity(namespace='GOCC', name='nucleus')) ) make_dummy_namespaces(self.manager, self.graph) make_dummy_annotations(self.manager, self.graph) network = self.manager.insert_graph(self.graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(1, network.edges.count()) edge = network.edges.first() self.assertEqual(4, edge.properties.count()) self.assertEqual(2, edge.properties.filter(Property.is_subject).count()) self.assertEqual(2, edge.properties.filter(not_(Property.is_subject)).count()) class TestNoAddNode(TemporaryCacheMixin): """Tests scenarios where an instance of :class:`BELGraph` may contain edges that refer to uncached resources, and therefore should not be added to the edge store.""" @mock_bel_resources def test_no_node_name(self, mock): """Test that a node whose namespace is in the uncached namespaces set can't be added.""" graph = BELGraph(name='Test No Add Nodes', version='1.0.0') dummy_namespace = n() dummy_url = n() graph.namespace_url[dummy_namespace] = dummy_url graph.uncached_namespaces.add(dummy_url) node_data = protein(name=n(), namespace=dummy_namespace) graph.add_node_from_data(node_data) make_dummy_namespaces(self.manager, graph) network = self.manager.insert_graph(graph) self.assertEqual(0, len(network.nodes.all())) @mock_bel_resources def test_no_node_fusion_3p(self, mock): """Test that a fusion node whose 3P partner's namespace is in the uncached namespaces set can't be added.""" graph = BELGraph(name='Test No Add Nodes', version='1.0.0') dummy_namespace_name = n() dummy_url = n() graph.namespace_url[dummy_namespace_name] = dummy_url graph.uncached_namespaces.add(dummy_url) node_data = protein_fusion( 
partner_3p=protein(namespace=dummy_namespace_name, name='AKT1'), partner_5p=protein(namespace='HGNC', name='YFG'), ) graph.add_node_from_data(node_data) make_dummy_namespaces(self.manager, graph) network = self.manager.insert_graph(graph) self.assertEqual(0, len(network.nodes.all())) @mock_bel_resources def test_no_node_fusion_5p(self, mock): """Test that a node whose namespace is in the uncached namespaces set can't be added.""" dummy_namespace_name = n() node_data = protein_fusion( partner_3p=protein(namespace='HGNC', name='YFG'), partner_5p=protein(namespace=dummy_namespace_name, name='YFG'), ) graph = BELGraph(name='Test No Add Nodes', version='1.0.0') dummy_url = n() graph.namespace_url[dummy_namespace_name] = dummy_url graph.uncached_namespaces.add(dummy_url) graph.add_node_from_data(node_data) make_dummy_namespaces(self.manager, graph) network = self.manager.insert_graph(graph) self.assertEqual(0, len(network.nodes.all())) @mock_bel_resources def test_no_protein_modification(self, mock): """Test that a protein node whose pmod variant is in the uncached namespaces set can't be added.""" graph = BELGraph(name='Test No Add Nodes', version='1.0.0') dummy_namespace_name = n() dummy_url = n() graph.namespace_url[dummy_namespace_name] = dummy_url graph.uncached_namespaces.add(dummy_url) node_data = protein(namespace='HGNC', name='YFG', variants=pmod(name='dummy', namespace=dummy_namespace_name)) graph.add_node_from_data(node_data) make_dummy_namespaces(self.manager, graph) network = self.manager.insert_graph(graph) self.assertEqual(1, network.nodes.count()) @mock_bel_resources def test_no_gene_modification(self, mock): """Test that a gene node whose gmod variant is in the uncached namespaces set can't be added.""" graph = BELGraph(name='Test No Add Nodes', version='1.0.0') dummy_namespace_name = n() dummy_url = n() graph.namespace_url[dummy_namespace_name] = dummy_url graph.uncached_namespaces.add(dummy_url) node_data = gene(namespace='HGNC', name='YFG', variants=[ 
gmod(name='dummy', namespace=dummy_namespace_name) ]) graph.add_node_from_data(node_data) make_dummy_namespaces(self.manager, graph) network = self.manager.insert_graph(graph) self.assertEqual(1, network.nodes.count()) @mock_bel_resources def test_no_translocation(self, mock): """Test that a translocation using custom namespaces doesn't get stored.""" graph = BELGraph(name='dummy graph', version='0.0.1') dummy_namespace_name = n() dummy_namespace_url = n() graph.namespace_url[dummy_namespace_name] = dummy_namespace_url graph.uncached_namespaces.add(dummy_namespace_url) graph.add_association( protein(name='YFG', namespace='HGNC'), protein(name='YFG2', namespace='HGNC'), evidence=n(), citation=n(), subject_modifier=translocation( from_loc=entity(namespace=dummy_namespace_name, name='intracellular'), to_loc=entity(namespace='GOCC', name='extracellular space') ), ) make_dummy_namespaces(self.manager, graph) network = self.manager.insert_graph(graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(0, network.edges.count()) @mock_bel_resources def test_no_location(self, mock): """Test that when using a custom namespace in the location the edge doesn't get stored.""" graph = BELGraph(name='dummy graph', version='0.0.1') dummy_namespace_name = n() dummy_url = n() graph.namespace_url[dummy_namespace_name] = dummy_url graph.uncached_namespaces.add(dummy_url) graph.add_association( protein(name='YFG', namespace='HGNC'), protein(name='YFG2', namespace='HGNC'), evidence=n(), citation=n(), subject_modifier={ LOCATION: entity(namespace=dummy_namespace_name, name='lysozome') }, ) make_dummy_namespaces(self.manager, graph) network = self.manager.insert_graph(graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(0, network.edges.count()) @mock_bel_resources def test_no_activity(self, mock): """Test that when an uncached custom namespace is used in the activity on an edge, the edge doesn't get stored.""" graph = 
BELGraph(name='dummy graph', version='0.0.1') dummy_namespace_name = n() dummy_url = n() graph.namespace_url[dummy_namespace_name] = dummy_url graph.uncached_namespaces.add(dummy_url) graph.add_association( protein(name='YFG', namespace='HGNC'), protein(name='YFG2', namespace='HGNC'), evidence=n(), citation=n(), subject_modifier=activity(name='dummy', namespace=dummy_namespace_name) ) make_dummy_namespaces(self.manager, graph) network = self.manager.insert_graph(graph, store_parts=True) self.assertEqual(2, network.nodes.count()) self.assertEqual(0, network.edges.count()) @mock_bel_resources def test_regex_lookup(self, mock): # FIXME this test needs to be put somewhere else """Test that regular expression nodes get love too.""" graph = BELGraph(name='Regular Expression Test Graph', description='Help test regular expression namespaces', version='1.0.0') dbsnp = 'dbSNP' DBSNP_PATTERN = 'rs[0-9]+' graph.namespace_pattern[dbsnp] = DBSNP_PATTERN rs1234 = gene(namespace=dbsnp, name='rs1234') rs1235 = gene(namespace=dbsnp, name='rs1235') graph.add_node_from_data(rs1234) graph.add_node_from_data(rs1235) rs1234_hash = rs1234.as_sha512() rs1235_hash = rs1235.as_sha512() self.manager.insert_graph(graph, store_parts=True) rs1234_lookup = self.manager.get_node_by_hash(rs1234_hash) self.assertIsNotNone(rs1234_lookup) self.assertEqual('Gene', rs1234_lookup.type) self.assertEqual('g(dbSNP:rs1234)', rs1234_lookup.bel) self.assertEqual(rs1234_hash, rs1234_lookup.sha512) self.assertIsNotNone(rs1234_lookup.namespace_entry) self.assertEqual('rs1234', rs1234_lookup.namespace_entry.name) self.assertEqual('dbSNP', rs1234_lookup.namespace_entry.namespace.keyword) self.assertEqual(DBSNP_PATTERN, rs1234_lookup.namespace_entry.namespace.pattern) rs1235_lookup = self.manager.get_node_by_hash(rs1235_hash) self.assertIsNotNone(rs1235_lookup) self.assertEqual('Gene', rs1235_lookup.type) self.assertEqual('g(dbSNP:rs1235)', rs1235_lookup.bel) self.assertEqual(rs1235_hash, rs1235_lookup.sha512) 
self.assertIsNotNone(rs1235_lookup.namespace_entry) self.assertEqual('rs1235', rs1235_lookup.namespace_entry.name) self.assertEqual('dbSNP', rs1235_lookup.namespace_entry.namespace.keyword) self.assertEqual(DBSNP_PATTERN, rs1235_lookup.namespace_entry.namespace.pattern) class TestEquivalentNodes(unittest.TestCase): def test_direct_has_namespace(self): graph = BELGraph() n1_data = protein(namespace='HGNC', name='CD33', identifier='1659') n2_data = protein(namespace='NOPE', name='NOPE', identifier='NOPE') n1 = graph.add_node_from_data(n1_data) n2 = graph.add_node_from_data(n2_data) graph.add_increases( n1_data, n2_data, n(), n() ) self.assertEqual({n1}, graph.get_equivalent_nodes(n1)) self.assertTrue(graph.node_has_namespace(n1, 'HGNC')) self.assertFalse(graph.node_has_namespace(n2, 'HGNC')) def test_indirect_has_namespace(self): graph = BELGraph() a_node = protein(namespace='HGNC', name='CD33') b_node = protein(namespace='HGNCID', identifier='1659') graph.add_equivalence(a_node, b_node) a = a_node b = b_node self.assertEqual({a, b}, graph.get_equivalent_nodes(a_node)) self.assertEqual({a, b}, graph.get_equivalent_nodes(b_node)) self.assertTrue(graph.node_has_namespace(a, 'HGNC')) self.assertTrue(graph.node_has_namespace(b, 'HGNC')) def test_triangle_has_namespace(self): graph = BELGraph() a_node = protein(namespace='A', name='CD33') b_node = protein(namespace='B', identifier='1659') c_node = protein(namespace='C', identifier='1659') d_node = protein(namespace='HGNC', identifier='1659') a = graph.add_node_from_data(a_node) b = graph.add_node_from_data(b_node) c = graph.add_node_from_data(c_node) d = graph.add_node_from_data(d_node) graph.add_equivalence(a_node, b_node) graph.add_equivalence(b_node, c_node) graph.add_equivalence(c_node, a_node) graph.add_equivalence(c_node, d_node) self.assertEqual({a, b, c, d}, graph.get_equivalent_nodes(a_node)) self.assertEqual({a, b, c, d}, graph.get_equivalent_nodes(b_node)) self.assertEqual({a, b, c, d}, 
graph.get_equivalent_nodes(c_node)) self.assertEqual({a, b, c, d}, graph.get_equivalent_nodes(d_node)) self.assertTrue(graph.node_has_namespace(a, 'HGNC')) self.assertTrue(graph.node_has_namespace(b, 'HGNC')) self.assertTrue(graph.node_has_namespace(c, 'HGNC')) self.assertTrue(graph.node_has_namespace(d, 'HGNC')) if __name__ == '__main__': unittest.main() pybel-0.12.1/tests/test_manager/test_manager_model.py000066400000000000000000000113771334645200200227340ustar00rootroot00000000000000# -*- coding: utf-8 -*- """This module tests the to_json functions for all of the database models.""" import datetime import json import unittest from pybel.constants import ( CITATION_REFERENCE, CITATION_TITLE, CITATION_TYPE, CITATION_TYPE_PUBMED, IDENTIFIER, METADATA_AUTHORS, METADATA_CONTACT, METADATA_COPYRIGHT, METADATA_DESCRIPTION, METADATA_DISCLAIMER, METADATA_LICENSES, METADATA_NAME, METADATA_VERSION, NAME, NAMESPACE, NAMESPACE_DOMAIN_OTHER, ) from pybel.manager.models import Citation, Namespace, NamespaceEntry, Network from pybel.testing.utils import n class TestNetwork(unittest.TestCase): def setUp(self): self.name = n() self.version = n() self.created = datetime.datetime.utcnow() self.model = Network( name=self.name, version=self.version, created=self.created, ) self.expected = { METADATA_NAME: self.name, METADATA_VERSION: self.version, 'created': str(self.created), } def test_to_json(self): model_json = self.model.to_json() self.assertIn(METADATA_NAME, model_json) self.assertEqual(self.name, model_json[METADATA_NAME]) self.assertIn(METADATA_VERSION, model_json) self.assertEqual(self.version, model_json[METADATA_VERSION]) self.assertIn('created', model_json) self.assertEqual(str(self.created), model_json['created']) self.assertEqual(self.expected, model_json) def test_dump(self): json.dumps(self.model) def test_network(self): self.expected[METADATA_AUTHORS] = self.model.authors = n() self.assertEqual(self.expected, self.model.to_json()) self.expected[METADATA_CONTACT] = 
self.model.contact = n() self.assertEqual(self.expected, self.model.to_json()) self.expected[METADATA_DESCRIPTION] = self.model.description = n() self.assertEqual(self.expected, self.model.to_json()) self.expected[METADATA_COPYRIGHT] = self.model.copyright = n() self.assertEqual(self.expected, self.model.to_json()) self.expected[METADATA_DISCLAIMER] = self.model.disclaimer = n() self.assertEqual(self.expected, self.model.to_json()) self.expected[METADATA_LICENSES] = self.model.licenses = n() self.assertEqual(self.expected, self.model.to_json()) self.expected['id'] = None self.assertEqual(self.expected, self.model.to_json(include_id=True)) class TestModels(unittest.TestCase): def test_namespace_url(self): uploaded = datetime.datetime.now() model = Namespace( keyword='TEST', url='http://test.com', name='Test Namespace', domain=NAMESPACE_DOMAIN_OTHER, species='9606', version='1.0.0', author='Charles Tapley Hoyt', contact='charles.hoyt@scai.fraunhofer.de', uploaded=uploaded ) expected = dict( keyword='TEST', url='http://test.com', name='Test Namespace', version='1.0.0', ) self.assertEqual(model.to_json(), expected) expected['id'] = model.id = 1 self.assertEqual(model.to_json(include_id=True), expected) def test_namespace_pattern(self): uploaded = datetime.datetime.now() model = Namespace( keyword='TEST', pattern='\w+', name='Test Namespace', domain=NAMESPACE_DOMAIN_OTHER, species='9606', version='1.0.0', author='Charles Tapley Hoyt', contact='charles.hoyt@scai.fraunhofer.de', uploaded=uploaded ) expected = dict( keyword='TEST', pattern='\w+', name='Test Namespace', version='1.0.0', ) self.assertEqual(model.to_json(), expected) def test_namespace_entry(self): model = NamespaceEntry( name='entry', namespace=Namespace(keyword='test') ) expected = { NAMESPACE: 'test', NAME: 'entry' } self.assertEqual(model.to_json(), expected) expected['id'] = model.id = 1 self.assertEqual(model.to_json(include_id=True), expected) expected[IDENTIFIER] = model.identifier = 'test:00001' 
self.assertEqual(model.to_json(include_id=True), expected) def test_citation(self): ref = n() model = Citation( type=CITATION_TYPE_PUBMED, reference=ref ) expected = { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: ref } self.assertEqual(expected, model.to_json()) expected[CITATION_TITLE] = model.title = n() pybel-0.12.1/tests/test_manager/test_seeding.py000066400000000000000000000102711334645200200215500ustar00rootroot00000000000000# -*- coding: utf-8 -*- from pybel.examples import sialic_acid_graph from pybel.examples.sialic_acid_example import cd33, cd33_phosphorylated, shp2, syk, trem2 from pybel.manager.models import Edge, Namespace, Network from pybel.manager.query_manager import graph_from_edges from pybel.testing.cases import TemporaryCacheClsMixin from pybel.testing.mocks import mock_bel_resources chebi_url = 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/chebi/chebi-20170725.belns' class TestSeeding(TemporaryCacheClsMixin): """This module tests the seeding functions in the query manager""" @classmethod def setUpClass(cls): """Adds the sialic acid subgraph for all query tests""" super(TestSeeding, cls).setUpClass() @mock_bel_resources def insert(mock): """Inserts the Sialic Acid Subgraph using the mock resources""" cls.manager.insert_graph(sialic_acid_graph, store_parts=True) insert() def test_namespace_existence(self): a = 'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/hgnc-human-genes/hgnc-human-genes-20170725.belns' n = self.manager.session.query(Namespace).filter(Namespace.url == a).one() def test_namespace_existence_b(self): ns = self.manager.session.query(Namespace).filter(Namespace.url == chebi_url).one() self.assertIsNotNone(ns) def test_sialic_acid_in_node_store(self): r = 'sialic acid' n = self.manager.get_namespace_entry(chebi_url, r) self.assertIsNotNone(n) self.assertEqual(r, n.name) def test_namespace_existence_c(self): a = 
'https://arty.scai.fraunhofer.de/artifactory/bel/namespace/go-biological-process/go-biological-process-20170725.belns' self.manager.session.query(Namespace).filter(Namespace.url == a).one() def test_network_existence(self): networks = self.manager.session.query(Network).all() self.assertEqual(1, len(networks)) def test_edge_existence(self): edges = self.manager.session.query(Edge).all() self.assertEqual(11, len(edges)) def test_seed_by_pmid(self): pmids = ['26438529'] edges = self.manager.query_edges_by_pubmed_identifiers(pmids) self.assertLess(0, len(edges)) def test_seed_by_pmid_no_result(self): missing_pmids = ['11111'] edges = self.manager.query_edges_by_pubmed_identifiers(missing_pmids) self.assertEqual(0, len(edges)) def test_seed_by_induction_raise(self): """Test that seeding by induction fails when an empty list is given.""" with self.assertRaises(ValueError): self.manager.query_induction([]) def test_seed_by_induction_raise_length_one(self): """Test that seeding by induction fails when a list of length one is given.""" shp2_model = self.manager.get_node_by_dsl(shp2) with self.assertRaises(ValueError): self.manager.query_induction([shp2_model]) def test_seed_by_induction(self): """Test seeding by inducing over a list of nodes.""" shp2_model = self.manager.get_node_by_dsl(shp2) syk_model = self.manager.get_node_by_dsl(syk) trem2_model = self.manager.get_node_by_dsl(trem2) edges = self.manager.query_induction([shp2_model, syk_model, trem2_model]) self.assertEqual(2, len(edges)) graph = graph_from_edges(edges) self.assertEqual(3, graph.number_of_nodes(), msg='Nodes: {}'.format(graph.nodes())) self.assertIn(trem2, graph) self.assertIn(syk, graph) self.assertIn(shp2, graph) self.assertEqual(2, graph.number_of_edges()) def test_seed_by_neighbors(self): """Test seeding a graph by neighbors of a list of nodes.""" node = self.manager.get_node_by_dsl(shp2) edges = self.manager.query_neighbors([node]) self.assertEqual(2, len(edges)) graph = graph_from_edges(edges) 
self.assertEqual(4, graph.number_of_nodes(), msg='Nodes: {}'.format(graph.nodes())) self.assertIn(cd33_phosphorylated, graph) self.assertIn(cd33, graph) self.assertIn(syk,graph) self.assertIn(shp2, graph) self.assertEqual(3, graph.number_of_edges()) pybel-0.12.1/tests/test_parse/000077500000000000000000000000001334645200200162205ustar00rootroot00000000000000pybel-0.12.1/tests/test_parse/__init__.py000066400000000000000000000000761334645200200203340ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for :mod:`pybel.parser`.""" pybel-0.12.1/tests/test_parse/test_parse_bel.py000066400000000000000000002057651334645200200216040ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for the BEL parser.""" import logging import unittest from pybel import BELGraph from pybel.constants import ( ABUNDANCE, ACTIVITY, BEL_DEFAULT_NAMESPACE, BIOPROCESS, COMPLEX, COMPOSITE, DEGRADATION, DIRECTLY_INCREASES, DIRTY, EFFECT, FRAGMENT, FROM_LOC, FUNCTION, FUSION, FUSION_MISSING, FUSION_REFERENCE, FUSION_START, FUSION_STOP, GENE, HAS_COMPONENT, HAS_VARIANT, HGVS, IDENTIFIER, KIND, LOCATION, MEMBERS, MIRNA, MODIFIER, NAME, NAMESPACE, OBJECT, PARTNER_3P, PARTNER_5P, PATHOLOGY, PRODUCTS, PROTEIN, RANGE_3P, RANGE_5P, REACTANTS, REACTION, RELATION, RNA, SUBJECT, TARGET, TO_LOC, TRANSLOCATION, VARIANTS, ) from pybel.dsl import ( abundance, bioprocess, cell_surface_expression, complex_abundance, composite_abundance, entity, fragment, fusion_range, gene, gene_fusion, gmod, hgvs, mirna, named_complex_abundance, pathology, pmod, protein, protein_fusion, reaction, rna, rna_fusion, secretion, translocation, Fragment, ) from pybel.dsl.namespaces import hgnc from pybel.parser import BELParser from pybel.parser.exc import MalformedTranslocationWarning from pybel.parser.parse_bel import modifier_po_to_dict from tests.constants import TestTokenParserBase, assert_has_edge, assert_has_node, update_provenance log = logging.getLogger(__name__) TEST_GENE_VARIANT = 'c.308G>A' 
TEST_PROTEIN_VARIANT = 'p.Phe508del' class TestAbundance(TestTokenParserBase): """2.1.1""" def setUp(self): self.parser.clear() self.parser.general_abundance.setParseAction(self.parser.handle_term) self.expected_node_data = abundance(namespace='CHEBI', name='oxygen atom') self.expected_canonical_bel = 'a(CHEBI:"oxygen atom")' def _test_abundance_helper(self, statement): result = self.parser.general_abundance.parseString(statement) self.assertEqual(dict(self.expected_node_data), result.asDict()) self.assertIn(self.expected_node_data, self.graph) self.assertEqual(self.expected_canonical_bel, self.graph.node_to_bel(self.expected_node_data)) self.assertEqual({}, modifier_po_to_dict(result), msg='The modifier dictionary should be empty') def test_abundance(self): """Test short/long abundance name.""" self._test_abundance_helper('a(CHEBI:"oxygen atom")') self._test_abundance_helper('abundance(CHEBI:"oxygen atom")') def _test_abundance_with_location_helper(self, statement): result = self.parser.general_abundance.parseString(statement) expected_result = { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'oxygen atom', LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' } } self.assertEqual(expected_result, result.asDict()) self.assertIn(self.expected_node_data, self.graph) self.assertEqual(self.expected_canonical_bel, self.graph.node_to_bel(self.expected_node_data)) modifier = modifier_po_to_dict(result) expected_modifier = { LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'} } self.assertEqual(expected_modifier, modifier) def test_abundance_with_location(self): """Test short/long abundance name and short/long location name.""" self._test_abundance_with_location_helper('a(CHEBI:"oxygen atom", loc(GOCC:intracellular))') self._test_abundance_with_location_helper('abundance(CHEBI:"oxygen atom", loc(GOCC:intracellular))') self._test_abundance_with_location_helper('a(CHEBI:"oxygen atom", location(GOCC:intracellular))') 
self._test_abundance_with_location_helper('abundance(CHEBI:"oxygen atom", location(GOCC:intracellular))') class TestGene(TestTokenParserBase): """2.1.4 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XgeneA""" def setUp(self): self.parser.clear() self.parser.gene.setParseAction(self.parser.handle_term) def test_214a(self): statement = 'g(HGNC:AKT1)' result = self.parser.gene.parseString(statement) expected_list = [GENE, 'HGNC', 'AKT1'] self.assertEqual(expected_list, result.asList()) expected_dict = { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'AKT1' } self.assertEqual(expected_dict, result.asDict()) expected_node = gene('HGNC', 'AKT1') self.assert_has_node(expected_node) self.assertEqual('g(HGNC:AKT1)', self.graph.node_to_bel(expected_node)) self.assertEqual(1, len(self.graph)) def test_214b(self): statement = 'g(HGNC:AKT1, loc(GOCC:intracellular))' result = self.parser.gene.parseString(statement) expected_dict = { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'AKT1', LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' } } self.assertEqual(expected_dict, result.asDict()) expected_node = gene('HGNC', 'AKT1') self.assert_has_node(expected_node) self.assertEqual('g(HGNC:AKT1)', self.graph.node_to_bel(expected_node)) def test_214c(self): """Test variant""" statement = 'g(HGNC:AKT1, var(p.Phe508del))' result = self.parser.gene.parseString(statement) expected_result = { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'AKT1', VARIANTS: [hgvs(TEST_PROTEIN_VARIANT)] } self.assertEqual(expected_result, result.asDict()) expected_node = gene('HGNC', 'AKT1', variants=hgvs(TEST_PROTEIN_VARIANT)) self.assert_has_node(expected_node) self.assertEqual('g(HGNC:AKT1, var("p.Phe508del"))', self.graph.node_to_bel(expected_node)) parent = gene('HGNC', 'AKT1') self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_gmod(self): """Test Gene Modification""" statement = 'geneAbundance(HGNC:AKT1,gmod(M))' result = 
self.parser.gene.parseString(statement) expected_result = { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'AKT1', VARIANTS: [gmod('Me')] } self.assertEqual(expected_result, result.asDict()) expected_node = gene('HGNC', 'AKT1', variants=gmod('Me')) self.assert_has_node(expected_node) self.assertEqual('g(HGNC:AKT1, gmod(Me))', self.graph.node_to_bel(expected_node)) parent = gene('HGNC', 'AKT1') self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_214d(self): """Test BEL 1.0 gene substitution""" statement = 'g(HGNC:AKT1,sub(G,308,A))' result = self.parser.gene.parseString(statement) expected_result = gene( name='AKT1', namespace='HGNC', variants=[hgvs(TEST_GENE_VARIANT)] ) self.assertEqual(dict(expected_result), result.asDict()) expected_node = gene('HGNC', 'AKT1', variants=hgvs("c.308G>A")) self.assert_has_node(expected_node) self.assertEqual('g(HGNC:AKT1, var("c.308G>A"))', self.graph.node_to_bel(expected_node)) parent = gene('HGNC', 'AKT1') self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_variant_location(self): """Test BEL 1.0 gene substitution with location tag""" statement = 'g(HGNC:AKT1,sub(G,308,A),loc(GOCC:intracellular))' result = self.parser.gene.parseString(statement) expected_result = { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'AKT1', VARIANTS: [ { KIND: HGVS, IDENTIFIER: TEST_GENE_VARIANT } ], LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' } } self.assertEqual(expected_result, result.asDict()) expected_node = gene('HGNC', 'AKT1', variants=hgvs("c.308G>A")) self.assert_has_node(expected_node) self.assertEqual('g(HGNC:AKT1, var("c.308G>A"))', self.graph.node_to_bel(expected_node)) parent = gene('HGNC', 'AKT1') self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_multiple_variants(self): """Test multiple variants""" statement = 'g(HGNC:AKT1, var(p.Phe508del), sub(G,308,A), var(c.1521_1523delCTT))' 
result = self.parser.gene.parseString(statement) expected_result = { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'AKT1', VARIANTS: [ hgvs(TEST_PROTEIN_VARIANT), hgvs(TEST_GENE_VARIANT), hgvs('c.1521_1523delCTT') ] } self.assertEqual(expected_result, result.asDict()) expected_node = gene('HGNC', 'AKT1', variants=[ hgvs('c.1521_1523delCTT'), hgvs(TEST_GENE_VARIANT), hgvs(TEST_PROTEIN_VARIANT) ]) self.assert_has_node(expected_node) self.assertEqual( 'g(HGNC:AKT1, var("c.1521_1523delCTT"), var("c.308G>A"), var("p.Phe508del"))', self.graph.node_to_bel(expected_node), ) parent = gene('HGNC', 'AKT1') self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def _help_test_gene_fusion_1(self, statement): result = self.parser.gene.parseString(statement) expected_dict = { FUNCTION: GENE, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'TMPRSS2'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'ERG'}, RANGE_5P: { FUSION_REFERENCE: 'c', FUSION_START: 1, FUSION_STOP: 79 }, RANGE_3P: { FUSION_REFERENCE: 'c', FUSION_START: 312, FUSION_STOP: 5034 } } } self.assertEqual(expected_dict, result.asDict()) en = gene_fusion( partner_5p=gene('HGNC', 'TMPRSS2'), range_5p=fusion_range('c', 1, 79), partner_3p=gene('HGNC', 'ERG'), range_3p=fusion_range('c', 312, 5034) ) self.assert_has_node(en) self.assertEqual('g(fus(HGNC:TMPRSS2, "c.1_79", HGNC:ERG, "c.312_5034"))', self.graph.node_to_bel(en)) def test_gene_fusion_1(self): # no quotes self._help_test_gene_fusion_1('g(fus(HGNC:TMPRSS2, c.1_79, HGNC:ERG, c.312_5034))') # quotes self._help_test_gene_fusion_1('g(fus(HGNC:TMPRSS2, "c.1_79", HGNC:ERG, "c.312_5034"))') def _help_test_gene_fusion_2(self, statement): result = self.parser.gene.parseString(statement) expected_dict = { FUNCTION: GENE, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'TMPRSS2'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'ERG'}, RANGE_5P: { FUSION_REFERENCE: 'c', FUSION_START: 1, FUSION_STOP: '?' 
}, RANGE_3P: { FUSION_REFERENCE: 'c', FUSION_START: 312, FUSION_STOP: 5034 } } } self.assertEqual(expected_dict, result.asDict()) expected_node = gene_fusion(gene('HGNC', 'TMPRSS2'), gene('HGNC', 'ERG'), fusion_range('c', 1, '?'), fusion_range('c', 312, 5034)) self.assert_has_node(expected_node) canonical_bel = self.graph.node_to_bel(expected_node) self.assertEqual('g(fus(HGNC:TMPRSS2, "c.1_?", HGNC:ERG, "c.312_5034"))', canonical_bel) def test_gene_fusion_2(self): # no quotes self._help_test_gene_fusion_2('g(fus(HGNC:TMPRSS2, c.1_?, HGNC:ERG, c.312_5034))') # correct self._help_test_gene_fusion_2('g(fus(HGNC:TMPRSS2, "c.1_?", HGNC:ERG, "c.312_5034"))') def _help_test_gene_fusion_3(self, statement): result = self.parser.gene.parseString(statement) expected_dict = { FUNCTION: GENE, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'TMPRSS2'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'ERG'}, RANGE_5P: { FUSION_MISSING: '?' }, RANGE_3P: { FUSION_REFERENCE: 'c', FUSION_START: 312, FUSION_STOP: 5034 } } } self.assertEqual(expected_dict, result.asDict()) expected_node = gene_fusion(gene('HGNC', 'TMPRSS2'), gene('HGNC', 'ERG'), range_3p=fusion_range('c', 312, 5034)) self.assert_has_node(expected_node) self.assertEqual('g(fus(HGNC:TMPRSS2, "?", HGNC:ERG, "c.312_5034"))', self.graph.node_to_bel(expected_node)) def test_gene_fusion_3(self): # no quotes self._help_test_gene_fusion_3('g(fus(HGNC:TMPRSS2, ?, HGNC:ERG, c.312_5034))') # correct self._help_test_gene_fusion_3('g(fus(HGNC:TMPRSS2, "?", HGNC:ERG, "c.312_5034"))') def _help_test_gene_fusion_legacy_1(self, statement): result = self.parser.gene.parseString(statement) expected_dict = { FUNCTION: GENE, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'BCR'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'JAK2'}, RANGE_5P: { FUSION_REFERENCE: 'c', FUSION_START: '?', FUSION_STOP: 1875 }, RANGE_3P: { FUSION_REFERENCE: 'c', FUSION_START: 2626, FUSION_STOP: '?' 
} } } self.assertEqual(expected_dict, result.asDict()) expected_node = gene_fusion(gene('HGNC', 'BCR'), gene('HGNC', 'JAK2'), fusion_range('c', '?', 1875), fusion_range('c', 2626, '?')) self.assert_has_node(expected_node) self.assertEqual('g(fus(HGNC:BCR, "c.?_1875", HGNC:JAK2, "c.2626_?"))', self.graph.node_to_bel(expected_node)) def test_gene_fusion_legacy_1(self): # legacy self._help_test_gene_fusion_legacy_1('g(HGNC:BCR, fus(HGNC:JAK2, 1875, 2626))') # no quotes self._help_test_gene_fusion_legacy_1('g(fus(HGNC:BCR, c.?_1875, HGNC:JAK2, c.2626_?))') # correct self._help_test_gene_fusion_legacy_1('g(fus(HGNC:BCR, "c.?_1875", HGNC:JAK2, "c.2626_?"))') def _help_test_gene_fusion_legacy_2(self, statement): result = self.parser.gene.parseString(statement) expected_dict = { FUNCTION: GENE, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'CHCHD4'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'AIFM1'}, RANGE_5P: {FUSION_MISSING: '?'}, RANGE_3P: {FUSION_MISSING: '?'} } } self.assertEqual(expected_dict, result.asDict()) expected_node = gene_fusion(gene('HGNC', 'CHCHD4'), gene('HGNC', 'AIFM1')) self.assert_has_node(expected_node) self.assertEqual('g(fus(HGNC:CHCHD4, "?", HGNC:AIFM1, "?"))', self.graph.node_to_bel(expected_node)) def test_gene_fusion_legacy_2(self): # legacy self._help_test_gene_fusion_legacy_2('g(HGNC:CHCHD4, fusion(HGNC:AIFM1))') # no quotes self._help_test_gene_fusion_legacy_2('g(fus(HGNC:CHCHD4, ?, HGNC:AIFM1, ?))') # correct self._help_test_gene_fusion_legacy_2('g(fus(HGNC:CHCHD4, "?", HGNC:AIFM1, "?"))') def test_gene_variant_snp(self): """2.2.2 SNP""" statement = 'g(SNP:rs113993960, var(c.1521_1523delCTT))' result = self.parser.gene.parseString(statement) expected_result = [GENE, 'SNP', 'rs113993960', [HGVS, 'c.1521_1523delCTT']] self.assertEqual(expected_result, result.asList()) expected_node = gene('SNP', 'rs113993960', variants=hgvs('c.1521_1523delCTT')) self.assert_has_node(expected_node) self.assertEqual('g(SNP:rs113993960, var("c.1521_1523delCTT"))', 
self.graph.node_to_bel(expected_node)) gene_node = expected_node.get_parent() self.assert_has_node(gene_node) self.assert_has_edge(gene_node, expected_node, relation=HAS_VARIANT) def test_gene_variant_chromosome(self): """2.2.2 chromosome""" statement = 'g(REF:"NC_000007.13", var(g.117199646_117199648delCTT))' result = self.parser.gene.parseString(statement) expected_result = [GENE, 'REF', 'NC_000007.13', [HGVS, 'g.117199646_117199648delCTT']] self.assertEqual(expected_result, result.asList()) gene_node = gene('REF', 'NC_000007.13', variants=hgvs('g.117199646_117199648delCTT')) self.assert_has_node(gene_node) parent = gene_node.get_parent() self.assert_has_node(parent) self.assert_has_edge(parent, gene_node, relation=HAS_VARIANT) def test_gene_variant_deletion(self): """2.2.2 gene-coding DNA reference sequence""" statement = 'g(HGNC:CFTR, var(c.1521_1523delCTT))' result = self.parser.gene.parseString(statement) expected_result = { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'CFTR', VARIANTS: [ {KIND: HGVS, IDENTIFIER: 'c.1521_1523delCTT'} ] } self.assertEqual(expected_result, result.asDict()) expected_node = gene('HGNC', 'CFTR', variants=hgvs('c.1521_1523delCTT')) self.assert_has_node(expected_node) self.assertEqual('g(HGNC:CFTR, var("c.1521_1523delCTT"))', self.graph.node_to_bel(expected_node)) gene_node = expected_node.get_parent() self.assert_has_node(gene_node) self.assert_has_edge(gene_node, expected_node, relation=HAS_VARIANT) class TestMiRNA(TestTokenParserBase): """2.1.5 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XmicroRNAA""" def setUp(self): self.parser.clear() self.parser.mirna.setParseAction(self.parser.handle_term) def _test_no_variant_helper(self, statement): result = self.parser.mirna.parseString(statement) expected_result = [MIRNA, 'HGNC', 'MIR21'] self.assertEqual(expected_result, result.asList()) expected_dict = { FUNCTION: MIRNA, NAMESPACE: 'HGNC', NAME: 'MIR21' } self.assertEqual(expected_dict, result.asDict()) 
node = mirna('HGNC', 'MIR21') self.assert_has_node(node) def test_short(self): self._test_no_variant_helper('m(HGNC:MIR21)') self._test_no_variant_helper('microRNAAbundance(HGNC:MIR21)') def test_mirna_location(self): statement = 'm(HGNC:MIR21,loc(GOCC:intracellular))' result = self.parser.mirna.parseString(statement) expected_dict = { FUNCTION: MIRNA, NAMESPACE: 'HGNC', NAME: 'MIR21', LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' } } self.assertEqual(expected_dict, result.asDict()) expected_node = mirna('HGNC', 'MIR21') self.assert_has_node(expected_node) def test_mirna_variant(self): statement = 'm(HGNC:MIR21,var(p.Phe508del))' result = self.parser.mirna.parseString(statement) expected_dict = { FUNCTION: MIRNA, NAMESPACE: 'HGNC', NAME: 'MIR21', VARIANTS: [ { KIND: HGVS, IDENTIFIER: TEST_PROTEIN_VARIANT }, ] } self.assertEqual(expected_dict, result.asDict()) node = mirna('HGNC', 'MIR21', variants=hgvs(TEST_PROTEIN_VARIANT)) self.assert_has_node(node) self.assertEqual('m(HGNC:MIR21, var("p.Phe508del"))', self.graph.node_to_bel(node)) self.assertEqual(2, self.parser.graph.number_of_nodes()) expected_parent = node.get_parent() self.assert_has_node(expected_parent) self.assert_has_edge(expected_parent, node, relation=HAS_VARIANT) def test_mirna_variant_location(self): statement = 'm(HGNC:MIR21,var(p.Phe508del),loc(GOCC:intracellular))' result = self.parser.mirna.parseString(statement) expected_dict = { FUNCTION: MIRNA, NAMESPACE: 'HGNC', NAME: 'MIR21', VARIANTS: [ { KIND: HGVS, IDENTIFIER: 'p.Phe508del' }, ], LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' } } self.assertEqual(expected_dict, result.asDict()) node = mirna('HGNC', 'MIR21', variants=hgvs(TEST_PROTEIN_VARIANT)) self.assert_has_node(node) self.assertEqual('m(HGNC:MIR21, var("p.Phe508del"))', self.graph.node_to_bel(node)) self.assertEqual(2, self.parser.graph.number_of_nodes()) expected_parent = node.get_parent() self.assert_has_node(expected_parent) self.assert_has_edge(expected_parent, node, 
relation=HAS_VARIANT) class TestProtein(TestTokenParserBase): """2.1.6 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XproteinA""" def setUp(self): self.parser.clear() self.parser.protein.setParseAction(self.parser.handle_term) def _test_reference_helper(self, statement): result = self.parser.protein.parseString(statement) expected_result = [PROTEIN, 'HGNC', 'AKT1'] self.assertEqual(expected_result, result.asList()) expected_dict = { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1', } self.assertEqual(expected_dict, result.asDict()) node = protein('HGNC', 'AKT1') self.assert_has_node(node) self.assertEqual('p(HGNC:AKT1)', self.graph.node_to_bel(node)) def test_reference(self): self._test_reference_helper('p(HGNC:AKT1)') self._test_reference_helper('proteinAbundance(HGNC:AKT1)') def test_protein_with_location(self): statement = 'p(HGNC:AKT1, loc(GOCC:intracellular))' result = self.parser.protein.parseString(statement) expected_dict = { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1', LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' } } self.assertEqual(expected_dict, result.asDict()) node = protein('HGNC', 'AKT1') self.assert_has_node(node) self.assertEqual('p(HGNC:AKT1)', self.graph.node_to_bel(node)) def test_multiple_variants(self): statement = 'p(HGNC:AKT1,sub(A,127,Y),pmod(Ph, Ser),loc(GOCC:intracellular))' result = self.parser.protein.parseString(statement) expected_dict = { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1', LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' }, VARIANTS: [ hgvs('p.Ala127Tyr'), pmod(name='Ph', code='Ser') ] } self.assertEqual(expected_dict, result.asDict()) parent = protein('HGNC', 'AKT1') node = parent.with_variants([hgvs('p.Ala127Tyr'), pmod('Ph', code='Ser')]) self.assert_has_node(node) self.assertEqual('p(HGNC:AKT1, pmod(Ph, Ser), var("p.Ala127Tyr"))', self.graph.node_to_bel(node)) self.assert_has_node(parent) self.assert_has_edge(parent, node, relation=HAS_VARIANT) def 
_help_test_protein_fusion_1(self, statement): result = self.parser.protein.parseString(statement) expected_dict = { FUNCTION: PROTEIN, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'TMPRSS2'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'ERG'}, RANGE_5P: { FUSION_REFERENCE: 'p', FUSION_START: 1, FUSION_STOP: 79 }, RANGE_3P: { FUSION_REFERENCE: 'p', FUSION_START: 312, FUSION_STOP: 5034 } } } self.assertEqual(expected_dict, result.asDict()) expected_node = protein_fusion( protein('HGNC', 'TMPRSS2'), protein('HGNC', 'ERG'), fusion_range('p', 1, 79), fusion_range('p', 312, 5034) ) self.assert_has_node(expected_node) self.assertEqual( 'p(fus(HGNC:TMPRSS2, "p.1_79", HGNC:ERG, "p.312_5034"))', self.graph.node_to_bel(expected_node), ) def test_protein_fusion_1(self): # no quotes self._help_test_protein_fusion_1('p(fus(HGNC:TMPRSS2, p.1_79, HGNC:ERG, p.312_5034))') # quotes self._help_test_protein_fusion_1('p(fus(HGNC:TMPRSS2, "p.1_79", HGNC:ERG, "p.312_5034"))') def _help_test_protein_fusion_legacy_1(self, statement): result = self.parser.protein.parseString(statement) expected_dict = { FUNCTION: PROTEIN, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'BCR'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'JAK2'}, RANGE_5P: { FUSION_REFERENCE: 'p', FUSION_START: '?', FUSION_STOP: 1875 }, RANGE_3P: { FUSION_REFERENCE: 'p', FUSION_START: 2626, FUSION_STOP: '?' 
} } } self.assertEqual(expected_dict, result.asDict()) expected_node = protein_fusion( protein('HGNC', 'BCR'), protein('HGNC', 'JAK2'), fusion_range('p', '?', 1875), fusion_range('p', 2626, '?') ) self.assert_has_node(expected_node) canonical_bel = self.graph.node_to_bel(expected_node) self.assertEqual('p(fus(HGNC:BCR, "p.?_1875", HGNC:JAK2, "p.2626_?"))', canonical_bel) def test_protein_fusion_legacy_1(self): # legacy (BEL 1.0) self._help_test_protein_fusion_legacy_1('p(HGNC:BCR, fus(HGNC:JAK2, 1875, 2626))') # missing quotes self._help_test_protein_fusion_legacy_1('p(fus(HGNC:BCR, p.?_1875, HGNC:JAK2, p.2626_?))') # correct self._help_test_protein_fusion_legacy_1('p(fus(HGNC:BCR, "p.?_1875", HGNC:JAK2, "p.2626_?"))') def _help_test_protein_legacy_fusion_2(self, statement): result = self.parser.protein.parseString(statement) expected_dict = { FUNCTION: PROTEIN, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'CHCHD4'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'AIFM1'}, RANGE_5P: {FUSION_MISSING: '?'}, RANGE_3P: {FUSION_MISSING: '?'} } } self.assertEqual(expected_dict, result.asDict()) expected_node = protein_fusion( protein('HGNC', 'CHCHD4'), protein('HGNC', 'AIFM1'), ) self.assert_has_node(expected_node) canonical_bel = self.graph.node_to_bel(expected_node) self.assertEqual('p(fus(HGNC:CHCHD4, "?", HGNC:AIFM1, "?"))', canonical_bel) def test_protein_fusion_legacy_2(self): # legacy (BEL 1.0) self._help_test_protein_legacy_fusion_2('proteinAbundance(HGNC:CHCHD4, fusion(HGNC:AIFM1))') # legacy shorthand (BEL 1.0) self._help_test_protein_legacy_fusion_2('p(HGNC:CHCHD4, fus(HGNC:AIFM1))') # missing quotes self._help_test_protein_legacy_fusion_2('p(fus(HGNC:CHCHD4, ?, HGNC:AIFM1, ?))') # correct self._help_test_protein_legacy_fusion_2('p(fus(HGNC:CHCHD4, "?", HGNC:AIFM1, "?"))') def _help_test_protein_trunc_1(self, statement): result = self.parser.protein.parseString(statement) expected_node = protein('HGNC', 'AKT1', variants=hgvs('p.40*')) 
self.assert_has_node(expected_node) canonical_bel = self.graph.node_to_bel(expected_node) self.assertEqual('p(HGNC:AKT1, var("p.40*"))', canonical_bel) protein_node = expected_node.get_parent() self.assert_has_node(protein_node) self.assert_has_edge(protein_node, expected_node, relation=HAS_VARIANT) def test_protein_trunc_1(self): # legacy self._help_test_protein_trunc_1('p(HGNC:AKT1, trunc(40))') # missing quotes self._help_test_protein_trunc_1('p(HGNC:AKT1, var(p.40*))') # correct self._help_test_protein_trunc_1('p(HGNC:AKT1, var("p.40*"))') def test_protein_trunc_2(self): statement = 'p(HGNC:AKT1, var(p.Cys40*))' result = self.parser.protein.parseString(statement) expected_result = [PROTEIN, 'HGNC', 'AKT1', [HGVS, 'p.Cys40*']] self.assertEqual(expected_result, result.asList()) parent = protein('HGNC', 'AKT1') expected_node = parent.with_variants(hgvs('p.Cys40*')) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:AKT1, var("p.Cys40*"))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_trunc_3(self): statement = 'p(HGNC:AKT1, var(p.Arg1851*))' result = self.parser.protein.parseString(statement) expected_result = [PROTEIN, 'HGNC', 'AKT1', [HGVS, 'p.Arg1851*']] self.assertEqual(expected_result, result.asList()) parent = protein('HGNC', 'AKT1') expected_node = parent.with_variants(hgvs('p.Arg1851*')) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:AKT1, var("p.Arg1851*"))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_pmod_1(self): """2.2.1 Test default BEL namespace and 1-letter amino acid code:""" statement = 'p(HGNC:AKT1, pmod(Ph, S, 473))' result = self.parser.protein.parseString(statement) parent = protein('HGNC', 'AKT1') expected_node = parent.with_variants(pmod('Ph', code='Ser', position=473)) self.assert_has_node(expected_node) 
self.assertEqual('p(HGNC:AKT1, pmod(Ph, Ser, 473))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_pmod_2(self): """2.2.1 Test default BEL namespace and 3-letter amino acid code:""" statement = 'p(HGNC:AKT1, pmod(Ph, Ser, 473))' result = self.parser.protein.parseString(statement) parent = protein('HGNC', 'AKT1') expected_node = parent.with_variants(pmod('Ph', code='Ser', position=473)) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:AKT1, pmod(Ph, Ser, 473))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_pmod_3(self): """2.2.1 Test PSI-MOD namespace and 3-letter amino acid code:""" statement = 'p(HGNC:AKT1, pmod(MOD:PhosRes,Ser,473))' result = self.parser.protein.parseString(statement) parent = protein('HGNC', 'AKT1') expected_node = parent.with_variants(pmod(namespace='MOD', name='PhosRes', code='Ser', position=473)) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:AKT1, pmod(MOD:PhosRes, Ser, 473))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_pmod_4(self): """2.2.1 Test HRAS palmitoylated at an unspecified residue. 
Default BEL namespace""" statement = 'p(HGNC:HRAS,pmod(Palm))' result = self.parser.protein.parseString(statement) parent = protein('HGNC', 'HRAS') expected_node = parent.with_variants(pmod('Palm')) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:HRAS, pmod(Palm))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_variant_reference(self): """2.2.2 Test reference allele""" statement = 'p(HGNC:CFTR, var(=))' result = self.parser.protein.parseString(statement) expected_result = [PROTEIN, 'HGNC', 'CFTR', [HGVS, '=']] self.assertEqual(expected_result, result.asList()) expected_node = protein('HGNC', 'CFTR', variants=hgvs('=')) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:CFTR, var("="))', self.graph.node_to_bel(expected_node)) protein_node = expected_node.get_parent() self.assert_has_node(protein_node) self.assert_has_edge(protein_node, expected_node, relation=HAS_VARIANT) def test_protein_variant_unspecified(self): """2.2.2 Test unspecified variant""" statement = 'p(HGNC:CFTR, var(?))' result = self.parser.protein.parseString(statement) expected_result = [PROTEIN, 'HGNC', 'CFTR', [HGVS, '?']] self.assertEqual(expected_result, result.asList()) expected_node = protein('HGNC', 'CFTR', variants=hgvs('?')) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:CFTR, var("?"))', self.graph.node_to_bel(expected_node)) parent = expected_node.get_parent() self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_variant_substitution(self): """2.2.2 Test substitution""" statement = 'p(HGNC:CFTR, var(p.Gly576Ala))' result = self.parser.protein.parseString(statement) expected_result = [PROTEIN, 'HGNC', 'CFTR', [HGVS, 'p.Gly576Ala']] self.assertEqual(expected_result, result.asList()) expected_node = protein('HGNC', 'CFTR', variants=hgvs('p.Gly576Ala')) self.assert_has_node(expected_node) 
self.assertEqual('p(HGNC:CFTR, var("p.Gly576Ala"))', self.graph.node_to_bel(expected_node)) parent = expected_node.get_parent() self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_variant_deletion(self): """2.2.2 deletion""" statement = 'p(HGNC:CFTR, var(p.Phe508del))' result = self.parser.protein.parseString(statement) expected_result = [PROTEIN, 'HGNC', 'CFTR', [HGVS, TEST_PROTEIN_VARIANT]] self.assertEqual(expected_result, result.asList()) expected_node = protein('HGNC', 'CFTR', variants=hgvs('p.Phe508del')) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:CFTR, var("p.Phe508del"))', self.graph.node_to_bel(expected_node)) protein_node = expected_node.get_parent() self.assert_has_node(protein_node) self.assert_has_edge(protein_node, expected_node, relation=HAS_VARIANT) def test_protein_fragment_known(self): """2.2.3 fragment with known start/stop""" statement = 'p(HGNC:YFG, frag(5_20))' self.parser.protein.parseString(statement) parent = protein('HGNC', 'YFG') expected_node = parent.with_variants(fragment(5, 20)) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:YFG, frag("5_20"))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_fragment_unbounded(self): """2.2.3 amino-terminal fragment of unknown length""" statement = 'p(HGNC:YFG, frag(1_?))' result = self.parser.protein.parseString(statement) parent = protein('HGNC', 'YFG') expected_node = parent.with_variants(fragment(1, '?')) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:YFG, frag("1_?"))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_fragment_unboundTerminal(self): """2.2.3 carboxyl-terminal fragment of unknown length""" statement = 'p(HGNC:YFG, frag(?_*))' result = 
self.parser.protein.parseString(statement) parent = protein('HGNC', 'YFG') expected_node = parent.with_variants(fragment('?', '*')) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:YFG, frag("?_*"))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_fragment_unknown(self): """2.2.3 fragment with unknown start/stop""" statement = 'p(HGNC:YFG, frag(?))' result = self.parser.protein.parseString(statement) expected_result = [PROTEIN, 'HGNC', 'YFG', [FRAGMENT, '?']] self.assertEqual(expected_result, result.asList()) parent = protein('HGNC', 'YFG') expected_node = parent.with_variants(fragment()) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:YFG, frag("?"))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_protein_fragment_descriptor(self): """2.2.3 fragment with unknown start/stop and a descriptor""" statement = 'p(HGNC:YFG, frag(?, "55kD"))' result = self.parser.protein.parseString(statement) parent = protein('HGNC', 'YFG') expected_node = parent.with_variants(fragment('?', description='55kD')) self.assert_has_node(expected_node) self.assertEqual('p(HGNC:YFG, frag("?", "55kD"))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def test_ensure_no_dup_edges(self): """Ensure node and edges aren't added twice, even if from different statements and has origin completion""" s1 = 'p(HGNC:AKT1)' s2 = 'deg(p(HGNC:AKT1))' node = protein('HGNC', 'AKT1') self.parser.bel_term.parseString(s1) self.assert_has_node(node) self.assertEqual(1, self.parser.graph.number_of_nodes()) self.assertEqual(0, self.parser.graph.number_of_edges()) self.parser.bel_term.parseString(s2) self.assert_has_node(node) self.assertEqual(1, self.parser.graph.number_of_nodes()) self.assertEqual(0, 
self.parser.graph.number_of_edges()) class TestRna(TestTokenParserBase): """2.1.7 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XrnaA""" def setUp(self): self.parser.clear() self.parser.rna.setParseAction(self.parser.handle_term) def _help_test_reference(self, statement): result = self.parser.rna.parseString(statement) expected_result = [RNA, 'HGNC', 'AKT1'] self.assertEqual(expected_result, result.asList()) expected_dict = { FUNCTION: RNA, NAMESPACE: 'HGNC', NAME: 'AKT1' } self.assertEqual(expected_dict, result.asDict()) expected_node = rna('HGNC', 'AKT1') self.assert_has_node(expected_node) self.assertEqual('r(HGNC:AKT1)', self.graph.node_to_bel(expected_node)) def test_reference(self): # short self._help_test_reference('r(HGNC:AKT1)') # long self._help_test_reference('rnaAbundance(HGNC:AKT1)') def test_multiple_variants(self): """Test multiple variants.""" statement = 'r(HGNC:AKT1, var(p.Phe508del), var(c.1521_1523delCTT))' result = self.parser.rna.parseString(statement) expected_result = { FUNCTION: RNA, NAMESPACE: 'HGNC', NAME: 'AKT1', VARIANTS: [ hgvs(TEST_PROTEIN_VARIANT), hgvs('c.1521_1523delCTT') ] } self.assertEqual(expected_result, result.asDict()) parent = rna('HGNC', 'AKT1') expected_node = parent.with_variants([hgvs('c.1521_1523delCTT'), hgvs(TEST_PROTEIN_VARIANT)]) self.assert_has_node(expected_node) self.assertEqual( 'r(HGNC:AKT1, var("c.1521_1523delCTT"), var("p.Phe508del"))', self.graph.node_to_bel(expected_node), ) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) def _help_test_rna_fusion_1(self, statement): result = self.parser.rna.parseString(statement) expected_dict = { FUNCTION: RNA, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'TMPRSS2'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'ERG'}, RANGE_5P: { FUSION_REFERENCE: 'r', FUSION_START: 1, FUSION_STOP: 79 }, RANGE_3P: { FUSION_REFERENCE: 'r', FUSION_START: 312, FUSION_STOP: 5034 } } } self.assertEqual(expected_dict, 
result.asDict()) expected_node = rna_fusion( rna('HGNC', 'TMPRSS2'), rna('HGNC', 'ERG'), fusion_range('r', 1, 79), fusion_range('r', 312, 5034) ) self.assert_has_node(expected_node) self.assertEqual('r(fus(HGNC:TMPRSS2, "r.1_79", HGNC:ERG, "r.312_5034"))', self.graph.node_to_bel(expected_node)) def test_rna_fusion_known_breakpoints(self): """Test RNA fusions (2.6.1) with known breakpoints (2.6.1).""" # missing quotes self._help_test_rna_fusion_1('r(fus(HGNC:TMPRSS2, r.1_79, HGNC:ERG, r.312_5034))') # correct (short form) self._help_test_rna_fusion_1('r(fus(HGNC:TMPRSS2, "r.1_79", HGNC:ERG, "r.312_5034"))') # correct (long form) self._help_test_rna_fusion_1('rnaAbundance(fusion(HGNC:TMPRSS2, "r.1_79", HGNC:ERG, "r.312_5034"))') def _help_test_rna_fusion_unspecified_breakpoints(self, statement): result = self.parser.rna.parseString(statement) expected_dict = { FUNCTION: RNA, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'TMPRSS2'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'ERG'}, RANGE_5P: { FUSION_MISSING: '?', }, RANGE_3P: { FUSION_MISSING: '?', } } } self.assertEqual(expected_dict, result.asDict()) expected_node = rna_fusion( rna('HGNC', 'TMPRSS2'), rna('HGNC', 'ERG'), ) self.assert_has_node(expected_node) self.assertEqual('r(fus(HGNC:TMPRSS2, "?", HGNC:ERG, "?"))', self.graph.node_to_bel(expected_node)) def test_rna_fusion_unspecified_breakpoints(self): """Test RNA fusions (2.6.1) with unspecified breakpoints.""" # legacy self._help_test_rna_fusion_unspecified_breakpoints('r(HGNC:TMPRSS2, fusion(HGNC:ERG))') # missing quotes self._help_test_rna_fusion_unspecified_breakpoints('r(fus(HGNC:TMPRSS2, ?, HGNC:ERG, ?))') # correct (short form) self._help_test_rna_fusion_unspecified_breakpoints('r(fus(HGNC:TMPRSS2, "?", HGNC:ERG, "?"))') # correct (long form) self._help_test_rna_fusion_unspecified_breakpoints('rnaAbundance(fusion(HGNC:TMPRSS2, "?", HGNC:ERG, "?"))') def _help_test_rna_fusion_legacy_1(self, statement): result = self.parser.rna.parseString(statement) 
expected_dict = { FUNCTION: RNA, FUSION: { PARTNER_5P: {NAMESPACE: 'HGNC', NAME: 'BCR'}, PARTNER_3P: {NAMESPACE: 'HGNC', NAME: 'JAK2'}, RANGE_5P: { FUSION_REFERENCE: 'r', FUSION_START: '?', FUSION_STOP: 1875 }, RANGE_3P: { FUSION_REFERENCE: 'r', FUSION_START: 2626, FUSION_STOP: '?' } } } self.assertEqual(expected_dict, result.asDict()) expected_node = rna_fusion( rna('HGNC', 'BCR'), rna('HGNC', 'JAK2'), fusion_range('r', '?', 1875), fusion_range('r', 2626, '?') ) self.assert_has_node(expected_node) self.assertEqual('r(fus(HGNC:BCR, "r.?_1875", HGNC:JAK2, "r.2626_?"))', self.graph.node_to_bel(expected_node)) def test_rna_fusion_legacy_1(self): # legacy self._help_test_rna_fusion_legacy_1('r(HGNC:BCR, fus(HGNC:JAK2, 1875, 2626))') # no quotes self._help_test_rna_fusion_legacy_1('r(fus(HGNC:BCR, r.?_1875, HGNC:JAK2, r.2626_?))') # correct self._help_test_rna_fusion_legacy_1('r(fus(HGNC:BCR, "r.?_1875", HGNC:JAK2, "r.2626_?"))') def test_rna_variant_codingReference(self): """2.2.2 RNA coding reference sequence""" statement = 'r(HGNC:CFTR, var(r.1521_1523delcuu))' result = self.parser.rna.parseString(statement) expected_dict = { FUNCTION: RNA, NAMESPACE: 'HGNC', NAME: 'CFTR', VARIANTS: [hgvs('r.1521_1523delcuu')] } self.assertEqual(expected_dict, result.asDict()) parent = rna('HGNC', 'CFTR') expected_node = parent.with_variants(hgvs('r.1521_1523delcuu')) self.assert_has_node(expected_node) self.assertEqual('r(HGNC:CFTR, var("r.1521_1523delcuu"))', self.graph.node_to_bel(expected_node)) self.assert_has_node(parent) self.assert_has_edge(parent, expected_node, relation=HAS_VARIANT) class TestComplex(TestTokenParserBase): """2.1.2 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XcomplexA""" def setUp(self): self.parser.clear() self.parser.complex_abundances.setParseAction(self.parser.handle_term) def test_named_complex_singleton(self): statement = 'complex(SCOMP:"AP-1 Complex")' result = self.parser.complex_abundances.parseString(statement) 
expected_dict = { FUNCTION: COMPLEX, NAMESPACE: 'SCOMP', NAME: 'AP-1 Complex' } self.assertEqual(expected_dict, result.asDict()) expected_node = named_complex_abundance('SCOMP', 'AP-1 Complex') self.assert_has_node(expected_node) def test_complex_list_short(self): statement = 'complex(p(HGNC:FOS), p(HGNC:JUN))' result = self.parser.complex_abundances.parseString(statement) expected_result = [COMPLEX, [PROTEIN, 'HGNC', 'FOS'], [PROTEIN, 'HGNC', 'JUN']] self.assertEqual(expected_result, result.asList()) expected_result = { FUNCTION: COMPLEX, MEMBERS: [ { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'FOS' }, { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'JUN' } ] } self.assertEqual(expected_result, result.asDict()) child_1 = protein('HGNC', 'FOS') self.assert_has_node(child_1) child_2 = protein('HGNC', 'JUN') self.assert_has_node(child_2) expected_node = complex_abundance([child_1, child_2]) self.assert_has_node(expected_node) self.assert_has_edge(expected_node, child_1, relation=HAS_COMPONENT) self.assert_has_edge(expected_node, child_2, relation=HAS_COMPONENT) def test_complex_list_long(self): statement = 'complexAbundance(proteinAbundance(HGNC:HBP1),geneAbundance(HGNC:NCF1))' self.parser.complex_abundances.parseString(statement) class TestComposite(TestTokenParserBase): """Tests the parsing of the composite function .. 
seealso:: `BEL 2.0 Specification 2.1.3 `_ """ def setUp(self): self.parser.clear() self.parser.composite_abundance.setParseAction(self.parser.handle_term) def test_213a(self): """Evidence: ``IL-6 and IL-23 synergistically induce Th17 differentiation""" statement = 'composite(p(HGNC:IL6), complex(GOCC:"interleukin-23 complex"))' result = self.parser.composite_abundance.parseString(statement) expected_result = [COMPOSITE, [PROTEIN, 'HGNC', 'IL6'], [COMPLEX, 'GOCC', 'interleukin-23 complex']] self.assertEqual(expected_result, result.asList()) expected_dict = { FUNCTION: COMPOSITE, MEMBERS: [ { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'IL6' }, { FUNCTION: COMPLEX, NAMESPACE: 'GOCC', NAME: 'interleukin-23 complex' } ] } self.assertEqual(expected_dict, result.asDict()) il23 = named_complex_abundance('GOCC', 'interleukin-23 complex') il6 = protein('HGNC', 'IL6') expected_node = composite_abundance([il23, il6]) self.assert_has_node(expected_node) self.assertEqual(2, len(expected_node[MEMBERS])) self.assertEqual(il23, expected_node[MEMBERS][0]) self.assertEqual(il6, expected_node[MEMBERS][1]) self.assertEqual('composite(complex(GOCC:"interleukin-23 complex"), p(HGNC:IL6))', self.graph.node_to_bel(expected_node)) self.assertEqual(3, self.parser.graph.number_of_nodes()) self.assert_has_node(expected_node) self.assert_has_node(il23) self.assert_has_node(il6) self.assertEqual(2, self.parser.graph.number_of_edges()) class TestBiologicalProcess(TestTokenParserBase): def setUp(self): self.parser.clear() self.parser.biological_process.setParseAction(self.parser.handle_term) def test_231a(self): """""" statement = 'bp(GOBP:"cell cycle arrest")' result = self.parser.biological_process.parseString(statement) expected_result = [BIOPROCESS, 'GOBP', 'cell cycle arrest'] self.assertEqual(expected_result, result.asList()) expected_dict = { FUNCTION: BIOPROCESS, NAMESPACE: 'GOBP', NAME: 'cell cycle arrest' } self.assertEqual(expected_dict, result.asDict()) expected_node = 
bioprocess('GOBP', 'cell cycle arrest') self.assert_has_node(expected_node) class TestPathology(TestTokenParserBase): def setUp(self): self.parser.clear() self.parser.pathology.setParseAction(self.parser.handle_term) def test_232a(self): statement = 'pathology(MESH:adenocarcinoma)' result = self.parser.pathology.parseString(statement) expected_dict = { FUNCTION: PATHOLOGY, NAMESPACE: 'MESH', NAME: 'adenocarcinoma' } self.assertEqual(expected_dict, result.asDict()) expected_node = pathology('MESH', 'adenocarcinoma') self.assert_has_node(expected_node) self.assertEqual('path(MESH:adenocarcinoma)', self.graph.node_to_bel(expected_node)) class TestActivity(TestTokenParserBase): """Tests for molecular activity terms.""" def setUp(self): """Set up parser for testing the activity language.""" self.parser.clear() self.parser.activity.setParseAction(self.parser.handle_term) def test_activity_bare(self): statement = 'act(p(HGNC:AKT1))' result = self.parser.activity.parseString(statement) expected_result = [ACTIVITY, [PROTEIN, 'HGNC', 'AKT1']] self.assertEqual(expected_result, result.asList()) mod = modifier_po_to_dict(result) expected_mod = { MODIFIER: ACTIVITY } self.assertEqual(expected_mod, mod) def test_activity_withMolecularActivityDefault(self): """Tests activity modifier with molecular activity from default BEL namespace""" statement = 'act(p(HGNC:AKT1), ma(kin))' result = self.parser.activity.parseString(statement) expected_dict = { MODIFIER: ACTIVITY, EFFECT: { NAME: 'kin', NAMESPACE: BEL_DEFAULT_NAMESPACE }, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1' } } self.assertEqual(expected_dict, result.asDict()) mod = modifier_po_to_dict(result) expected_mod = { MODIFIER: ACTIVITY, EFFECT: { NAME: 'kin', NAMESPACE: BEL_DEFAULT_NAMESPACE } } self.assertEqual(expected_mod, mod) def test_activity_withMolecularActivityDefaultLong(self): """Tests activity modifier with molecular activity from custom namespaced""" statement = 'act(p(HGNC:AKT1), 
ma(catalyticActivity))' result = self.parser.activity.parseString(statement) expected_dict = { MODIFIER: ACTIVITY, EFFECT: { NAME: 'cat', NAMESPACE: BEL_DEFAULT_NAMESPACE }, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1' } } self.assertEqual(expected_dict, result.asDict()) mod = modifier_po_to_dict(result) expected_mod = { MODIFIER: ACTIVITY, EFFECT: { NAME: 'cat', NAMESPACE: BEL_DEFAULT_NAMESPACE } } self.assertEqual(expected_mod, mod) def test_activity_withMolecularActivityCustom(self): """Tests activity modifier with molecular activity from custom namespaced""" statement = 'act(p(HGNC:AKT1), ma(GOMF:"catalytic activity"))' result = self.parser.activity.parseString(statement) expected_dict = { MODIFIER: ACTIVITY, EFFECT: { NAMESPACE: 'GOMF', NAME: 'catalytic activity' }, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1' } } self.assertEqual(expected_dict, result.asDict()) mod = modifier_po_to_dict(result) expected_mod = { MODIFIER: ACTIVITY, EFFECT: { NAMESPACE: 'GOMF', NAME: 'catalytic activity' } } self.assertEqual(expected_mod, mod) def test_activity_legacy(self): """Test BEL 1.0 style molecular activity annotation""" statement = 'kin(p(HGNC:AKT1))' result = self.parser.activity.parseString(statement) expected_dict = { MODIFIER: ACTIVITY, EFFECT: { NAME: 'kin', NAMESPACE: BEL_DEFAULT_NAMESPACE }, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1' } } self.assertEqual(expected_dict, result.asDict()) mod = modifier_po_to_dict(result) expected_mod = { MODIFIER: ACTIVITY, EFFECT: { NAME: 'kin', NAMESPACE: BEL_DEFAULT_NAMESPACE } } self.assertEqual(expected_mod, mod) node = protein('HGNC', 'AKT1') self.assert_has_node(node) def test_kinase_activity_on_named_complex(self): statement = 'kin(complex(FPLX:C1))' self.parser.activity.parseString(statement) def test_activity_on_named_complex(self): statement = 'act(complex(FPLX:C1), ma(kin))' self.parser.activity.parseString(statement) def test_kinase_activity_on_listed_complex(self): 
statement = 'kin(complex(p(HGNC:A), p(HGNC:B)))' self.parser.activity.parseString(statement) def test_activity_on_listed_complex(self): statement = 'act(complex(p(HGNC:A), p(HGNC:B)), ma(kin))' self.parser.activity.parseString(statement) class TestTranslocationPermissive(unittest.TestCase): @classmethod def setUpClass(cls): cls.graph = BELGraph() cls.parser = BELParser( cls.graph, disallow_unqualified_translocations=False, ) def setUp(self): self.parser.clear() self.parser.transformation.setParseAction(self.parser.handle_term) def assert_has_node(self, member, **kwargs): assert_has_node(self, member, self.parser.graph, **kwargs) def assert_has_edge(self, u, v, **kwargs): assert_has_edge(self, u, v, self.parser.graph, **kwargs) def test_unqualified_translocation_single(self): """translocation example""" statement = 'tloc(p(HGNC:EGFR))' result = self.parser.transformation.parseString(statement) expected_dict = { MODIFIER: TRANSLOCATION, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'EGFR' }, } self.assertEqual(expected_dict, result.asDict()) mod = modifier_po_to_dict(result) expected_mod = { MODIFIER: TRANSLOCATION, } self.assertEqual(expected_mod, mod) node = protein('HGNC', 'EGFR') self.assert_has_node(node) def test_unqualified_translocation_relation(self): """Test translocation in object. 
3.1.2 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XdIncreases """ update_provenance(self.parser.control_parser) statement = 'a(ADO:"Abeta_42") => tloc(a(CHEBI:"calcium(2+)"))' result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { FUNCTION: ABUNDANCE, NAMESPACE: 'ADO', NAME: 'Abeta_42' }, RELATION: DIRECTLY_INCREASES, OBJECT: { TARGET: { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'calcium(2+)' }, MODIFIER: TRANSLOCATION, } } self.assertEqual(expected_dict, result.asDict()) sub = abundance('ADO', 'Abeta_42') self.assert_has_node(sub) obj = abundance('CHEBI', 'calcium(2+)') self.assert_has_node(obj) expected_annotations = { RELATION: DIRECTLY_INCREASES, OBJECT: { MODIFIER: TRANSLOCATION, } } self.assert_has_edge(sub, obj, **expected_annotations) class TestTransformation(TestTokenParserBase): def setUp(self): self.parser.clear() self.parser.transformation.setParseAction(self.parser.handle_term) def test_degredation_short(self): """Test the short form of degradation works""" statement = 'deg(p(HGNC:AKT1))' result = self.parser.transformation.parseString(statement) expected_result = [DEGRADATION, [PROTEIN, 'HGNC', 'AKT1']] self.assertEqual(expected_result, result.asList()) expected_dict = { MODIFIER: DEGRADATION, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1' } } self.assertEqual(expected_dict, result.asDict()) mod = modifier_po_to_dict(result) expected_mod = { MODIFIER: DEGRADATION, } self.assertEqual(expected_mod, mod) def test_degradation_long(self): """Test the long form of degradation works""" statement = 'degradation(p(HGNC:EGFR))' result = self.parser.transformation.parseString(statement) expected_dict = { MODIFIER: DEGRADATION, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'EGFR' } } self.assertEqual(expected_dict, result.asDict()) mod = modifier_po_to_dict(result) expected_mod = { MODIFIER: DEGRADATION, } self.assertEqual(expected_mod, mod) node = protein('HGNC', 'EGFR') 
self.assert_has_node(node) def test_translocation_standard(self): """translocation example""" statement = 'tloc(p(HGNC:EGFR), fromLoc(GOCC:"cell surface"), toLoc(GOCC:endosome))' result = self.parser.transformation.parseString(statement) expected_dict = { MODIFIER: TRANSLOCATION, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'EGFR' }, EFFECT: { FROM_LOC: {NAMESPACE: 'GOCC', NAME: 'cell surface'}, TO_LOC: {NAMESPACE: 'GOCC', NAME: 'endosome'} } } self.assertEqual(expected_dict, result.asDict()) mod = modifier_po_to_dict(result) expected_mod = translocation( from_loc=entity(namespace='GOCC', name='cell surface'), to_loc=entity(namespace='GOCC', name='endosome'), ) self.assertEqual(expected_mod, mod) node = protein('HGNC', 'EGFR') self.assert_has_node(node) def test_translocation_bare(self): """translocation example""" statement = 'tloc(p(HGNC:EGFR), GOCC:"cell surface", GOCC:endosome)' result = self.parser.transformation.parseString(statement) expected_dict = { MODIFIER: TRANSLOCATION, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'EGFR' }, EFFECT: { FROM_LOC: {NAMESPACE: 'GOCC', NAME: 'cell surface'}, TO_LOC: {NAMESPACE: 'GOCC', NAME: 'endosome'} } } self.assertEqual(expected_dict, result.asDict()) mod = modifier_po_to_dict(result) expected_mod = { MODIFIER: TRANSLOCATION, EFFECT: { FROM_LOC: {NAMESPACE: 'GOCC', NAME: 'cell surface'}, TO_LOC: {NAMESPACE: 'GOCC', NAME: 'endosome'} } } self.assertEqual(expected_mod, mod) node = protein('HGNC', 'EGFR') self.assert_has_node(node) def test_unqualified_translocation_strict(self): """Fail on an improperly written single argument translocation""" statement = 'tloc(a(NS:"T-Lymphocytes"))' with self.assertRaises(MalformedTranslocationWarning): self.parser.translocation.parseString(statement) def test_translocation_secretion(self): """cell secretion short form""" statement = 'sec(p(HGNC:EGFR))' result = self.parser.transformation.parseString(statement) expected_result = ['CellSecretion', [PROTEIN, 'HGNC', 
'EGFR']] self.assertEqual(expected_result, result.asList()) mod = modifier_po_to_dict(result) expected_mod = secretion() self.assertEqual(expected_mod, mod) node = protein('HGNC', 'EGFR') self.assert_has_node(node) def test_translocation_surface(self): """cell surface expression short form""" statement = 'surf(p(HGNC:EGFR))' result = self.parser.transformation.parseString(statement) expected_result = ['CellSurfaceExpression', [PROTEIN, 'HGNC', 'EGFR']] self.assertEqual(expected_result, result.asList()) expected_mod = cell_surface_expression() self.assertEqual(expected_mod, modifier_po_to_dict(result)) node = protein('HGNC', 'EGFR') self.assert_has_node(node) def test_reaction_1(self): statement = 'rxn(reactants(a(CHEBI:superoxide)), products(a(CHEBI:"hydrogen peroxide"), a(CHEBI:oxygen)))' result = self.parser.transformation.parseString(statement) expected_dict = { FUNCTION: REACTION, REACTANTS: [ { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'superoxide' } ], PRODUCTS: [ { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'hydrogen peroxide' }, { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'oxygen' } ] } self.assertEqual(expected_dict, result.asDict()) superoxide_node = abundance('CHEBI', 'superoxide') hydrogen_peroxide = abundance('CHEBI', 'hydrogen peroxide') oxygen_node = abundance('CHEBI', 'oxygen') expected_node = reaction([superoxide_node], [hydrogen_peroxide, oxygen_node]) self.assert_has_node(expected_node) self.assertEqual(statement, self.graph.node_to_bel(expected_node)) self.assert_has_node(superoxide_node) self.assert_has_edge(expected_node, superoxide_node) self.assert_has_node(hydrogen_peroxide) self.assert_has_edge(expected_node, hydrogen_peroxide) self.assert_has_node(oxygen_node) self.assert_has_edge(expected_node, oxygen_node) def test_reaction_2(self): statement = 'rxn(reactants(p(HGNC:APP)), products(p(HGNC:APP, frag(672_713))))' self.parser.transformation.parseString(statement) app = hgnc('APP') self.assertIn(app, self.graph) 
amyloid_beta_42 = app.with_variants(Fragment(start=672, stop=713)) self.assertIn(amyloid_beta_42, self.graph) expected_node = reaction(app, amyloid_beta_42) self.assertIn(expected_node, self.graph) def test_clearance(self): """Tests that after adding things, the graph and parser can be cleared properly""" s1 = 'surf(p(HGNC:EGFR))' s2 = 'rxn(reactants(a(CHEBI:superoxide)),products(a(CHEBI:"hydrogen peroxide"), a(CHEBI:"oxygen")))' self.parser.transformation.parseString(s1) self.parser.transformation.parseString(s2) self.assertGreater(self.parser.graph.number_of_nodes(), 0) self.assertGreater(self.parser.graph.number_of_edges(), 0) self.parser.clear() self.assertEqual(0, self.parser.graph.number_of_nodes()) self.assertEqual(0, self.parser.graph.number_of_edges()) self.assertEqual(0, len(self.parser.control_parser.annotations)) self.assertEqual(0, len(self.parser.control_parser.citation)) class TestSemantics(unittest.TestCase): def test_lenient_semantic_no_failure(self): graph = BELGraph() parser = BELParser(graph, allow_naked_names=True) update_provenance(parser.control_parser) parser.bel_term.addParseAction(parser.handle_term) parser.bel_term.parseString('bp(ABASD)') node_data = bioprocess(namespace=DIRTY, name='ABASD') self.assertIn(node_data, graph) pybel-0.12.1/tests/test_parse/test_parse_bel_relations.py000066400000000000000000001061171334645200200236530ustar00rootroot00000000000000# -*- coding: utf-8 -*- import logging import unittest from pyparsing import ParseException from pybel import BELGraph from pybel.canonicalize import edge_to_bel from pybel.constants import ( ABUNDANCE, ACTIVITY, ANNOTATIONS, BEL_DEFAULT_NAMESPACE, BIOPROCESS, CITATION, COMPLEX, COMPOSITE, DECREASES, DIRECTLY_DECREASES, DIRECTLY_INCREASES, EFFECT, EQUIVALENT_TO, EVIDENCE, FROM_LOC, FUNCTION, GENE, GMOD, HAS_COMPONENT, HAS_MEMBER, HAS_PRODUCT, HAS_REACTANT, HAS_VARIANT, HGVS, IDENTIFIER, INCREASES, IS_A, KIND, LOCATION, MEMBERS, MODIFIER, NAME, NAMESPACE, NEGATIVE_CORRELATION, OBJECT, 
ORTHOLOGOUS, PART_OF, PATHOLOGY, PRODUCTS, PROTEIN, REACTANTS, REACTION, REGULATES, RELATION, RNA, SUBJECT, SUBPROCESS_OF, TARGET, TO_LOC, TRANSCRIBED_TO, TRANSLATED_TO, TRANSLOCATION, VARIANTS, ) from pybel.dsl import ( abundance, activity, bioprocess, complex_abundance, composite_abundance, entity, gene, hgvs, pmod, protein, reaction, rna, Pathology, named_complex_abundance, gmod, ) from pybel.dsl.namespaces import hgnc from pybel.parser import BELParser from pybel.parser.exc import ( MissingNamespaceNameWarning, NestedRelationWarning, RelabelWarning, UndefinedNamespaceWarning, ) from tests.constants import TestTokenParserBase, test_citation_dict, test_evidence_text log = logging.getLogger(__name__) class TestRelations(TestTokenParserBase): @classmethod def setUpClass(cls): super(TestRelations, cls).setUpClass() cls.parser.relation.streamline() def setUp(self): super(TestRelations, self).setUp() self.add_default_provenance() def test_ensure_no_dup_nodes(self): """Ensure node isn't added twice, even if from different statements""" self.parser.gene.addParseAction(self.parser.handle_term) result = self.parser.bel_term.parseString('g(HGNC:AKT1)') expected_result_dict = { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'AKT1' } self.assertEqual(expected_result_dict, result.asDict()) self.parser.degradation.addParseAction(self.parser.handle_term) self.parser.degradation.parseString('deg(g(HGNC:AKT1))') akt1_gene = gene('HGNC', 'AKT1') self.assertEqual(1, self.parser.graph.number_of_nodes()) self.assert_has_node(akt1_gene) def test_singleton(self): """Test singleton composite in subject.""" statement = 'composite(p(HGNC:CASP8),p(HGNC:FADD),a(ADO:"Abeta_42"))' result = self.parser.statement.parseString(statement) expected = [ COMPOSITE, [PROTEIN, 'HGNC', 'CASP8'], [PROTEIN, 'HGNC', 'FADD'], [ABUNDANCE, 'ADO', 'Abeta_42'] ] self.assertEqual(expected, result.asList()) sub_member_1 = protein('HGNC', 'CASP8') self.assert_has_node(sub_member_1) sub_member_2 = protein('HGNC', 'FADD') 
self.assert_has_node(sub_member_2) sub_member_3 = abundance('ADO', 'Abeta_42') self.assert_has_node(sub_member_3) sub = composite_abundance([sub_member_1, sub_member_2, sub_member_3]) self.assert_has_node(sub) self.assert_has_edge(sub, sub_member_1, relation=HAS_COMPONENT) self.assert_has_edge(sub, sub_member_2, relation=HAS_COMPONENT) def test_predicate_failure(self): """Checks that if there's a problem with the relation/object, that an error gets thrown""" statement = 'composite(p(HGNC:CASP8),p(HGNC:FADD),a(ADO:"Abeta_42")) -> nope(GOBP:"neuron apoptotic process")' with self.assertRaises(ParseException): self.parser.relation.parseString(statement) def test_increases(self): """Test composite in subject. See BEL 2.0 specification `3.1.1 `_ """ statement = 'composite(p(HGNC:CASP8),p(HGNC:FADD),a(ADO:"Abeta_42")) -> bp(GOBP:"neuron apoptotic process")' result = self.parser.relation.parseString(statement) expected = [ [COMPOSITE, [PROTEIN, 'HGNC', 'CASP8'], [PROTEIN, 'HGNC', 'FADD'], [ABUNDANCE, 'ADO', 'Abeta_42']], INCREASES, [BIOPROCESS, 'GOBP', 'neuron apoptotic process'] ] self.assertEqual(expected, result.asList()) sub_member_1 = protein('HGNC', 'CASP8') self.assert_has_node(sub_member_1) sub_member_2 = protein('HGNC', 'FADD') self.assert_has_node(sub_member_2) sub_member_3 = abundance('ADO', 'Abeta_42') self.assert_has_node(sub_member_3) sub = composite_abundance([sub_member_1, sub_member_2, sub_member_3]) self.assert_has_node(sub) self.assert_has_edge(sub, sub_member_1, relation=HAS_COMPONENT) self.assert_has_edge(sub, sub_member_2, relation=HAS_COMPONENT) self.assert_has_edge(sub, sub_member_3, relation=HAS_COMPONENT) obj = bioprocess('GOBP', 'neuron apoptotic process') self.assert_has_node(obj) self.assert_has_edge(sub, obj, relation=INCREASES) def test_directlyIncreases_withTlocObject(self): """Test translocation in object. 
See BEL 2.0 specification `3.1.2 `_ """ statement = 'a(ADO:"Abeta_42") => tloc(a(CHEBI:"calcium(2+)"),fromLoc(MESHCS:"Cell Membrane"),' \ 'toLoc(MESHCS:"Intracellular Space"))' result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { FUNCTION: ABUNDANCE, NAMESPACE: 'ADO', NAME: 'Abeta_42' }, RELATION: DIRECTLY_INCREASES, OBJECT: { TARGET: { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'calcium(2+)' }, MODIFIER: TRANSLOCATION, EFFECT: { FROM_LOC: {NAMESPACE: 'MESHCS', NAME: 'Cell Membrane'}, TO_LOC: {NAMESPACE: 'MESHCS', NAME: 'Intracellular Space'} } } } self.assertEqual(expected_dict, result.asDict()) sub = abundance('ADO', 'Abeta_42') self.assert_has_node(sub) obj = abundance('CHEBI', 'calcium(2+)') self.assert_has_node(obj) expected_annotations = { RELATION: DIRECTLY_INCREASES, OBJECT: { MODIFIER: TRANSLOCATION, EFFECT: { FROM_LOC: {NAMESPACE: 'MESHCS', NAME: 'Cell Membrane'}, TO_LOC: {NAMESPACE: 'MESHCS', NAME: 'Intracellular Space'} } } } self.assert_has_edge(sub, obj, **expected_annotations) def test_decreases(self): """Test parsing a decreases relation with a reaction. 
3.1.3 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#Xdecreases """ statement = 'pep(p(SFAM:"CAPN Family", location(GOCC:intracellular))) -| reaction(reactants(p(HGNC:CDK5R1)),products(p(HGNC:CDK5)))' result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { MODIFIER: ACTIVITY, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'SFAM', NAME: 'CAPN Family', LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'} }, EFFECT: { NAME: 'pep', NAMESPACE: BEL_DEFAULT_NAMESPACE }, }, RELATION: 'decreases', OBJECT: { FUNCTION: REACTION, REACTANTS: [ {FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'CDK5R1'} ], PRODUCTS: [ {FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'CDK5'} ] } } self.assertEqual(expected_dict, result.asDict()) sub = protein('SFAM', 'CAPN Family') self.assert_has_node(sub) obj_member_1 = protein('HGNC', 'CDK5R1') self.assert_has_node(obj_member_1) obj_member_2 = protein('HGNC', 'CDK5') self.assert_has_node(obj_member_2) obj = reaction(reactants=[obj_member_1], products=[obj_member_2]) self.assert_has_node(obj) self.assert_has_edge(obj, obj_member_1, relation=HAS_REACTANT) self.assert_has_edge(obj, obj_member_2, relation=HAS_PRODUCT) expected_edge_attributes = { RELATION: DECREASES, SUBJECT: { MODIFIER: ACTIVITY, EFFECT: { NAME: 'pep', NAMESPACE: BEL_DEFAULT_NAMESPACE, }, LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular', } } } self.assertEqual(expected_edge_attributes[SUBJECT], activity(name='pep', location=entity(name='intracellular', namespace='GOCC'))) self.assert_has_edge(sub, obj, **expected_edge_attributes) def test_directlyDecreases(self): """ 3.1.4 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XdDecreases Tests simple triple""" statement = 'proteinAbundance(HGNC:CAT, location(GOCC:intracellular)) directlyDecreases abundance(CHEBI:"hydrogen peroxide")' result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 
'CAT', LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'} }, RELATION: 'directlyDecreases', OBJECT: { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'hydrogen peroxide' } } self.assertEqual(expected_dict, result.asDict()) sub = protein('HGNC', 'CAT') self.assert_has_node(sub) obj = abundance('CHEBI', 'hydrogen peroxide') self.assert_has_node(obj) expected_attrs = { SUBJECT: { LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'} }, RELATION: 'directlyDecreases', } self.assert_has_edge(sub, obj, **expected_attrs) def test_directlyDecreases_annotationExpansion(self): """ 3.1.4 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XdDecreases Tests simple triple""" statement = 'g(HGNC:CAT, location(GOCC:intracellular)) directlyDecreases abundance(CHEBI:"hydrogen peroxide")' annotations = { 'ListAnnotation': {'a', 'b'}, 'ScalarAnnotation': {'c'} } self.parser.control_parser.annotations.update(annotations) result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'CAT', LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' } }, RELATION: DIRECTLY_DECREASES, OBJECT: { FUNCTION: ABUNDANCE, NAMESPACE: 'CHEBI', NAME: 'hydrogen peroxide' } } self.assertEqual(expected_dict, result.asDict()) sub = gene('HGNC', 'CAT') self.assert_has_node(sub) obj = abundance('CHEBI', 'hydrogen peroxide') self.assert_has_node(obj) expected_attrs = { SUBJECT: { LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' } }, RELATION: DIRECTLY_DECREASES, CITATION: test_citation_dict, EVIDENCE: test_evidence_text, ANNOTATIONS: { 'ListAnnotation': {'a': True, 'b': True}, 'ScalarAnnotation': {'c': True} } } self.assert_has_edge(sub, obj, **expected_attrs) def test_rateLimitingStepOf_subjectActivity(self): """3.1.5 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_ratelimitingstepof""" statement = 'act(p(HGNC:HMGCR), ma(cat)) rateLimitingStepOf bp(GOBP:"cholesterol biosynthetic process")' 
result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { MODIFIER: ACTIVITY, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'HMGCR' }, EFFECT: { NAME: 'cat', NAMESPACE: BEL_DEFAULT_NAMESPACE }, }, RELATION: 'rateLimitingStepOf', OBJECT: { FUNCTION: BIOPROCESS, NAMESPACE: 'GOBP', NAME: 'cholesterol biosynthetic process' } } self.assertEqual(expected_dict, result.asDict()) sub = protein('HGNC', 'HMGCR') self.assert_has_node(sub) obj = bioprocess('GOBP', 'cholesterol biosynthetic process') self.assert_has_node(obj) self.assert_has_edge(sub, obj, relation=expected_dict[RELATION]) def test_cnc_with_subject_variant(self): """Test a causesNoChange relationship with a variant in the subject. See also: 3.1.6 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#Xcnc """ statement = 'g(HGNC:APP,sub(G,275341,C)) cnc path(MESHD:"Alzheimer Disease")' result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'APP', VARIANTS: [ { KIND: HGVS, IDENTIFIER: 'c.275341G>C' } ] }, RELATION: 'causesNoChange', OBJECT: { FUNCTION: PATHOLOGY, NAMESPACE: 'MESHD', NAME: 'Alzheimer Disease' } } self.assertEqual(expected_dict, result.asDict()) app_gene = gene(namespace='HGNC', name='APP') self.assert_has_node(app_gene) sub = app_gene.with_variants(hgvs('c.275341G>C')) self.assert_has_node(sub) obj = Pathology('MESHD', 'Alzheimer Disease') self.assert_has_node(obj) self.assert_has_edge(sub, obj, relation=expected_dict[RELATION]) def test_regulates_with_multiple_nnotations(self): """ 3.1.7 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_regulates_reg Test nested definitions""" statement = 'pep(complex(p(HGNC:F3),p(HGNC:F7))) regulates pep(p(HGNC:F9))' result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { MODIFIER: ACTIVITY, EFFECT: { NAME: 'pep', NAMESPACE: BEL_DEFAULT_NAMESPACE }, TARGET: { FUNCTION: COMPLEX, 
MEMBERS: [ {FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'F3'}, {FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'F7'} ] } }, RELATION: REGULATES, OBJECT: { MODIFIER: ACTIVITY, EFFECT: { NAME: 'pep', NAMESPACE: BEL_DEFAULT_NAMESPACE }, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'F9' } } } self.assertEqual(expected_dict, result.asDict()) sub_member_1 = protein('HGNC', 'F3') self.assert_has_node(sub_member_1) sub_member_2 = protein('HGNC', 'F7') self.assert_has_node(sub_member_2) sub = complex_abundance([sub_member_1, sub_member_2]) self.assert_has_node(sub) self.assert_has_edge(sub, sub_member_1) self.assert_has_edge(sub, sub_member_2) obj = protein('HGNC', 'F9') self.assert_has_node(obj) self.assert_has_edge(sub, obj, relation=expected_dict[RELATION]) def test_nested_failure(self): """ 3.1 \ Test nested statement""" statement = 'p(HGNC:CAT) -| (a(CHEBI:"hydrogen peroxide") -> bp(GO:"apoptotic process"))' with self.assertRaises(NestedRelationWarning): self.parser.relation.parseString(statement) def test_nested_lenient(self): """ 3.1 \ Test nested statement""" statement = 'p(HGNC:CAT) -| (a(CHEBI:"hydrogen peroxide") -> bp(GO:"apoptotic process"))' self.parser.allow_nested = True self.parser.relation.parseString(statement) cat = protein('HGNC', 'CAT') h2o2 = abundance('CHEBI', "hydrogen peroxide") apoptosis = bioprocess('GO', "apoptotic process") self.assert_has_edge(cat, h2o2) self.assert_has_edge(h2o2, apoptosis) self.parser.lenient = False def test_negativeCorrelation_withObjectVariant(self): """ 3.2.1 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XnegCor Test phosphoralation tag""" statement = 'kin(p(SFAM:"GSK3 Family")) neg p(HGNC:MAPT,pmod(P))' result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { MODIFIER: ACTIVITY, EFFECT: { NAME: 'kin', NAMESPACE: BEL_DEFAULT_NAMESPACE }, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'SFAM', NAME: 'GSK3 Family' } }, RELATION: NEGATIVE_CORRELATION, OBJECT: { 
FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'MAPT', VARIANTS: [pmod('Ph')] } } self.assertEqual(expected_dict, result.asDict()) sub = protein('SFAM', 'GSK3 Family') self.assert_has_node(sub) obj = protein('HGNC', 'MAPT', variants=pmod('Ph')) self.assert_has_node(obj) self.assert_has_edge(sub, obj, relation=expected_dict[RELATION]) self.assert_has_edge(obj, sub, relation=expected_dict[RELATION]) def test_positiveCorrelation_withSelfReferential(self): """ 3.2.2 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#XposCor Self-referential relationships""" statement = 'p(HGNC:GSK3B, pmod(P, S, 9)) pos act(p(HGNC:GSK3B), ma(kin))' result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'GSK3B', VARIANTS: [pmod('Ph', position=9, code='Ser')] }, RELATION: 'positiveCorrelation', OBJECT: { MODIFIER: ACTIVITY, TARGET: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'GSK3B' }, EFFECT: { NAME: 'kin', NAMESPACE: BEL_DEFAULT_NAMESPACE } }, } self.assertEqual(expected_dict, result.asDict()) subject_node = protein('HGNC', 'GSK3B', variants=pmod('Ph', code='Ser', position=9)) self.assert_has_node(subject_node) object_node = protein('HGNC', 'GSK3B') self.assert_has_node(object_node) self.assert_has_edge(subject_node, object_node, relation=expected_dict[RELATION]) self.assert_has_edge(object_node, subject_node, relation=expected_dict[RELATION]) def test_orthologous(self): """ 3.3.1 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_orthologous """ statement = 'g(HGNC:AKT1) orthologous g(MGI:AKT1)' result = self.parser.relation.parseString(statement) expected_result = [[GENE, 'HGNC', 'AKT1'], ORTHOLOGOUS, [GENE, 'MGI', 'AKT1']] self.assertEqual(expected_result, result.asList()) sub = gene('HGNC', 'AKT1') self.assert_has_node(sub) obj = gene('MGI', 'AKT1') self.assert_has_node(obj) self.assert_has_edge(sub, obj, relation=ORTHOLOGOUS) self.assert_has_edge(obj, sub, 
relation=ORTHOLOGOUS) def test_transcription(self): """ 3.3.2 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_transcribedto """ statement = 'g(HGNC:AKT1) :> r(HGNC:AKT1)' result = self.parser.relation.parseString(statement) expected_result = [[GENE, 'HGNC', 'AKT1'], TRANSCRIBED_TO, [RNA, 'HGNC', 'AKT1']] self.assertEqual(expected_result, result.asList()) sub = gene('HGNC', 'AKT1') self.assert_has_node(sub) obj = rna('HGNC', 'AKT1') self.assert_has_node(obj) self.assert_has_edge(sub, obj, **{RELATION: TRANSCRIBED_TO}) def test_translation(self): """ 3.3.3 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_translatedto """ statement = 'r(HGNC:AKT1,loc(GOCC:intracellular)) >> p(HGNC:AKT1)' result = self.parser.relation.parseString(statement) # [[RNA, ['HGNC', 'AKT1']], TRANSLATED_TO, [PROTEIN, ['HGNC', 'AKT1']]] expected_result = { SUBJECT: { FUNCTION: RNA, NAMESPACE: 'HGNC', NAME: 'AKT1', LOCATION: { NAMESPACE: 'GOCC', NAME: 'intracellular' } }, RELATION: TRANSLATED_TO, OBJECT: { FUNCTION: PROTEIN, NAMESPACE: 'HGNC', NAME: 'AKT1', } } self.assertEqual(expected_result, result.asDict()) self.assertEqual(2, self.graph.number_of_nodes()) source = rna(name='AKT1', namespace='HGNC') self.assertIn(source, self.graph) target = protein(name='AKT1', namespace='HGNC') self.assertIn(target, self.graph) self.assertEqual(1, self.graph.number_of_edges()) self.assertTrue(self.graph.has_edge(source, target)) key_data = self.parser.graph[source][target] self.assertEqual(1, len(key_data)) key = list(key_data)[0] data = key_data[key] self.assertIn(RELATION, data) self.assertEqual(TRANSLATED_TO, data[RELATION]) calculated_edge_bel = edge_to_bel(source, target, data=data) self.assertEqual('r(HGNC:AKT1, loc(GOCC:intracellular)) translatedTo p(HGNC:AKT1)', calculated_edge_bel) def test_component_list(self): s = 'complex(SCOMP:"C1 Complex") hasComponents list(p(HGNC:C1QB), p(HGNC:C1S))' result = self.parser.relation.parseString(s) 
expected_result_list = [ [COMPLEX, 'SCOMP', 'C1 Complex'], 'hasComponents', [ [PROTEIN, 'HGNC', 'C1QB'], [PROTEIN, 'HGNC', 'C1S'] ] ] self.assertEqual(expected_result_list, result.asList()) sub = named_complex_abundance('SCOMP', 'C1 Complex') self.assert_has_node(sub) child_1 = hgnc('C1QB') self.assert_has_node(child_1) self.assert_has_edge(sub, child_1, **{RELATION: HAS_COMPONENT}) child_2 = hgnc('C1S') self.assert_has_node(child_2) self.assert_has_edge(sub, child_2, **{RELATION: HAS_COMPONENT}) def test_member_list(self): """ 3.4.2 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_hasmembers """ statement = 'p(PKC:a) hasMembers list(p(HGNC:PRKCA), p(HGNC:PRKCB), p(HGNC:PRKCD), p(HGNC:PRKCE))' result = self.parser.relation.parseString(statement) expected_result = [ [PROTEIN, 'PKC', 'a'], 'hasMembers', [ [PROTEIN, 'HGNC', 'PRKCA'], [PROTEIN, 'HGNC', 'PRKCB'], [PROTEIN, 'HGNC', 'PRKCD'], [PROTEIN, 'HGNC', 'PRKCE'] ] ] self.assertEqual(expected_result, result.asList()) sub = protein('PKC', 'a') obj1 = protein('HGNC', 'PRKCA') obj2 = protein('HGNC', 'PRKCB') obj3 = protein('HGNC', 'PRKCD') obj4 = protein('HGNC', 'PRKCE') self.assert_has_node(sub) self.assert_has_node(obj1) self.assert_has_edge(sub, obj1, relation=HAS_MEMBER) self.assert_has_node(obj2) self.assert_has_edge(sub, obj2, relation=HAS_MEMBER) self.assert_has_node(obj3) self.assert_has_edge(sub, obj3, relation=HAS_MEMBER) self.assert_has_node(obj4) self.assert_has_edge(sub, obj4, relation=HAS_MEMBER) def test_isA(self): """ 3.4.5 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_isa """ statement = 'pathology(MESH:Psoriasis) isA pathology(MESH:"Skin Diseases")' result = self.parser.relation.parseString(statement) expected_result = [[PATHOLOGY, 'MESH', 'Psoriasis'], 'isA', [PATHOLOGY, 'MESH', 'Skin Diseases']] self.assertEqual(expected_result, result.asList()) sub = Pathology('MESH', 'Psoriasis') self.assert_has_node(sub) obj = Pathology('MESH', 
'Skin Diseases') self.assert_has_node(obj) self.assert_has_edge(sub, obj, relation=IS_A) def test_label_1(self): statement = 'g(HGNC:APOE, var(c.526C>T), var(c.388T>C)) labeled "APOE E2"' result = self.parser.relation.parseString(statement) expected_dict = { SUBJECT: { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'APOE', VARIANTS: [ { KIND: HGVS, IDENTIFIER: 'c.526C>T' }, { KIND: HGVS, IDENTIFIER: 'c.388T>C' } ] }, OBJECT: 'APOE E2' } self.assertEqual(expected_dict, result.asDict()) expected_node = gene('HGNC', 'APOE', variants=[hgvs('c.526C>T'), hgvs('c.388T>C')]) self.assert_has_node(expected_node) self.assertTrue(self.parser.graph.has_node_description(expected_node)) self.assertEqual('APOE E2', self.parser.graph.get_node_description(expected_node)) def test_raise_on_relabel(self): s1 = 'g(HGNC:APOE, var(c.526C>T), var(c.388T>C)) labeled "APOE E2"' s2 = 'g(HGNC:APOE, var(c.526C>T), var(c.388T>C)) labeled "APOE E2 Variant"' self.parser.relation.parseString(s1) with self.assertRaises(RelabelWarning): self.parser.relation.parseString(s2) def test_equivalentTo(self): statement = 'g(dbSNP:"rs123456") eq g(HGNC:YFG, var(c.123G>A))' result = self.parser.relation.parseString(statement) expected_result = { SUBJECT: { FUNCTION: GENE, NAMESPACE: 'dbSNP', NAME: 'rs123456', }, RELATION: EQUIVALENT_TO, OBJECT: { FUNCTION: GENE, NAMESPACE: 'HGNC', NAME: 'YFG', VARIANTS: [ { KIND: HGVS, IDENTIFIER: 'c.123G>A' } ] } } self.assertEqual(expected_result, result.asDict()) sub = gene('dbSNP', 'rs123456') self.assert_has_node(sub) obj = gene('HGNC', 'YFG', variants=hgvs('c.123G>A')) self.assert_has_node(obj) self.assert_has_edge(sub, obj, **{RELATION: EQUIVALENT_TO}) self.assert_has_edge(obj, sub, **{RELATION: EQUIVALENT_TO}) def test_partOf(self): statement = 'a(UBERON:"corpus striatum") partOf a(UBERON:"basal ganglion")' self.parser.relation.parseString(statement) corpus_striatum = abundance(namespace='UBERON', name='corpus striatum') basal_ganglion = abundance(namespace='UBERON', 
name='basal ganglion') self.assert_has_node(corpus_striatum) self.assert_has_node(basal_ganglion) self.assert_has_edge(corpus_striatum, basal_ganglion, relation=PART_OF) v = list(self.parser.graph[corpus_striatum][basal_ganglion].values()) self.assertEqual(1, len(v)) v = v[0] self.assertIn(RELATION, v) self.assertEqual(PART_OF, v[RELATION]) def test_subProcessOf(self): """ 3.4.6 http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_subprocessof """ statement = 'rxn(reactants(a(CHEBI:"(S)-3-hydroxy-3-methylglutaryl-CoA"),a(CHEBI:NADPH), \ a(CHEBI:hydron)),products(a(CHEBI:mevalonate), a(CHEBI:"CoA-SH"), a(CHEBI:"NADP(+)"))) \ subProcessOf bp(GOBP:"cholesterol biosynthetic process")' result = self.parser.relation.parseString(statement) expected_result = [ [ REACTION, [ [ABUNDANCE, 'CHEBI', '(S)-3-hydroxy-3-methylglutaryl-CoA'], [ABUNDANCE, 'CHEBI', 'NADPH'], [ABUNDANCE, 'CHEBI', 'hydron'], ], [ [ABUNDANCE, 'CHEBI', 'mevalonate'], [ABUNDANCE, 'CHEBI', 'CoA-SH'], [ABUNDANCE, 'CHEBI', 'NADP(+)'] ] ], SUBPROCESS_OF, [BIOPROCESS, 'GOBP', 'cholesterol biosynthetic process']] self.assertEqual(expected_result, result.asList()) sub_reactant_1 = abundance('CHEBI', '(S)-3-hydroxy-3-methylglutaryl-CoA') sub_reactant_2 = abundance('CHEBI', 'NADPH') sub_reactant_3 = abundance('CHEBI', 'hydron') sub_product_1 = abundance('CHEBI', 'mevalonate') sub_product_2 = abundance('CHEBI', 'CoA-SH') sub_product_3 = abundance('CHEBI', 'NADP(+)') self.assert_has_node(sub_reactant_1) self.assert_has_node(sub_reactant_2) self.assert_has_node(sub_reactant_3) self.assert_has_node(sub_product_1) self.assert_has_node(sub_product_2) self.assert_has_node(sub_product_3) sub = reaction([sub_reactant_1, sub_reactant_2, sub_reactant_3], [sub_product_1, sub_product_2, sub_product_3]) self.assert_has_edge(sub, sub_reactant_1, relation=HAS_REACTANT) self.assert_has_edge(sub, sub_reactant_2, relation=HAS_REACTANT) self.assert_has_edge(sub, sub_reactant_3, relation=HAS_REACTANT) 
self.assert_has_edge(sub, sub_product_1, relation=HAS_PRODUCT) self.assert_has_edge(sub, sub_product_2, relation=HAS_PRODUCT) self.assert_has_edge(sub, sub_product_3, relation=HAS_PRODUCT) obj = bioprocess('GOBP', 'cholesterol biosynthetic process') self.assert_has_node(obj) self.assert_has_edge(sub, obj, **{RELATION: 'subProcessOf'}) def test_extra_1(self): statement = 'abundance(CHEBI:"nitric oxide") increases cellSurfaceExpression(complexAbundance(proteinAbundance(HGNC:ITGAV),proteinAbundance(HGNC:ITGB3)))' self.parser.relation.parseString(statement) def test_has_variant(self): statement = 'g(HGNC:AKT1) hasVariant g(HGNC:AKT1, gmod(M))' self.parser.relation.parseString(statement) expected_parent = gene('HGNC', 'AKT1') expected_child = expected_parent.with_variants(gmod('Me')) self.assert_has_node(expected_parent) self.assert_has_node(expected_child) self.assertEqual('g(HGNC:AKT1)', self.graph.node_to_bel(expected_parent)) self.assertEqual('g(HGNC:AKT1, gmod(Me))', self.graph.node_to_bel(expected_child)) self.assert_has_edge(expected_parent, expected_child, **{RELATION: HAS_VARIANT}) def test_has_reaction_component(self): statement = 'rxn(reactants(a(CHEBI:"(S)-3-hydroxy-3-methylglutaryl-CoA"),a(CHEBI:NADPH), \ a(CHEBI:hydron)),products(a(CHEBI:mevalonate), a(CHEBI:"CoA-SH"), a(CHEBI:"NADP(+)"))) \ hasReactant a(CHEBI:"(S)-3-hydroxy-3-methylglutaryl-CoA")' result = self.parser.relation.parseString(statement) sub_reactant_1 = abundance('CHEBI', '(S)-3-hydroxy-3-methylglutaryl-CoA') sub_reactant_2 = abundance('CHEBI', 'NADPH') sub_reactant_3 = abundance('CHEBI', 'hydron') sub_product_1 = abundance('CHEBI', 'mevalonate') sub_product_2 = abundance('CHEBI', 'CoA-SH') sub_product_3 = abundance('CHEBI', 'NADP(+)') self.assert_has_node(sub_reactant_1) self.assert_has_node(sub_reactant_2) self.assert_has_node(sub_reactant_3) self.assert_has_node(sub_product_1) self.assert_has_node(sub_product_2) self.assert_has_node(sub_product_3) sub = reaction( 
reactants=[sub_reactant_1, sub_reactant_2, sub_reactant_3], products=[sub_product_1, sub_product_2, sub_product_3] ) self.assert_has_node(sub) self.assert_has_edge(sub, sub_reactant_1, relation=HAS_REACTANT) self.assert_has_edge(sub, sub_reactant_2, relation=HAS_REACTANT) self.assert_has_edge(sub, sub_reactant_3, relation=HAS_REACTANT) self.assert_has_edge(sub, sub_product_1, relation=HAS_PRODUCT) self.assert_has_edge(sub, sub_product_2, relation=HAS_PRODUCT) self.assert_has_edge(sub, sub_product_3, relation=HAS_PRODUCT) class TestCustom(unittest.TestCase): def setUp(self): graph = BELGraph() namespace_dict = { 'HGNC': { 'AKT1': 'GRP', 'YFG': 'GRP' }, 'MESHCS': { 'nucleus': 'A' } } self.parser = BELParser(graph, namespace_dict=namespace_dict, autostreamline=False) def test_tloc_undefined_namespace(self): s = 'tloc(p(HGNC:AKT1), fromLoc(MESHCS:nucleus), toLoc(MISSING:"undefined"))' with self.assertRaises(UndefinedNamespaceWarning): self.parser.translocation.parseString(s) def test_tloc_undefined_name(self): s = 'tloc(p(HGNC:AKT1), fromLoc(MESHCS:nucleus), toLoc(MESHCS:"undefined"))' with self.assertRaises(MissingNamespaceNameWarning): self.parser.translocation.parseString(s) def test_location_undefined_namespace(self): s = 'p(HGNC:AKT1, loc(MISSING:"nucleus")' with self.assertRaises(UndefinedNamespaceWarning): self.parser.protein.parseString(s) def test_location_undefined_name(self): s = 'p(HGNC:AKT1, loc(MESHCS:"undefined")' with self.assertRaises(MissingNamespaceNameWarning): self.parser.protein.parseString(s) pybel-0.12.1/tests/test_parse/test_parse_bel_variants.py000066400000000000000000000374221334645200200235040ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Test parsing variants.""" import logging import unittest from pybel.constants import ( BEL_DEFAULT_NAMESPACE, FRAGMENT, FRAGMENT_DESCRIPTION, FRAGMENT_MISSING, FRAGMENT_START, FRAGMENT_STOP, FUSION_MISSING, FUSION_REFERENCE, FUSION_START, FUSION_STOP, GMOD, IDENTIFIER, KIND, LOCATION, NAME, 
NAMESPACE, PARTNER_3P, PARTNER_5P, PMOD, PMOD_CODE, PMOD_POSITION, RANGE_3P, RANGE_5P, ) from pybel.dsl import entity, gmod, hgvs, pmod from pybel.parser.modifiers import ( get_fragment_language, get_fusion_language, get_gene_modification_language, get_gene_substitution_language, get_hgvs_language, get_location_language, get_protein_modification_language, get_protein_substitution_language, get_truncation_language, ) from pybel.parser.parse_identifier import IdentifierParser log = logging.getLogger(__name__) class TestHGVSParser(unittest.TestCase): def setUp(self): self.parser = get_hgvs_language() def test_protein_del(self): statement = 'variant(p.Phe508del)' expected = hgvs('p.Phe508del') result = self.parser.parseString(statement) self.assertEqual(expected, result.asDict()) def test_protein_del_quoted(self): statement = 'variant("p.Phe508del")' expected = hgvs('p.Phe508del') result = self.parser.parseString(statement) self.assertEqual(expected, result.asDict()) def test_protein_mut(self): statement = 'var(p.Gly576Ala)' expected = hgvs('p.Gly576Ala') result = self.parser.parseString(statement) self.assertEqual(expected, result.asDict()) def test_unspecified(self): statement = 'var(=)' expected = hgvs('=') result = self.parser.parseString(statement) self.assertEqual(expected, result.asDict()) def test_frameshift(self): statement = 'variant(p.Thr1220Lysfs)' expected = hgvs('p.Thr1220Lysfs') result = self.parser.parseString(statement) self.assertEqual(expected, result.asDict()) def test_snp(self): statement = 'var(c.1521_1523delCTT)' expected = hgvs('c.1521_1523delCTT') result = self.parser.parseString(statement) self.assertEqual(expected, result.asDict()) def test_chromosome_1(self): statement = 'variant(g.117199646_117199648delCTT)' expected = hgvs('g.117199646_117199648delCTT') result = self.parser.parseString(statement) self.assertEqual(expected, result.asDict()) def test_chromosome_2(self): statement = 'var(c.1521_1523delCTT)' expected = 
hgvs('c.1521_1523delCTT') result = self.parser.parseString(statement) self.assertEqual(expected, result.asDict()) def test_rna_del(self): statement = 'var(r.1653_1655delcuu)' expected = hgvs('r.1653_1655delcuu') result = self.parser.parseString(statement) self.assertEqual(expected, result.asDict()) def test_protein_trunc_triple(self): statement = 'var(p.Cys65*)' result = self.parser.parseString(statement) expected = hgvs('p.Cys65*') self.assertEqual(expected, result.asDict()) def test_protein_trunc_legacy(self): statement = 'var(p.65*)' result = self.parser.parseString(statement) expected = hgvs('p.65*') self.assertEqual(expected, result.asDict()) class TestPmod(unittest.TestCase): def setUp(self): identifier_parser = IdentifierParser() identifier_qualified = identifier_parser.identifier_qualified self.parser = get_protein_modification_language(identifier_qualified) def _help_test_pmod_simple(self, statement): result = self.parser.parseString(statement) expected = { KIND: PMOD, IDENTIFIER: { NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'Ph', }, } self.assertEqual(expected, pmod('Ph')) self.assertEqual(expected, result.asDict()) def test_bel_name(self): # long function, legacy modification self._help_test_pmod_simple('proteinModification(P)') # long function, new modification self._help_test_pmod_simple('proteinModification(Ph)') # long function, qualified modification self._help_test_pmod_simple('proteinModification(bel:Ph)') # short function, legacy modification self._help_test_pmod_simple('pmod(P)') # short function, new modification self._help_test_pmod_simple('pmod(Ph)') # short function, qualified modification self._help_test_pmod_simple('pmod(bel:Ph)') def _help_test_pmod_with_residue(self, statement): result = self.parser.parseString(statement) expected = { KIND: PMOD, IDENTIFIER: { NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'Ph', }, PMOD_CODE: 'Ser', } self.assertEqual(expected, pmod('Ph', code='Ser')) self.assertEqual(expected, result.asDict()) def 
test_residue(self): # short amino acid self._help_test_pmod_with_residue('pmod(Ph, S)') # long amino acid self._help_test_pmod_with_residue('pmod(Ph, Ser)') def _help_test_pmod_full(self, statement): result = self.parser.parseString(statement) expected = { KIND: PMOD, IDENTIFIER: { NAMESPACE: BEL_DEFAULT_NAMESPACE, NAME: 'Ph', }, PMOD_CODE: 'Ser', PMOD_POSITION: 473 } self.assertEqual(expected, pmod('Ph', code='Ser', position=473)) self.assertEqual(expected, result.asDict()) def test_full(self): self._help_test_pmod_full('proteinModification(P, Ser, 473)') self._help_test_pmod_full('proteinModification(P, S, 473)') self._help_test_pmod_full('proteinModification(Ph, Ser, 473)') self._help_test_pmod_full('proteinModification(Ph, S, 473)') self._help_test_pmod_full('proteinModification(bel:Ph, Ser, 473)') self._help_test_pmod_full('proteinModification(bel:Ph, S, 473)') self._help_test_pmod_full('pmod(P, Ser, 473)') self._help_test_pmod_full('pmod(P, S, 473)') self._help_test_pmod_full('pmod(Ph, Ser, 473)') self._help_test_pmod_full('pmod(Ph, S, 473)') self._help_test_pmod_full('pmod(bel:Ph, Ser, 473)') self._help_test_pmod_full('pmod(bel:Ph, S, 473)') def _help_test_non_standard_namespace(self, statement): result = self.parser.parseString(statement) expected = { KIND: PMOD, IDENTIFIER: entity('MOD', 'PhosRes'), PMOD_CODE: 'Ser', PMOD_POSITION: 473 } self.assertEqual(expected, pmod(name='PhosRes', namespace='MOD', code='Ser', position=473)) self.assertEqual(expected, result.asDict()) def test_full_with_non_standard_namespace(self): self._help_test_non_standard_namespace('proteinModification(MOD:PhosRes, S, 473)') self._help_test_non_standard_namespace('proteinModification(MOD:PhosRes, Ser, 473)') self._help_test_non_standard_namespace('pmod(MOD:PhosRes, S, 473)') self._help_test_non_standard_namespace('pmod(MOD:PhosRes, Ser, 473)') class TestGeneModification(unittest.TestCase): def setUp(self): identifier_parser = IdentifierParser() identifier_qualified = 
identifier_parser.identifier_qualified self.parser = get_gene_modification_language(identifier_qualified) self.expected = gmod('Me') def test_dsl(self): self.assertEqual({ KIND: GMOD, IDENTIFIER: { NAME: 'Me', NAMESPACE: BEL_DEFAULT_NAMESPACE } }, self.expected) def test_gmod_short(self): statement = 'gmod(M)' result = self.parser.parseString(statement) self.assertEqual(self.expected, result.asDict()) def test_gmod_unabbreviated(self): statement = 'gmod(Me)' result = self.parser.parseString(statement) self.assertEqual(self.expected, result.asDict()) def test_gmod_long(self): statement = 'geneModification(methylation)' result = self.parser.parseString(statement) self.assertEqual(self.expected, result.asDict()) class TestProteinSubstitution(unittest.TestCase): def setUp(self): self.parser = get_protein_substitution_language() def test_psub_1(self): statement = 'sub(A, 127, Y)' result = self.parser.parseString(statement) expected_list = hgvs('p.Ala127Tyr') self.assertEqual(expected_list, result.asDict()) def test_psub_2(self): statement = 'sub(Ala, 127, Tyr)' result = self.parser.parseString(statement) expected_list = hgvs('p.Ala127Tyr') self.assertEqual(expected_list, result.asDict()) class TestGeneSubstitutionParser(unittest.TestCase): def setUp(self): self.parser = get_gene_substitution_language() def test_gsub(self): statement = 'sub(G,308,A)' result = self.parser.parseString(statement) expected_dict = hgvs('c.308G>A') self.assertEqual(expected_dict, result.asDict()) class TestFragmentParser(unittest.TestCase): """See http://openbel.org/language/web/version_2.0/bel_specification_version_2.0.html#_examples_2""" def setUp(self): self.parser = get_fragment_language() def help_test_known_length(self, s): result = self.parser.parseString(s) expected = { KIND: FRAGMENT, FRAGMENT_START: 5, FRAGMENT_STOP: 20 } self.assertEqual(expected, result.asDict()) def test_known_length_unquoted(self): """test known length""" s = 'frag(5_20)' self.help_test_known_length(s) def 
test_known_length_quotes(self): """test known length""" s = 'frag("5_20")' self.help_test_known_length(s) def help_test_unknown_length(self, s): result = self.parser.parseString(s) expected = { KIND: FRAGMENT, FRAGMENT_START: 1, FRAGMENT_STOP: '?' } self.assertEqual(expected, result.asDict()) def test_unknown_length_unquoted(self): """amino-terminal fragment of unknown length""" s = 'frag(1_?)' self.help_test_unknown_length(s) def test_unknown_length_quoted(self): """amino-terminal fragment of unknown length""" s = 'frag("1_?")' self.help_test_unknown_length(s) def help_test_unknown_start_stop(self, s): result = self.parser.parseString(s) expected = { KIND: FRAGMENT, FRAGMENT_START: '?', FRAGMENT_STOP: '*' } self.assertEqual(expected, result.asDict()) def test_unknown_start_stop_unquoted(self): """fragment with unknown start/stop""" s = 'frag(?_*)' self.help_test_unknown_start_stop(s) def test_unknown_start_stop_quoted(self): """fragment with unknown start/stop""" s = 'frag("?_*")' self.help_test_unknown_start_stop(s) def help_test_descriptor(self, s): result = self.parser.parseString(s) expected = { KIND: FRAGMENT, FRAGMENT_MISSING: '?', FRAGMENT_DESCRIPTION: '55kD' } self.assertEqual(expected, result.asDict()) def test_descriptor_unquoted(self): """fragment with unknown start/stop and a descriptor""" s = 'frag(?, "55kD")' self.help_test_descriptor(s) def test_descriptor_quoted(self): """fragment with unknown start/stop and a descriptor""" s = 'frag("?", "55kD")' self.help_test_descriptor(s) class TestTruncationParser(unittest.TestCase): def setUp(self): self.parser = get_truncation_language() def test_trunc_1(self): statement = 'trunc(40)' result = self.parser.parseString(statement) expected = hgvs('p.40*') self.assertEqual(expected, result.asDict()) class TestFusionParser(unittest.TestCase): def setUp(self): identifier_parser = IdentifierParser() identifier_qualified = identifier_parser.identifier_qualified self.parser = get_fusion_language(identifier_qualified) 
def test_rna_fusion_known_breakpoints(self): """RNA abundance of fusion with known breakpoints""" statement = 'fus(HGNC:TMPRSS2, r.1_79, HGNC:ERG, r.312_5034)' result = self.parser.parseString(statement) expected = { PARTNER_5P: { NAMESPACE: 'HGNC', NAME: 'TMPRSS2' }, RANGE_5P: { FUSION_REFERENCE: 'r', FUSION_START: 1, FUSION_STOP: 79 }, PARTNER_3P: { NAMESPACE: 'HGNC', NAME: 'ERG' }, RANGE_3P: { FUSION_REFERENCE: 'r', FUSION_START: 312, FUSION_STOP: 5034 } } self.assertEqual(expected, result.asDict()) def test_rna_fusion_unspecified_breakpoints(self): """RNA abundance of fusion with unspecified breakpoints""" statement = 'fus(HGNC:TMPRSS2, ?, HGNC:ERG, ?)' result = self.parser.parseString(statement) expected = { PARTNER_5P: { NAMESPACE: 'HGNC', NAME: 'TMPRSS2' }, RANGE_5P: { FUSION_MISSING: '?' }, PARTNER_3P: { NAMESPACE: 'HGNC', NAME: 'ERG' }, RANGE_3P: { FUSION_MISSING: '?' } } self.assertEqual(expected, result.asDict()) def test_rna_fusion_specified_one_fuzzy_breakpoint(self): """RNA abundance of fusion with unspecified breakpoints""" statement = 'fusion(HGNC:TMPRSS2, r.1_79, HGNC:ERG, r.?_1)' result = self.parser.parseString(statement) expected = { PARTNER_5P: { NAMESPACE: 'HGNC', NAME: 'TMPRSS2' }, RANGE_5P: { FUSION_REFERENCE: 'r', FUSION_START: 1, FUSION_STOP: 79 }, PARTNER_3P: { NAMESPACE: 'HGNC', NAME: 'ERG' }, RANGE_3P: { FUSION_REFERENCE: 'r', FUSION_START: '?', FUSION_STOP: 1 } } self.assertEqual(expected, result.asDict()) def test_rna_fusion_specified_fuzzy_breakpoints(self): """RNA abundance of fusion with unspecified breakpoints""" statement = 'fusion(HGNC:TMPRSS2, r.1_?, HGNC:ERG, r.?_1)' result = self.parser.parseString(statement) expected = { PARTNER_5P: { NAMESPACE: 'HGNC', NAME: 'TMPRSS2' }, RANGE_5P: { FUSION_REFERENCE: 'r', FUSION_START: 1, FUSION_STOP: '?' 
}, PARTNER_3P: { NAMESPACE: 'HGNC', NAME: 'ERG' }, RANGE_3P: { FUSION_REFERENCE: 'r', FUSION_START: '?', FUSION_STOP: 1 } } self.assertEqual(expected, result.asDict()) class TestLocation(unittest.TestCase): def setUp(self): identifier_parser = IdentifierParser() identifier_qualified = identifier_parser.identifier_qualified self.parser = get_location_language(identifier_qualified) def test_a(self): statement = 'loc(GOCC:intracellular)' result = self.parser.parseString(statement) expected = { LOCATION: {NAMESPACE: 'GOCC', NAME: 'intracellular'} } self.assertEqual(expected, result.asDict()) pybel-0.12.1/tests/test_parse/test_parse_control.py000066400000000000000000000356301334645200200225120ustar00rootroot00000000000000# -*- coding: utf-8 -*- import logging import unittest from pybel.constants import ( ANNOTATIONS, CITATION, CITATION_AUTHORS, CITATION_COMMENTS, CITATION_DATE, CITATION_NAME, CITATION_REFERENCE, CITATION_TYPE, EVIDENCE, ) from pybel.parser import ControlParser from pybel.parser.exc import ( CitationTooLongException, CitationTooShortException, IllegalAnnotationValueWarning, InvalidCitationType, InvalidPubMedIdentifierWarning, MissingAnnotationKeyWarning, MissingAnnotationRegexWarning, UndefinedAnnotationWarning, ) from pybel.parser.parse_control import set_citation_stub from pybel.resources.document import sanitize_file_lines from tests.constants import SET_CITATION_TEST, test_citation_dict logging.getLogger("requests").setLevel(logging.WARNING) class TestParseControl(unittest.TestCase): def setUp(self): self.custom_annotation_dicts = { 'Custom1': {'Custom1_A', 'Custom1_B'}, 'Custom2': {'Custom2_A', 'Custom2_B'} } self.custom_annotation_expressions = { 'CustomRegex': '[0-9]+' } self.parser = ControlParser( annotation_dict=self.custom_annotation_dicts, annotation_regex=self.custom_annotation_expressions ) class TestParseControlUnsetStatementErrors(TestParseControl): def test_unset_missing_evidence(self): with 
self.assertRaises(MissingAnnotationKeyWarning): self.parser.parseString('UNSET Evidence') def test_unset_missing_citation(self): with self.assertRaises(MissingAnnotationKeyWarning): self.parser.parseString('UNSET Citation') def test_unset_missing_evidence_with_citation(self): """Tests that an evidence can't be unset without a citation""" s = [SET_CITATION_TEST, 'UNSET Evidence'] with self.assertRaises(MissingAnnotationKeyWarning): self.parser.parse_lines(s) def test_unset_missing_statement_group(self): with self.assertRaises(MissingAnnotationKeyWarning): self.parser.parseString('UNSET STATEMENT_GROUP') def test_unset_missing_command(self): s = [ SET_CITATION_TEST, 'UNSET Custom1' ] with self.assertRaises(MissingAnnotationKeyWarning): self.parser.parse_lines(s) def test_unset_invalid_command(self): s = [ SET_CITATION_TEST, 'UNSET MISSING' ] with self.assertRaises(UndefinedAnnotationWarning): self.parser.parse_lines(s) def test_unset_list_compact(self): """Tests unsetting an annotation list, without spaces in it""" s = [ SET_CITATION_TEST, 'SET Custom1 = "Custom1_A"', 'SET Custom2 = "Custom2_A"', ] self.parser.parse_lines(s) self.assertIn('Custom1', self.parser.annotations) self.assertIn('Custom2', self.parser.annotations) self.parser.parseString('UNSET {Custom1,Custom2}') self.assertFalse(self.parser.annotations) def test_unset_list_spaced(self): """Tests unsetting an annotation list, with spaces in it""" s = [ SET_CITATION_TEST, 'SET Custom1 = "Custom1_A"', 'SET Custom2 = "Custom2_A"', ] self.parser.parse_lines(s) self.assertIn('Custom1', self.parser.annotations) self.assertIn('Custom2', self.parser.annotations) self.parser.parseString('UNSET {Custom1, Custom2}') self.assertFalse(self.parser.annotations) class TestSetCitation(unittest.TestCase): def test_parser_double(self): set_citation_stub.parseString('Citation = {"PubMed","12928037"}') def test_parser_double_spaced(self): set_citation_stub.parseString('Citation = {"PubMed", "12928037"}') def 
test_parser_triple(self): set_citation_stub.parseString('Citation = {"PubMedCentral","Trends in molecular medicine","12928037"}') def test_parser_triple_spaced(self): set_citation_stub.parseString('Citation = {"PubMedCentral", "Trends in molecular medicine", "12928037"}') class TestParseControlSetStatementErrors(TestParseControl): def test_invalid_citation_type(self): with self.assertRaises(InvalidCitationType): self.parser.parseString('SET Citation = {"PubMedCentral","Trends in molecular medicine","12928037"}') def test_invalid_pmid(self): with self.assertRaises(InvalidPubMedIdentifierWarning): self.parser.parseString('SET Citation = {"PubMed","Trends in molecular medicine","NOT VALID NUMBER"}') def test_invalid_pmid_short(self): with self.assertRaises(InvalidPubMedIdentifierWarning): self.parser.parseString('SET Citation = {"PubMed","NOT VALID NUMBER"}') def test_set_missing_statement(self): statements = [ SET_CITATION_TEST, 'SET MissingKey = "lol"' ] with self.assertRaises(UndefinedAnnotationWarning): self.parser.parse_lines(statements) def test_custom_annotation_list_withInvalid(self): statements = [ SET_CITATION_TEST, 'SET Custom1 = {"Custom1_A","Custom1_B","Evil invalid!!!"}' ] with self.assertRaises(IllegalAnnotationValueWarning): self.parser.parse_lines(statements) def test_custom_value_failure(self): """Tests what happens for a valid annotation key, but an invalid value""" s = [ SET_CITATION_TEST, 'SET Custom1 = "Custom1_C"' ] with self.assertRaises(IllegalAnnotationValueWarning): self.parser.parse_lines(s) def test_regex_failure(self): s = [ SET_CITATION_TEST, 'SET CustomRegex = "abce13"' ] with self.assertRaises(MissingAnnotationRegexWarning): self.parser.parse_lines(s) class TestParseControl2(TestParseControl): def test_set_statement_group(self): """Tests a statement group gets set properly""" s1 = 'SET STATEMENT_GROUP = "my group"' self.assertIsNone(self.parser.statement_group) self.parser.parseString(s1) self.assertEqual('my group', 
self.parser.statement_group, msg='problem with integration') s2 = 'UNSET STATEMENT_GROUP' self.parser.parseString(s2) self.assertIsNone(self.parser.statement_group, msg='problem with unset') def test_citation_short(self): self.parser.parseString(SET_CITATION_TEST) self.assertEqual(test_citation_dict, self.parser.citation) expected_annotations = { EVIDENCE: None, ANNOTATIONS: {}, CITATION: test_citation_dict } self.assertEqual(expected_annotations, self.parser.get_annotations()) self.parser.parseString('UNSET Citation') self.assertEqual(0, len(self.parser.citation)) def test_citation_invalid_date(self): s = 'SET Citation = {"PubMed","Trends in molecular medicine","12928037","01-12-1999","de Nigris"}' self.parser.parseString(s) expected_citation = { CITATION_TYPE: 'PubMed', CITATION_NAME: 'Trends in molecular medicine', CITATION_REFERENCE: '12928037', } self.assertEqual(expected_citation, self.parser.citation) expected_dict = { EVIDENCE: None, ANNOTATIONS: {}, CITATION: expected_citation } self.assertEqual(expected_dict, self.parser.get_annotations()) def test_citation_with_empty_comment(self): s = 'SET Citation = {"PubMed","Test Name","12928037","1999-01-01","de Nigris|Lerman A|Ignarro LJ",""}' self.parser.parseString(s) expected_citation = { CITATION_TYPE: 'PubMed', CITATION_NAME: 'Test Name', CITATION_REFERENCE: '12928037', CITATION_DATE: '1999-01-01', CITATION_AUTHORS: 'de Nigris|Lerman A|Ignarro LJ', CITATION_COMMENTS: '' } self.assertEqual(expected_citation, self.parser.citation) expected_dict = { EVIDENCE: None, ANNOTATIONS: {}, CITATION: expected_citation } self.assertEqual(expected_dict, self.parser.get_annotations()) def test_double(self): s = 'SET Citation = {"PubMed","12928037"}' self.parser.parseString(s) expected_citation = { CITATION_TYPE: 'PubMed', CITATION_REFERENCE: '12928037', } self.assertEqual(expected_citation, self.parser.citation) def test_double_with_space(self): """Same as test_double, but has a space between the comma and next entry""" s = 
'SET Citation = {"PubMed", "12928037"}' self.parser.parseString(s) expected_citation = { CITATION_TYPE: 'PubMed', CITATION_REFERENCE: '12928037', } self.assertEqual(expected_citation, self.parser.citation) def test_citation_too_short(self): s = 'SET Citation = {"PubMed"}' with self.assertRaises(CitationTooShortException): self.parser.parseString(s) def test_citation_too_long(self): s = 'SET Citation = {"PubMed","Name","1234","1999-01-01","Nope|Noper","Nope", "nope nope"}' with self.assertRaises(CitationTooLongException): self.parser.parseString(s) def test_evidence(self): s = 'SET Evidence = "For instance, during 7-ketocholesterol-induced apoptosis of U937 cells"' self.parser.parseString(s) self.assertIsNotNone(self.parser.evidence) expected_annotation = { CITATION: {}, ANNOTATIONS: {}, EVIDENCE: 'For instance, during 7-ketocholesterol-induced apoptosis of U937 cells' } self.assertEqual(expected_annotation, self.parser.get_annotations()) def test_custom_annotation(self): s = [ SET_CITATION_TEST, 'SET Custom1 = "Custom1_A"' ] self.parser.parse_lines(s) expected_annotation = { 'Custom1': 'Custom1_A' } self.assertEqual(expected_annotation, self.parser.annotations) def test_custom_annotation_list(self): s = [ SET_CITATION_TEST, 'SET Custom1 = {"Custom1_A","Custom1_B"}' ] self.parser.parse_lines(s) expected_annotation = { 'Custom1': {'Custom1_A', 'Custom1_B'} } self.assertEqual(expected_annotation, self.parser.annotations) expected_dict = { ANNOTATIONS: expected_annotation, CITATION: test_citation_dict, EVIDENCE: None } self.assertEqual(expected_dict, self.parser.get_annotations()) def test_overwrite_evidence(self): s1 = 'SET Evidence = "a"' s2 = 'SET Evidence = "b"' self.parser.parseString(s1) self.parser.parseString(s2) self.assertEqual('b', self.parser.evidence) def test_unset_evidence(self): s1 = 'SET Evidence = "a"' s2 = 'UNSET Evidence' self.parser.parseString(s1) self.parser.parseString(s2) self.assertEqual({}, self.parser.annotations) def 
test_unset_custom(self): statements = [ SET_CITATION_TEST, 'SET Custom1 = "Custom1_A"', 'UNSET Custom1' ] self.parser.parse_lines(statements) self.assertEqual({}, self.parser.annotations) def test_reset_citation(self): s1 = 'SET Citation = {"PubMed","Test Reference 1","11111"}' s2 = 'SET Evidence = "d"' s3 = 'SET Citation = {"PubMed","Test Reference 2","22222"}' s4 = 'SET Evidence = "h"' s5 = 'SET Custom1 = "Custom1_A"' s6 = 'SET Custom2 = "Custom2_A"' statements = [s1, s2, s3, s4, s5, s6] self.parser.parse_lines(statements) self.assertEqual('h', self.parser.evidence) self.assertEqual('PubMed', self.parser.citation[CITATION_TYPE]) self.assertEqual('Test Reference 2', self.parser.citation[CITATION_NAME]) self.assertEqual('22222', self.parser.citation[CITATION_REFERENCE]) self.parser.parseString('UNSET {Custom1,Evidence}') self.assertNotIn('Custom1', self.parser.annotations) self.assertIsNone(self.parser.evidence) self.assertIn('Custom2', self.parser.annotations) self.assertNotEqual(0, len(self.parser.citation)) self.parser.parseString('UNSET ALL') self.assertEqual(0, len(self.parser.annotations)) self.assertEqual(0, len(self.parser.citation)) def test_set_regex(self): s = [ SET_CITATION_TEST, 'SET CustomRegex = "1234"' ] self.parser.parse_lines(s) self.assertEqual('1234', self.parser.annotations['CustomRegex']) class TestParseEvidence(unittest.TestCase): def test_111(self): statement = '''SET Evidence = "1.1.1 Easy case"''' expect = '''SET Evidence = "1.1.1 Easy case''' lines = list(sanitize_file_lines(statement.split('\n'))) self.assertEqual(1, len(lines)) line = lines[0] self.assertTrue(expect, line) def test_131(self): statement = '''SET Evidence = "3.1 Backward slash break test \\ second line"''' expect = '''SET Evidence = "3.1 Backward slash break test second line"''' lines = [line for i, line in sanitize_file_lines(statement.split('\n'))] self.assertEqual(1, len(lines)) line = lines[0] self.assertEqual(expect, line) def test_132(self): statement = '''SET 
Evidence = "3.2 Backward slash break test with whitespace \\ second line"''' expect = '''SET Evidence = "3.2 Backward slash break test with whitespace second line"''' lines = [line for i, line in sanitize_file_lines(statement.split('\n'))] self.assertEqual(1, len(lines)) line = lines[0] self.assertEqual(expect, line) def test_133(self): statement = '''SET Evidence = "3.3 Backward slash break test \\ second line \\ third line"''' expect = '''SET Evidence = "3.3 Backward slash break test second line third line"''' lines = [line for i, line in sanitize_file_lines(statement.split('\n'))] self.assertEqual(1, len(lines)) line = lines[0] self.assertEqual(expect, line) def test_141(self): statement = '''SET Evidence = "4.1 Malformed line breakcase second line"''' expect = '''SET Evidence = "4.1 Malformed line breakcase second line"''' lines = [line for i, line in sanitize_file_lines(statement.split('\n'))] self.assertEqual(1, len(lines)) line = lines[0] self.assertEqual(expect, line) def test_142(self): statement = '''SET Evidence = "4.2 Malformed line breakcase second line third line"''' expect = '''SET Evidence = "4.2 Malformed line breakcase second line third line"''' lines = [line for i, line in sanitize_file_lines(statement.split('\n'))] self.assertEqual(1, len(lines)) line = lines[0] self.assertEqual(expect, line) pybel-0.12.1/tests/test_parse/test_parse_identifier.py000066400000000000000000000077001334645200200231510ustar00rootroot00000000000000# -*- coding: utf-8 -*- import unittest from pybel.constants import DIRTY from pybel.parser.exc import NakedNameWarning from pybel.parser.parse_identifier import IdentifierParser class TestIdentifierParser(unittest.TestCase): def setUp(self): self.namespace_dicts = { 'A': {'1', '2', '3'}, 'B': {'4', '5', '6'} } self.parser = IdentifierParser(namespace_dict=self.namespace_dicts) def test_valid_1(self): s = 'A:3' result = self.parser.parseString(s) self.assertIn('namespace', result) self.assertIn('name', result) 
self.assertEqual('A', result['namespace']) self.assertEqual('3', result['name']) def test_valid_2(self): s = 'A:"3"' result = self.parser.parseString(s) self.assertIn('namespace', result) self.assertIn('name', result) self.assertEqual('A', result['namespace']) self.assertEqual('3', result['name']) def test_invalid_1(self): s = 'C:4' with self.assertRaises(Exception): self.parser.parseString(s) def test_invalid_2(self): s = 'A:4' with self.assertRaises(Exception): self.parser.parseString(s) def test_invalid_3(self): s = 'bare' with self.assertRaises(NakedNameWarning): self.parser.parseString(s) def test_invalid_4(self): s = '"quoted"' with self.assertRaises(NakedNameWarning): self.parser.parseString(s) class TestNamespaceParserDefault(unittest.TestCase): def setUp(self): nsd = { 'A': {'1', '2', '3'}, 'B': {'4', '5', '6'} } dns = {'X', 'Y', 'W Z'} self.parser = IdentifierParser(namespace_dict=nsd, default_namespace=dns) def test_valid_1(self): s = 'A:3' result = self.parser.parseString(s) self.assertIn('namespace', result) self.assertIn('name', result) self.assertEqual('A', result['namespace']) self.assertEqual('3', result['name']) def test_valid_2(self): s = 'X' result = self.parser.parseString(s) self.assertIn('name', result) self.assertEqual('X', result['name']) def test_valid_3(self): s = '"W Z"' result = self.parser.parseString(s) self.assertIn('name', result) self.assertEqual('W Z', result['name']) def test_not_in_defaultNs(self): s = 'D' with self.assertRaises(Exception): self.parser.parseString(s) class TestNamespaceParserLenient(unittest.TestCase): def setUp(self): nsd = { 'A': {'1', '2', '3'}, 'B': {'4', '5', '6'} } self.parser = IdentifierParser(namespace_dict=nsd, allow_naked_names=True) def test_valid_1(self): s = 'A:3' result = self.parser.parseString(s) self.assertIn('namespace', result) self.assertIn('name', result) self.assertEqual('A', result['namespace']) self.assertEqual('3', result['name']) def test_valid_2(self): s = 'A:"3"' result = 
self.parser.parseString(s) self.assertIn('namespace', result) self.assertIn('name', result) self.assertEqual('A', result['namespace']) self.assertEqual('3', result['name']) def test_invalid_1(self): s = 'C:4' with self.assertRaises(Exception): self.parser.parseString(s) def test_invalid_2(self): s = 'A:4' with self.assertRaises(Exception): self.parser.parseString(s) def test_not_invalid_3(self): s = 'bare' result = self.parser.parseString(s) self.assertEqual(DIRTY, result['namespace']) self.assertEqual('bare', result['name']) def test_not_invalid_4(self): s = '"quoted"' result = self.parser.parseString(s) self.assertEqual(DIRTY, result['namespace']) self.assertEqual('quoted', result['name']) pybel-0.12.1/tests/test_parse/test_parse_metadata.py000066400000000000000000000202011334645200200225760ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Test parsing metadata from a BEL script.""" import logging import os import unittest from pathlib import Path from pybel.parser import MetadataParser from pybel.parser.exc import ( InvalidMetadataException, RedefinedAnnotationError, RedefinedNamespaceError, VersionFormatWarning, ) from pybel.resources.document import split_file_to_annotations_and_definitions from pybel.testing.cases import FleetingTemporaryCacheMixin from pybel.testing.constants import test_an_1, test_bel_simple, test_ns_1, test_ns_nocache_path from pybel.testing.mocks import mock_bel_resources from tests.constants import ( HGNC_KEYWORD, HGNC_URL, MESH_DISEASES_KEYWORD, MESH_DISEASES_URL, help_check_hgnc, ) logging.getLogger("requests").setLevel(logging.WARNING) class TestSplitLines(unittest.TestCase): def test_parts(self): with open(test_bel_simple) as lines: docs, definitions, statements = split_file_to_annotations_and_definitions(lines) self.assertEqual(8, len(list(docs))) self.assertEqual(4, len(list(definitions))) self.assertEqual(14, len(list(statements))) class TestParseMetadata(FleetingTemporaryCacheMixin): def setUp(self): 
super(TestParseMetadata, self).setUp() self.parser = MetadataParser(manager=self.manager) def test_namespace_nocache(self): """Checks namespace is loaded into parser but not cached""" s = 'DEFINE NAMESPACE TESTNS3 AS URL "{}"'.format(test_ns_nocache_path) self.parser.parseString(s) self.assertIn('TESTNS3', self.parser.namespace_dict) self.assertEqual(0, len(self.manager.list_namespaces())) @mock_bel_resources def test_namespace_name_persistience(self, mock_get): """Tests that a namespace defined by a URL can't be overwritten by a definition by another URL""" s = 'DEFINE NAMESPACE {} AS URL "{}"'.format(HGNC_KEYWORD, HGNC_URL) self.parser.parseString(s) help_check_hgnc(self, self.parser.namespace_dict) s = 'DEFINE NAMESPACE {} AS URL "{}"'.format(HGNC_KEYWORD, 'XXXXX') with self.assertRaises(RedefinedNamespaceError): self.parser.parseString(s) help_check_hgnc(self, self.parser.namespace_dict) @mock_bel_resources def test_annotation_name_persistience_1(self, mock_get): """Tests that an annotation defined by a URL can't be overwritten by a definition by a list""" s = 'DEFINE ANNOTATION {} AS URL "{}"'.format(MESH_DISEASES_KEYWORD, MESH_DISEASES_URL) self.parser.parseString(s) self.assertIn(MESH_DISEASES_KEYWORD, self.parser.annotation_dict) s = 'DEFINE ANNOTATION {} AS LIST {{"A","B","C"}}'.format(MESH_DISEASES_KEYWORD) with self.assertRaises(RedefinedAnnotationError): self.parser.parseString(s) self.assertIn(MESH_DISEASES_KEYWORD, self.parser.annotation_dict) self.assertNotIn('A', self.parser.annotation_dict[MESH_DISEASES_KEYWORD]) self.assertIn('46, XX Disorders of Sex Development', self.parser.annotation_dict[MESH_DISEASES_KEYWORD]) def test_annotation_name_persistience_2(self): """Tests that an annotation defined by a list can't be overwritten by a definition by URL""" s = 'DEFINE ANNOTATION TextLocation AS LIST {"Abstract","Results","Legend","Review"}' self.parser.parseString(s) self.assertIn('TextLocation', self.parser.annotation_dict) s = 'DEFINE ANNOTATION 
TextLocation AS URL "{}"'.format(MESH_DISEASES_URL) with self.assertRaises(RedefinedAnnotationError): self.parser.parseString(s) self.assertIn('TextLocation', self.parser.annotation_dict) self.assertIn('Abstract', self.parser.annotation_dict['TextLocation']) def test_underscore(self): """Tests that an underscore is a valid character in an annotation name""" s = 'DEFINE ANNOTATION Text_Location AS LIST {"Abstract","Results","Legend","Review"}' self.parser.parseString(s) self.assertIn('Text_Location', self.parser.annotation_dict) @mock_bel_resources def test_control_compound(self, mock_get): lines = [ 'DEFINE ANNOTATION {} AS URL "{}"'.format(MESH_DISEASES_KEYWORD, MESH_DISEASES_URL), 'DEFINE NAMESPACE {} AS URL "{}"'.format(HGNC_KEYWORD, HGNC_URL), 'DEFINE ANNOTATION TextLocation AS LIST {"Abstract","Results","Legend","Review"}' ] self.parser.parse_lines(lines) self.assertIn(MESH_DISEASES_KEYWORD, self.parser.annotation_dict) self.assertIn(HGNC_KEYWORD, self.parser.namespace_dict) self.assertIn('TextLocation', self.parser.annotation_dict) @unittest.skipUnless('PYBEL_BASE' in os.environ, "Need local files to test local files") def test_squiggly_filepath(self): line = 'DEFINE NAMESPACE {} AS URL "~/dev/pybel/src/pybel/testing/resources/belns/hgnc-human-genes.belns"'.format(HGNC_KEYWORD) self.parser.parseString(line) help_check_hgnc(self, self.parser.namespace_dict) def test_document_metadata_exception(self): s = 'SET DOCUMENT InvalidKey = "nope"' with self.assertRaises(InvalidMetadataException): self.parser.parseString(s) def test_parse_document(self): s = '''SET DOCUMENT Name = "Alzheimer's Disease Model"''' self.parser.parseString(s) self.assertIn('name', self.parser.document_metadata) self.assertEqual("Alzheimer's Disease Model", self.parser.document_metadata['name']) # Check nothing bad happens # with self.assertLogs('pybel', level='WARNING'): self.parser.parseString(s) @mock_bel_resources def test_parse_namespace_url_file(self, mock): """Tests parsing a namespace 
by file URL""" s = 'DEFINE NAMESPACE TESTNS1 AS URL "{}"'.format(test_ns_1) self.parser.parseString(s) expected_values = { 'TestValue1': {'O'}, 'TestValue2': {'O'}, 'TestValue3': {'O'}, 'TestValue4': {'O'}, 'TestValue5': {'O'} } self.assertIn('TESTNS1', self.parser.namespace_dict) for k, values in expected_values.items(): self.assertIn(k, self.parser.namespace_dict['TESTNS1']) self.assertEqual(set(values), set(self.parser.namespace_dict['TESTNS1'][k])) def test_parse_annotation_url_file(self): """Tests parsing an annotation by file URL""" keyword = 'TESTAN1' url = Path(test_an_1).as_uri() line = 'DEFINE ANNOTATION {keyword} AS URL "{url}"'.format( keyword=keyword, url=url, ) self.parser.parseString(line) expected_values = { 'TestAnnot1': 'O', 'TestAnnot2': 'O', 'TestAnnot3': 'O', 'TestAnnot4': 'O', 'TestAnnot5': 'O' } self.assertEqual(set(expected_values), self.parser.manager.get_annotation_entry_names(url)) # FIXME ''' def test_lexicography_namespace(self): s = 'DEFINE NAMESPACE hugo AS URL "{}"'.format(HGNC_URL) with self.assertRaises(LexicographyWarning): self.parser.parseString(s) def test_lexicography_annotation(self): s = 'DEFINE ANNOTATION mesh AS URL "{}"'.format(MESH_DISEASES_URL) with self.assertRaises(LexicographyWarning): self.parser.parseString(s) ''' def test_parse_annotation_pattern(self): s = 'DEFINE ANNOTATION Test AS PATTERN "\w+"' self.parser.parseString(s) self.assertNotIn('Test', self.parser.annotation_dict) self.assertIn('Test', self.parser.annotation_regex) self.assertEqual('\w+', self.parser.annotation_regex['Test']) def test_define_namespace_regex(self): s = 'DEFINE NAMESPACE dbSNP AS PATTERN "rs[0-9]*"' self.parser.parseString(s) self.assertNotIn('dbSNP', self.parser.namespace_dict) self.assertIn('dbSNP', self.parser.namespace_regex) self.assertEqual('rs[0-9]*', self.parser.namespace_regex['dbSNP']) def test_not_semantic_version(self): s = 'SET DOCUMENT Version = "1.0"' with self.assertRaises(VersionFormatWarning): 
self.parser.parseString(s) pybel-0.12.1/tests/test_parse/test_parse_utils.py000066400000000000000000000137641334645200200221760ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for parsing utilities.""" import unittest import networkx as nx from pybel.resources.document import sanitize_file_lines from pybel.testing.constants import test_bel_simple from pybel.utils import ensure_quotes, subdict_matches from tests.constants import any_subdict_matches class TestSubdictMatching(unittest.TestCase): """Tests for matching sub-dictionaries.""" def test_dict_matches_1(self): target = { 'k1': 'v1', 'k2': 'v2' } query = { 'k1': 'v1', 'k2': 'v2' } self.assertTrue(subdict_matches(target, query)) def test_dict_matches_2(self): target = { 'k1': 'v1', 'k2': 'v2', 'k3': 'v3' } query = { 'k1': 'v1', 'k2': 'v2' } self.assertTrue(subdict_matches(target, query)) def test_dict_matches_3(self): target = { 'k1': 'v1', } query = { 'k1': 'v1', 'k2': 'v2' } self.assertFalse(subdict_matches(target, query)) def test_dict_matches_4(self): target = { 'k1': 'v1', 'k2': 'v4', 'k3': 'v3' } query = { 'k1': 'v1', 'k2': 'v2' } self.assertFalse(subdict_matches(target, query)) def test_dict_matches_5(self): target = { 'k1': 'v1', 'k2': 'v2' } query = { 'k1': 'v1', 'k2': ['v2', 'v3'] } self.assertTrue(subdict_matches(target, query)) def test_dict_matches_6(self): target = { 'k1': 'v1', 'k2': ['v2', 'v3'] } query = { 'k1': 'v1', 'k2': 'v4' } self.assertFalse(subdict_matches(target, query)) def test_dict_matches_7_partial(self): """Tests a partial match""" target = { 'k1': 'v1', 'k2': 'v2' } query = { 'k1': 'v1', 'k2': {'v2': 'v3'} } self.assertFalse(subdict_matches(target, query)) def test_dict_matches_7_exact(self): """Test a partial match.""" target = { 'k1': 'v1', 'k2': 'v2' } query = { 'k1': 'v1', 'k2': {'v2': 'v3'} } self.assertFalse(subdict_matches(target, query, partial_match=False)) def test_dict_matches_8_partial(self): """Test a partial match.""" target = { 'k1': 'v1', 'k2': {'v2': 
'v3', 'v4': 'v5'} } query = { 'k1': 'v1', 'k2': {'v2': 'v3'} } self.assertTrue(subdict_matches(target, query)) def test_dict_matches_graph(self): """Test matching a graph.""" g = nx.MultiDiGraph() g.add_node(1) g.add_node(2) g.add_edge(1, 2, relation='yup') g.add_edge(1, 2, relation='nope') d = {'relation': 'yup'} self.assertTrue(any_subdict_matches(g[1][2], d)) class TestSanitize(unittest.TestCase): def test_a(self): s = '''SET Evidence = "The phosphorylation of S6K at Thr389, which is the TORC1-mediated site, was not inhibited in the SIN1-/- cells (Figure 5A)."'''.split('\n') expect = [ (1, 'SET Evidence = "The phosphorylation of S6K at Thr389, which is the TORC1-mediated site, was not ' 'inhibited in the SIN1-/- cells (Figure 5A)."')] result = list(sanitize_file_lines(s)) self.assertEqual(expect, result) def test_b(self): s = [ '# Set document-defined annotation values\n', 'SET Species = 9606', 'SET Tissue = "t-cells"', '# Create an Evidence Line for a block of BEL Statements', 'SET Evidence = "Here we show that interfereon-alpha (IFNalpha) is a potent producer \\', 'of SOCS expression in human T cells, as high expression of CIS, SOCS-1, SOCS-2, \\', 'and SOCS-3 was detectable after IFNalpha stimulation. After 4 h of stimulation \\', 'CIS, SOCS-1, and SOCS-3 had ret' ] result = list(sanitize_file_lines(s)) expect = [ (2, 'SET Species = 9606'), (3, 'SET Tissue = "t-cells"'), (5, 'SET Evidence = "Here we show that interfereon-alpha (IFNalpha) is a potent producer of SOCS expression ' 'in human T cells, as high expression of CIS, SOCS-1, SOCS-2, and SOCS-3 was detectable after IFNalpha ' 'stimulation. 
After 4 h of stimulation CIS, SOCS-1, and SOCS-3 had ret') ] self.assertEqual(expect, result) def test_c(self): s = [ 'SET Evidence = "yada yada yada" //this is a comment' ] result = list(sanitize_file_lines(s)) expect = [(1, 'SET Evidence = "yada yada yada"')] self.assertEqual(expect, result) def test_d(self): """Test forgotten delimiters""" s = [ 'SET Evidence = "Something', 'or other', 'or other"' ] result = list(sanitize_file_lines(s)) expect = [(1, 'SET Evidence = "Something or other or other"')] self.assertEqual(expect, result) def test_e(self): with open(test_bel_simple) as f: lines = list(sanitize_file_lines(f)) self.assertEqual(26, len(lines)) def test_f(self): s = '''SET Evidence = "Arterial cells are highly susceptible to oxidative stress, which can induce both necrosis and apoptosis (programmed cell death) [1,2]"'''.split('\n') lines = list(sanitize_file_lines(s)) self.assertEqual(1, len(lines)) def test_quote(self): a = "word1 word2" self.assertEqual('"word1 word2"', ensure_quotes(a)) b = "word1" self.assertEqual('word1', ensure_quotes(b)) c = "word1$#" self.assertEqual('"word1$#"', ensure_quotes(c)) pybel-0.12.1/tests/test_struct/000077500000000000000000000000001334645200200164325ustar00rootroot00000000000000pybel-0.12.1/tests/test_struct/__init__.py000066400000000000000000000000761334645200200205460ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for :mod:`pybel.struct`.""" pybel-0.12.1/tests/test_struct/test_filters/000077500000000000000000000000001334645200200211415ustar00rootroot00000000000000pybel-0.12.1/tests/test_struct/test_filters/__init__.py000066400000000000000000000001011334645200200232420ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for PyBEL filter functions.""" pybel-0.12.1/tests/test_struct/test_filters/test_edge_predicate_builders.py000066400000000000000000000061651334645200200273770ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for edge predicate builders.""" import unittest from 
pybel.constants import CITATION, CITATION_AUTHORS, CITATION_REFERENCE, CITATION_TYPE, CITATION_TYPE_PUBMED from pybel.struct.filters.edge_predicate_builders import ( build_author_inclusion_filter, build_pmid_inclusion_filter, ) pmid1 = '1' pmid2 = '2' pmid3 = '3' author1 = '1' author2 = '2' author3 = '3' class TestEdgePredicateBuilders(unittest.TestCase): """Tests for edge predicate builders.""" def test_build_pmid_inclusion_filter(self): """Test building a predicate for a single PubMed identifier.""" pmid_inclusion_filter = build_pmid_inclusion_filter(pmid1) self.assertTrue(pmid_inclusion_filter({ CITATION: { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: pmid1, } })) self.assertFalse(pmid_inclusion_filter({ CITATION: { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: pmid2, } })) def test_build_pmid_set_inclusion_filter(self): """Test building a predicate for multiple PubMed identifiers.""" pmids = {pmid1, pmid2} pmid_inclusion_filter = build_pmid_inclusion_filter(pmids) self.assertTrue(pmid_inclusion_filter({ CITATION: { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: pmid1, } })) self.assertTrue(pmid_inclusion_filter({ CITATION: { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: pmid2, } })) self.assertFalse(pmid_inclusion_filter({ CITATION: { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: pmid3, } })) def test_build_author_inclusion_filter(self): """Test building a predicate for a single author.""" author_inclusion_filter = build_author_inclusion_filter(author1) self.assertTrue(author_inclusion_filter({ CITATION: { CITATION_AUTHORS: [author1] } })) self.assertTrue(author_inclusion_filter({ CITATION: { CITATION_AUTHORS: [author1, author2] } })) self.assertFalse(author_inclusion_filter({ CITATION: { CITATION_AUTHORS: [author3] } })) def test_build_author_set_inclusion_filter(self): """Test building a predicate for multiple authors.""" author = {author1, author2} author_inclusion_filter = 
build_author_inclusion_filter(author) self.assertTrue(author_inclusion_filter({ CITATION: { CITATION_AUTHORS: [author1] } })) self.assertTrue(author_inclusion_filter({ CITATION: { CITATION_AUTHORS: [author1, author2] } })) self.assertFalse(author_inclusion_filter({ CITATION: { CITATION_AUTHORS: [author3] } })) pybel-0.12.1/tests/test_struct/test_filters/test_edge_predicates.py000066400000000000000000000017551334645200200256710ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for edge predicates""" import unittest from pybel import BELGraph from pybel.dsl import pathology, protein from pybel.struct.filters.edge_predicates import has_pathology_causal from pybel.testing.utils import n class TestEdgePredicates(unittest.TestCase): """Tests for edge predicates.""" def test_has_pathology(self): """Test for checking edges that have a causal pathology.""" graph = BELGraph() a, b, c = protein(n(), n()), pathology(n(), n()), pathology(n(), n()) key = graph.add_increases(a, b, n(), n()) self.assertFalse(has_pathology_causal(graph, a, b, key)) key = graph.add_increases(b, a, n(), n()) self.assertTrue(has_pathology_causal(graph, b, a, key)) key = graph.add_association(b, a, n(), n()) self.assertFalse(has_pathology_causal(graph, b, a, key)) key = graph.add_increases(a, c, n(), n()) self.assertFalse(has_pathology_causal(graph, a, c, key)) pybel-0.12.1/tests/test_struct/test_filters/test_node_predicate_builders.py000066400000000000000000000110041334645200200274040ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for functions for building node predicates.""" import unittest from pybel import BELGraph from pybel.constants import GENE, NAME, PROTEIN from pybel.dsl import bioprocess, gene, protein from pybel.struct import filter_nodes from pybel.struct.filters import invert_node_predicate from pybel.struct.filters.node_predicate_builders import ( build_node_graph_data_search, build_node_key_search, build_node_name_search, data_missing_key_builder, 
function_inclusion_filter_builder, ) from pybel.testing.utils import n class TestFunctionInclusionFilterBuilder(unittest.TestCase): """Tests for the function_inclusion_filter_builder function.""" def test_type_error(self): """Test that a type error is thrown for an invalid argument type.""" with self.assertRaises(TypeError): function_inclusion_filter_builder(5) def test_empty_list_error(self): """Test that a value error is thrown for an empty list.""" with self.assertRaises(ValueError): function_inclusion_filter_builder([]) def test_single(self): """Test building a node predicate with a single function.""" f = function_inclusion_filter_builder(GENE) p1 = protein(n(), n()) g1 = gene(n(), n()) g = BELGraph() g.add_node_from_data(p1) g.add_node_from_data(g1) self.assertIn(p1, g) self.assertIn(g1, g) self.assertFalse(f(g, p1)) self.assertTrue(f(g, g1)) f = invert_node_predicate(f) self.assertTrue(f(g, p1)) self.assertFalse(f(g, g1)) def test_multiple(self): """Test building a node predicate with multiple functions.""" f = function_inclusion_filter_builder([GENE, PROTEIN]) p1 = protein(n(), n()) g1 = gene(n(), n()) b1 = bioprocess(n(), n()) g = BELGraph() g.add_node_from_data(p1) g.add_node_from_data(g1) g.add_node_from_data(b1) self.assertIn(p1, g) self.assertIn(g1, g) self.assertIn(b1, g) self.assertTrue(f(g, p1)) self.assertTrue(f(g, g1)) self.assertFalse(f(g, b1)) f = invert_node_predicate(f) self.assertFalse(f(g, p1)) self.assertFalse(f(g, g1)) self.assertTrue(f(g, b1)) class TestNodePredicateBuilders(unittest.TestCase): """Tests for node predicate builders.""" def test_data_missing_key_builder(self): """Test the data_missing_key_builder function.""" graph = BELGraph() p1 = protein('HGNC', n()) p2 = protein('HGNC', n()) graph.add_node_from_data(p1) graph.add_node_from_data(p2) key, other_key = 'k1', 'k2' data_missing_key = data_missing_key_builder(key) graph.nodes[p1][key] = n() graph.nodes[p2][other_key] = n() nodes = set(filter_nodes(graph, data_missing_key)) 
self.assertNotIn(p1, nodes) self.assertIn(p2, nodes) def test_build_node_data_search(self): """Test build_node_data_search.""" def test_key_predicate(datum): """Check the data is greater than zero. :rtype: bool """ return 0 < datum key = n() data_predicate = build_node_graph_data_search(key, test_key_predicate) graph = BELGraph() p1 = protein('HGNC', n()) graph.add_node_from_data(p1) graph.nodes[p1][key] = 0 self.assertFalse(data_predicate(graph, p1)) p2 = protein('HGNC', n()) graph.add_node_from_data(p2) graph.nodes[p2][key] = 5 self.assertTrue(data_predicate(graph, p2)) p3 = protein('HGNC', n()) graph.add_node_from_data(p3) self.assertFalse(data_predicate(graph, p3)) def test_build_node_key_search(self): """Test build_node_key_search.""" node_key_search = build_node_key_search(query='app', key=NAME) node_name_search = build_node_name_search(query='app') graph = BELGraph() p1 = protein('HGNC', 'APP') graph.add_node_from_data(p1) self.assertTrue(node_key_search(graph, p1)) self.assertTrue(node_name_search(graph, p1)) p2 = protein('MGI', 'app') graph.add_node_from_data(p2) self.assertTrue(node_key_search(graph, p2)) self.assertTrue(node_name_search(graph, p2)) p3 = protein('HGNC', 'nope') graph.add_node_from_data(p3) self.assertFalse(node_key_search(graph, p3)) self.assertFalse(node_name_search(graph, p3)) pybel-0.12.1/tests/test_struct/test_filters/test_node_predicates.py000066400000000000000000000520461334645200200257110ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for node predicates.""" import unittest from pybel import BELGraph from pybel.constants import ( ACTIVITY, ANNOTATIONS, ASSOCIATION, CAUSES_NO_CHANGE, CITATION, CITATION_AUTHORS, CITATION_REFERENCE, CITATION_TYPE, CITATION_TYPE_ONLINE, CITATION_TYPE_PUBMED, DECREASES, DEGRADATION, DIRECTLY_DECREASES, DIRECTLY_INCREASES, EVIDENCE, GMOD, INCREASES, LOCATION, MODIFIER, OBJECT, POLAR_RELATIONS, POSITIVE_CORRELATION, RELATION, SUBJECT, TRANSLOCATION, ) from pybel.dsl import ( abundance, 
activity, degradation, entity, fragment, gene, gmod, hgvs, pmod, protein, secretion, translocation, ) from pybel.struct.filters.edge_predicate_builders import build_relation_predicate from pybel.struct.filters.edge_predicates import ( edge_has_activity, edge_has_annotation, edge_has_degradation, edge_has_translocation, has_authors, has_polarity, has_provenance, has_pubmed, is_associative_relation, is_causal_relation, is_direct_causal_relation, ) from pybel.struct.filters.node_predicates import ( has_activity, has_causal_in_edges, has_causal_out_edges, has_fragment, has_gene_modification, has_hgvs, has_protein_modification, has_variant, is_abundance, is_causal_central, is_causal_sink, is_causal_source, is_degraded, is_gene, is_pathology, is_protein, is_translocated, keep_node_permissive, node_exclusion_predicate_builder, node_inclusion_predicate_builder, not_pathology, ) from pybel.testing.utils import n p1 = protein(name='BRAF', namespace='HGNC') p2 = protein(name='BRAF', namespace='HGNC', variants=[hgvs('p.Val600Glu'), pmod('Ph')]) p3 = protein(name='APP', namespace='HGNC', variants=fragment(start=672, stop=713)) p4 = protein(name='2', namespace='HGNC') g1 = gene(name='BRAF', namespace='HGNC', variants=gmod('Me')) class TestNodePredicates(unittest.TestCase): """Tests for node predicates.""" def test_none_data(self): """Test permissive node predicate with a node data dictionary.""" self.assertTrue(keep_node_permissive(p1)) def test_none(self): """Test permissive node predicate with graph and tuple.""" g = BELGraph() p1_tuple = g.add_node_from_data(p1) self.assertTrue(keep_node_permissive(g, p1_tuple)) def test_p1_data_variants(self): """Test node predicates on BRAF.""" self.assertFalse(is_abundance(p1)) self.assertFalse(is_gene(p1)) self.assertTrue(is_protein(p1)) self.assertFalse(is_pathology(p1)) self.assertTrue(not_pathology(p1)) self.assertFalse(has_variant(p1)) self.assertFalse(has_protein_modification(p1)) self.assertFalse(has_gene_modification(p1)) 
self.assertFalse(has_hgvs(p1)) self.assertFalse(has_fragment(p1)) def test_p1_tuple_variants(self): """Test node predicates on the node tuple from BRAF.s""" g = BELGraph() g.add_node_from_data(p1) self.assertFalse(is_abundance(g, p1)) self.assertFalse(is_gene(g, p1)) self.assertTrue(is_protein(g, p1)) self.assertFalse(is_pathology(g, p1)) self.assertTrue(not_pathology(g, p1)) self.assertFalse(has_variant(g, p1)) self.assertFalse(has_protein_modification(g, p1)) self.assertFalse(has_gene_modification(g, p1)) self.assertFalse(has_hgvs(g, p1)) def test_p2_data_variants(self): self.assertFalse(is_abundance(p2)) self.assertFalse(is_gene(p2)) self.assertTrue(is_protein(p2)) self.assertFalse(is_pathology(p2)) self.assertTrue(not_pathology(p2)) self.assertTrue(has_variant(p2)) self.assertFalse(has_gene_modification(p2)) self.assertTrue(has_protein_modification(p2)) self.assertTrue(has_hgvs(p2)) def test_p2_tuple_variants(self): g = BELGraph() g.add_node_from_data(p2) self.assertFalse(is_abundance(g, p2)) self.assertFalse(is_gene(g, p2)) self.assertTrue(is_protein(g, p2)) self.assertFalse(is_pathology(g, p2)) self.assertTrue(not_pathology(g, p2)) self.assertTrue(has_variant(g, p2)) self.assertFalse(has_gene_modification(g, p2)) self.assertTrue(has_protein_modification(g, p2)) self.assertTrue(has_hgvs(g, p2)) def test_p3(self): self.assertFalse(is_abundance(p3)) self.assertFalse(is_gene(p3)) self.assertTrue(is_protein(p3)) self.assertFalse(is_pathology(p3)) self.assertTrue(not_pathology(p3)) self.assertTrue(has_variant(p3)) self.assertFalse(has_gene_modification(p3)) self.assertFalse(has_protein_modification(p3)) self.assertFalse(has_hgvs(p3)) self.assertTrue(has_fragment(p3)) def test_g1_variants(self): self.assertFalse(is_abundance(g1)) self.assertTrue(is_gene(g1)) self.assertFalse(is_protein(g1)) self.assertFalse(is_pathology(g1)) self.assertTrue(has_variant(g1)) self.assertTrue(has_gene_modification(g1), msg='Should have {}: {}'.format(GMOD, g1)) 
self.assertFalse(has_protein_modification(g1)) self.assertFalse(has_hgvs(g1)) def test_fragments(self): self.assertTrue(has_fragment( protein(name='APP', namespace='HGNC', variants=[fragment(start=672, stop=713, description='random text')]))) self.assertTrue(has_fragment(protein(name='APP', namespace='HGNC', variants=[fragment()]))) def test_p1_active(self): """cat(p(HGNC:HSD11B1)) increases deg(a(CHEBI:cortisol))""" g = BELGraph() u_node = protein(name='HSD11B1', namespace='HGNC') v_node = abundance(name='cortisol', namespace='CHEBI', identifier='17650') u = g.add_node_from_data(u_node) v = g.add_node_from_data(v_node) g.add_qualified_edge( u_node, v_node, relation=INCREASES, citation={ CITATION_TYPE: CITATION_TYPE_ONLINE, CITATION_REFERENCE: 'https://www.ncbi.nlm.nih.gov/gene/3290' }, evidence="Entrez Gene Summary: Human: The protein encoded by this gene is a microsomal enzyme that " "catalyzes the conversion of the stress hormone cortisol to the inactive metabolite cortisone. " "In addition, the encoded protein can catalyze the reverse reaction, the conversion of cortisone " "to cortisol. Too much cortisol can lead to central obesity, and a particular variation in this " "gene has been associated with obesity and insulin resistance in children. 
Two transcript " "variants encoding the same protein have been found for this gene.", annotations={'Species': '9606'}, subject_modifier=activity('cat'), object_modifier=degradation() ) self.assertFalse(is_translocated(g, u)) self.assertFalse(is_degraded(g, u)) self.assertTrue(has_activity(g, u)) self.assertFalse(is_translocated(g, v)) self.assertTrue(is_degraded(g, v)) self.assertFalse(has_activity(g, v)) def test_object_has_translocation(self): """p(HGNC: EGF) increases tloc(p(HGNC: VCP), GOCCID: 0005634, GOCCID: 0005737)""" g = BELGraph() u_node = protein(name='EFG', namespace='HGNC') v_node = protein(name='VCP', namespace='HGNC') u = g.add_node_from_data(u_node) v = g.add_node_from_data(v_node) g.add_qualified_edge( u_node, v_node, relation=INCREASES, citation='10855792', evidence="Although found predominantly in the cytoplasm and, less abundantly, in the nucleus, VCP can be " "translocated from the nucleus after stimulation with epidermal growth factor.", annotations={'Species': '9606'}, object_modifier=translocation( from_loc=entity(namespace='GO', identifier='0005634'), to_loc=entity(namespace='GO', identifier='0005737') ) ) self.assertFalse(is_translocated(g, u)) self.assertFalse(is_degraded(g, u)) self.assertFalse(has_activity(g, u)) self.assertFalse(has_causal_in_edges(g, u)) self.assertTrue(has_causal_out_edges(g, u)) self.assertTrue(is_translocated(g, v)) self.assertFalse(is_degraded(g, v)) self.assertFalse(has_activity(g, v)) self.assertTrue(has_causal_in_edges(g, v)) self.assertFalse(has_causal_out_edges(g, v)) def test_object_has_secretion(self): """p(MGI:Il4) increases sec(p(MGI:Cxcl1))""" g = BELGraph() u_node = protein(name='Il4', namespace='MGI') v_node = protein(name='Cxcl1', namespace='MGI') u = g.add_node_from_data(u_node) v = g.add_node_from_data(v_node) g.add_increases( u_node, v_node, citation='10072486', evidence='Compared with controls treated with culture medium alone, IL-4 and IL-5 induced significantly ' 'higher levels of MIP-2 and KC 
production; IL-4 also increased the production of MCP-1 ' '(Fig. 2, A and B)....we only tested the effects of IL-3, IL-4, IL-5, and IL-13 on chemokine ' 'expression and cellular infiltration....Recombinant cytokines were used, ... to treat naive ' 'BALB/c mice.', annotations={'Species': '10090', 'MeSH': 'bronchoalveolar lavage fluid'}, object_modifier=secretion() ) self.assertFalse(is_translocated(g, u)) self.assertFalse(is_degraded(g, u)) self.assertFalse(has_activity(g, u)) self.assertFalse(has_causal_in_edges(g, u)) self.assertTrue(has_causal_out_edges(g, u)) self.assertTrue(is_translocated(g, v)) self.assertFalse(is_degraded(g, v)) self.assertFalse(has_activity(g, v)) self.assertTrue(has_causal_in_edges(g, v)) self.assertFalse(has_causal_out_edges(g, v)) def test_subject_has_secretion(self): """sec(p(MGI:S100b)) increases a(CHEBI:"nitric oxide")""" g = BELGraph() u_node = protein(name='S100b', namespace='MGI') v_node = abundance(name='nitric oxide', namespace='CHEBI') u = g.add_node_from_data(u_node) v = g.add_node_from_data(v_node) g.add_increases( u_node, v_node, citation='11180510', evidence='S100B protein is also secreted by astrocytes and acts on these cells to stimulate nitric oxide ' 'secretion in an autocrine manner.', annotations={'Species': '10090', 'Cell': 'astrocyte'}, subject_modifier=secretion() ) self.assertTrue(is_translocated(g, u)) self.assertFalse(is_degraded(g, u)) self.assertFalse(has_activity(g, u)) self.assertFalse(has_causal_in_edges(g, u)) self.assertTrue(has_causal_out_edges(g, u)) self.assertFalse(is_translocated(g, v)) self.assertFalse(is_degraded(g, v)) self.assertFalse(has_activity(g, v)) self.assertTrue(has_causal_in_edges(g, v)) self.assertFalse(has_causal_out_edges(g, v)) def test_node_exclusion_data(self): g = BELGraph() u = protein(name='S100b', namespace='MGI') v = abundance(name='nitric oxide', namespace='CHEBI') w = abundance(name='cortisol', namespace='CHEBI', identifier='17650') g.add_node_from_data(u) 
g.add_node_from_data(v) g.add_node_from_data(w) f = node_exclusion_predicate_builder([u]) self.assertFalse(f(u)) self.assertTrue(f(v)) self.assertTrue(f(w)) f = node_exclusion_predicate_builder([u, v]) self.assertFalse(f(u)) self.assertFalse(f(v)) self.assertTrue(f(w)) f = node_exclusion_predicate_builder([]) self.assertTrue(f(u)) self.assertTrue(f(v)) self.assertTrue(f(w)) def test_node_exclusion_tuples(self): g = BELGraph() u = g.add_node_from_data(protein(name='S100b', namespace='MGI')) v = g.add_node_from_data(abundance(name='nitric oxide', namespace='CHEBI')) w = g.add_node_from_data(abundance(name='cortisol', namespace='CHEBI', identifier='17650')) f = node_exclusion_predicate_builder([u]) self.assertFalse(f(g, u)) self.assertTrue(f(g, v)) self.assertTrue(f(g, w)) f = node_exclusion_predicate_builder([u, v]) self.assertFalse(f(g, u)) self.assertFalse(f(g, v)) self.assertTrue(f(g, w)) f = node_exclusion_predicate_builder([]) self.assertTrue(f(g, u)) self.assertTrue(f(g, v)) self.assertTrue(f(g, w)) def test_node_inclusion_data(self): g = BELGraph() u = protein(name='S100b', namespace='MGI') v = abundance(name='nitric oxide', namespace='CHEBI') w = abundance(name='cortisol', namespace='CHEBI', identifier='17650') g.add_node_from_data(u) g.add_node_from_data(v) g.add_node_from_data(w) f = node_inclusion_predicate_builder([u]) self.assertTrue(f(u)) self.assertFalse(f(v)) self.assertFalse(f(w)) f = node_inclusion_predicate_builder([u, v]) self.assertTrue(f(u)) self.assertTrue(f(v)) self.assertFalse(f(w)) f = node_inclusion_predicate_builder([]) self.assertFalse(f(u)) self.assertFalse(f(v)) self.assertFalse(f(w)) def test_node_inclusion_tuples(self): g = BELGraph() u = g.add_node_from_data(protein(name='S100b', namespace='MGI')) v = g.add_node_from_data(abundance(name='nitric oxide', namespace='CHEBI')) w = g.add_node_from_data(abundance(name='cortisol', namespace='CHEBI', identifier='17650')) f = node_inclusion_predicate_builder([u]) self.assertTrue(f(g, u)) 
self.assertFalse(f(g, v)) self.assertFalse(f(g, w)) f = node_inclusion_predicate_builder([u, v]) self.assertTrue(f(g, u)) self.assertTrue(f(g, v)) self.assertFalse(f(g, w)) f = node_inclusion_predicate_builder([]) self.assertFalse(f(g, u)) self.assertFalse(f(g, v)) self.assertFalse(f(g, w)) def test_causal_source(self): g = BELGraph() a, b, c = (protein(n(), n()) for _ in range(3)) g.add_increases(a, b, n(), n()) g.add_increases(b, c, n(), n()) self.assertTrue(is_causal_source(g, a)) self.assertFalse(is_causal_central(g, a)) self.assertFalse(is_causal_sink(g, a)) self.assertFalse(is_causal_source(g, b)) self.assertTrue(is_causal_central(g, b)) self.assertFalse(is_causal_sink(g, b)) self.assertFalse(is_causal_source(g, c)) self.assertFalse(is_causal_central(g, c)) self.assertTrue(is_causal_sink(g, c)) class TestEdgePredicate(unittest.TestCase): def test_has_polarity_dict(self): for relation in POLAR_RELATIONS: self.assertTrue(has_polarity({RELATION: relation})) self.assertFalse(has_polarity({RELATION: ASSOCIATION})) def test_has_polarity(self): g = BELGraph() a, b, c = (protein(n(), n()) for _ in range(3)) key1 = g.add_increases(a, b, n(), n()) self.assertTrue(has_polarity(g, a, b, key1)) key2 = g.add_association(b, c, n(), n()) self.assertFalse(has_polarity(g, b, c, key2)) def test_has_provenance(self): self.assertFalse(has_provenance({})) self.assertFalse(has_provenance({CITATION: {}})) self.assertFalse(has_provenance({EVIDENCE: ''})) self.assertTrue(has_provenance({CITATION: {}, EVIDENCE: ''})) def test_has_pubmed(self): self.assertTrue(has_pubmed({CITATION: {CITATION_TYPE: CITATION_TYPE_PUBMED}})) self.assertFalse(has_pubmed({CITATION: {CITATION_TYPE: CITATION_TYPE_ONLINE}})) self.assertFalse(has_pubmed({})) def test_has_authors(self): self.assertFalse(has_authors({})) self.assertFalse(has_authors({CITATION: {}})) self.assertFalse(has_authors({CITATION: {CITATION_AUTHORS: []}})) self.assertTrue(has_authors({CITATION: {CITATION_AUTHORS: ['One guy']}})) def 
test_is_causal(self): self.assertTrue(is_causal_relation({RELATION: INCREASES})) self.assertTrue(is_causal_relation({RELATION: DECREASES})) self.assertTrue(is_causal_relation({RELATION: DIRECTLY_INCREASES})) self.assertTrue(is_causal_relation({RELATION: DIRECTLY_DECREASES})) self.assertFalse(is_causal_relation({RELATION: ASSOCIATION})) self.assertFalse(is_causal_relation({RELATION: POSITIVE_CORRELATION})) def test_is_direct_causal(self): self.assertTrue(is_direct_causal_relation({RELATION: DIRECTLY_INCREASES})) self.assertTrue(is_direct_causal_relation({RELATION: DIRECTLY_DECREASES})) self.assertFalse(is_direct_causal_relation({RELATION: INCREASES})) self.assertFalse(is_direct_causal_relation({RELATION: DECREASES})) self.assertFalse(is_direct_causal_relation({RELATION: ASSOCIATION})) self.assertFalse(is_direct_causal_relation({RELATION: POSITIVE_CORRELATION})) def test_is_association(self): self.assertTrue(is_associative_relation({RELATION: ASSOCIATION})) self.assertFalse(is_associative_relation({RELATION: INCREASES})) self.assertFalse(is_associative_relation({RELATION: CAUSES_NO_CHANGE})) self.assertFalse(is_associative_relation({RELATION: DECREASES})) self.assertFalse(is_associative_relation({RELATION: DIRECTLY_INCREASES})) self.assertFalse(is_associative_relation({RELATION: DIRECTLY_DECREASES})) def test_build_is_association(self): """Test build_relation_predicate.""" alternate_is_associative_relation = build_relation_predicate(ASSOCIATION) g = BELGraph() g.add_edge(p1, p2, key=0, **{RELATION: ASSOCIATION}) g.add_edge(p2, p3, key=0, **{RELATION: INCREASES}) self.assertTrue(alternate_is_associative_relation(g, p1, p2, 0)) self.assertFalse(alternate_is_associative_relation(g, p2, p3, 0)) def test_build_is_increases_or_decreases(self): """Test build_relation_predicate with multiple relations.""" is_increase_or_decrease = build_relation_predicate([INCREASES, DECREASES]) g = BELGraph() g.add_edge(p1, p2, key=0, **{RELATION: ASSOCIATION}) g.add_edge(p2, p3, key=0, 
**{RELATION: INCREASES}) g.add_edge(p3, p4, key=0, **{RELATION: DECREASES}) self.assertFalse(is_increase_or_decrease(g, p1, p2, 0)) self.assertTrue(is_increase_or_decrease(g, p2, p3, 0)) self.assertTrue(is_increase_or_decrease(g, p3, p4, 0)) def test_has_degradation(self): self.assertTrue(edge_has_degradation({SUBJECT: {MODIFIER: DEGRADATION}})) self.assertTrue(edge_has_degradation({OBJECT: {MODIFIER: DEGRADATION}})) self.assertFalse(edge_has_degradation({SUBJECT: {MODIFIER: TRANSLOCATION}})) self.assertFalse(edge_has_degradation({SUBJECT: {MODIFIER: ACTIVITY}})) self.assertFalse(edge_has_degradation({SUBJECT: {LOCATION: None}})) self.assertFalse(edge_has_degradation({OBJECT: {MODIFIER: TRANSLOCATION}})) self.assertFalse(edge_has_degradation({OBJECT: {MODIFIER: ACTIVITY}})) self.assertFalse(edge_has_degradation({OBJECT: {LOCATION: None}})) def test_has_translocation(self): self.assertTrue(edge_has_translocation({SUBJECT: {MODIFIER: TRANSLOCATION}})) self.assertTrue(edge_has_translocation({OBJECT: {MODIFIER: TRANSLOCATION}})) self.assertFalse(edge_has_translocation({SUBJECT: {MODIFIER: ACTIVITY}})) self.assertFalse(edge_has_translocation({SUBJECT: {LOCATION: None}})) self.assertFalse(edge_has_translocation({SUBJECT: {MODIFIER: DEGRADATION}})) self.assertFalse(edge_has_translocation({OBJECT: {MODIFIER: ACTIVITY}})) self.assertFalse(edge_has_translocation({OBJECT: {LOCATION: None}})) self.assertFalse(edge_has_translocation({OBJECT: {MODIFIER: DEGRADATION}})) def test_has_activity(self): self.assertTrue(edge_has_activity({SUBJECT: {MODIFIER: ACTIVITY}})) self.assertTrue(edge_has_activity({OBJECT: {MODIFIER: ACTIVITY}})) self.assertFalse(edge_has_activity({SUBJECT: {MODIFIER: TRANSLOCATION}})) self.assertFalse(edge_has_activity({OBJECT: {MODIFIER: TRANSLOCATION}})) self.assertFalse(edge_has_activity({SUBJECT: {LOCATION: None}})) self.assertFalse(edge_has_activity({SUBJECT: {MODIFIER: DEGRADATION}})) self.assertFalse(edge_has_activity({OBJECT: {LOCATION: None}})) 
self.assertFalse(edge_has_activity({OBJECT: {MODIFIER: DEGRADATION}})) def test_has_annotation(self): self.assertFalse(edge_has_annotation({}, 'Subgraph')) self.assertFalse(edge_has_annotation({ANNOTATIONS: {}}, 'Subgraph')) self.assertFalse(edge_has_annotation({ANNOTATIONS: {'Subgraph': None}}, 'Subgraph')) self.assertTrue(edge_has_annotation({ANNOTATIONS: {'Subgraph': 'value'}}, 'Subgraph')) self.assertFalse(edge_has_annotation({ANNOTATIONS: {'Nope': 'value'}}, 'Subgraph')) pybel-0.12.1/tests/test_struct/test_filters/test_struct_filters.py000066400000000000000000000234111334645200200256270ustar00rootroot00000000000000# -*- coding: utf-8 -*- import unittest from pybel import BELGraph from pybel.constants import ANNOTATIONS from pybel.dsl import protein from pybel.struct.filters import ( and_edge_predicates, concatenate_node_predicates, count_passed_edge_filter, count_passed_node_filter, filter_edges, get_nodes, invert_edge_predicate, ) from pybel.struct.filters.edge_predicate_builders import ( _annotation_dict_all_filter, _annotation_dict_any_filter, build_annotation_dict_all_filter, build_annotation_dict_any_filter, ) from pybel.struct.filters.edge_predicates import keep_edge_permissive from pybel.struct.filters.node_predicates import keep_node_permissive from pybel.testing.utils import n def make_edge_iterator_set(it): return {(u, v) for u, v, _ in it} class TestNodeFilters(unittest.TestCase): def setUp(self): self.universe = BELGraph() self.universe.add_edge(1, 2) self.universe.add_edge(2, 3) self.universe.add_edge(3, 7) self.universe.add_edge(1, 4) self.universe.add_edge(1, 5) self.universe.add_edge(5, 6) self.universe.add_edge(8, 2) self.graph = BELGraph() self.graph.add_edge(1, 2) self.all_universe_nodes = {1, 2, 3, 4, 5, 6, 7, 8} self.all_graph_nodes = {1, 2} def test_no_node_filter_argument(self): nodes = get_nodes(self.universe) self.assertEqual(self.all_universe_nodes, nodes) def test_keep_node_permissive(self): nodes = get_nodes(self.universe, 
keep_node_permissive) self.assertEqual(self.all_universe_nodes, nodes) def test_missing_node_filter(self): nodes = get_nodes(self.universe, concatenate_node_predicates()) self.assertEqual(self.all_universe_nodes, nodes) def test_concatenate_single_node_filter(self): nodes = get_nodes(self.universe, [keep_node_permissive]) self.assertEqual(self.all_universe_nodes, nodes) def test_concatenate_multiple_node_filters(self): def even(graph, node): return node % 2 == 0 def big(graph, node): return node > 3 nodes = get_nodes(self.universe, [even, big]) self.assertEqual({4, 6, 8}, nodes) self.assertEqual(3, count_passed_node_filter(self.universe, [even, big])) def test_no_edge_filter(self): edges = make_edge_iterator_set(filter_edges(self.graph)) self.assertEqual({(1, 2)}, edges) def test_keep_edge_permissive(self): edges = make_edge_iterator_set(filter_edges(self.graph, keep_edge_permissive)) self.assertEqual({(1, 2)}, edges) def test_keep_edge_unpermissive(self): keep_edge_restrictive = invert_edge_predicate(keep_edge_permissive) edges = make_edge_iterator_set(filter_edges(self.graph, keep_edge_restrictive)) self.assertEqual(set(), edges) def test_missing_edge_filter(self): edges = make_edge_iterator_set(filter_edges(self.graph, and_edge_predicates())) self.assertEqual(({(1, 2)}), edges) def test_concatenate_single_edge_filter(self): edges = make_edge_iterator_set(filter_edges(self.graph, [keep_edge_permissive])) self.assertEqual({(1, 2)}, edges) def test_concatenate_multiple_edge_filter(self): def has_odd_source(graph, u, v, k): return u % 2 != 0 def has_even_target(graph, u, v, k): return v % 2 == 0 edges = make_edge_iterator_set(filter_edges(self.universe, [has_odd_source, has_even_target])) self.assertEqual({(1, 2), (1, 4), (5, 6)}, edges) self.assertEqual(3, count_passed_edge_filter(self.universe, [has_odd_source, has_even_target])) has_even_source = invert_edge_predicate(has_odd_source) edges = make_edge_iterator_set(filter_edges(self.universe, has_even_source)) 
self.assertEqual({(2, 3), (8, 2)}, edges) class TestEdgeFilters(unittest.TestCase): def test_a(self): self.assertTrue(_annotation_dict_any_filter( {ANNOTATIONS: {'A': {'1', '2'}}}, {'A': {'1'}} )) self.assertTrue(_annotation_dict_any_filter( {ANNOTATIONS: {'A': {'1', '2'}}}, {'A': {'1', '2'}} )) self.assertTrue(_annotation_dict_any_filter( {ANNOTATIONS: {'A': {'1', '2'}}}, {'A': {'1', '2', '3'}} )) self.assertTrue(_annotation_dict_any_filter( {ANNOTATIONS: {'A': {'1', '2'}, 'B': {'X'}}}, {'A': {'3'}, 'B': {'X'}} )) self.assertFalse(_annotation_dict_any_filter( {ANNOTATIONS: {'A': {'1', '2'}}}, {'A': {'3'}} )) self.assertFalse(_annotation_dict_any_filter( {ANNOTATIONS: {'A': {'1', '2'}, 'B': {'X'}}}, {'A': {'3'}, 'B': {'Y'}} )) def test_any_filter_no_query(self): """Test that the all filter returns true when there's no argument""" graph = BELGraph() graph.add_increases(protein(n(), n()), protein(n(), n()), n(), n()) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_any_filter({}))) def test_any_filter_no_annotations(self): graph = BELGraph() graph.add_increases(protein(n(), n()), protein(n(), n()), n(), n()) self.assertEqual(0, count_passed_edge_filter(graph, build_annotation_dict_any_filter({'A': {'1'}}))) def test_any_filter_empty_annotations(self): graph = BELGraph() graph.add_increases(protein(n(), n()), protein(n(), n()), n(), n(), annotations={}) self.assertEqual(0, count_passed_edge_filter(graph, build_annotation_dict_any_filter({'A': {'1'}}))) def test_any_filter(self): graph = BELGraph() graph.add_increases(protein(n(), n()), protein(n(), n()), n(), n(), annotations={ 'A': {'1', '2', '3'} }) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_any_filter({'A': {'1'}}))) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_any_filter({'A': {'1', '2'}}))) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_any_filter({'A': {'1', '2', '3'}}))) def test_b(self): 
self.assertTrue(_annotation_dict_all_filter( {ANNOTATIONS: {'A': {'1'}}}, {'A': {'1'}} )) self.assertTrue(_annotation_dict_all_filter( {ANNOTATIONS: {'A': {'1', '2'}}}, {'A': {'1', '2'}} )) self.assertTrue(_annotation_dict_all_filter( {ANNOTATIONS: {'A': {'1', '2'}}}, {'A': {'1', '2'}} )) self.assertTrue(_annotation_dict_all_filter( {ANNOTATIONS: {'A': {'1', '2'}, 'B': {'X'}}}, {'A': {'1', '2'}, 'B': {'X'}} )) self.assertFalse(_annotation_dict_all_filter( {ANNOTATIONS: {'A': {'1', '2'}, 'B': {'X'}}}, {'A': {'1', '2', '3'}, 'B': {'X', 'Y'}} )) self.assertFalse(_annotation_dict_all_filter( {ANNOTATIONS: {'A': {'1'}}}, {'A': {'1', '2'}} )) self.assertFalse(_annotation_dict_all_filter( {ANNOTATIONS: {'A': {'1'}}}, {'A': {'2'}} )) self.assertFalse(_annotation_dict_all_filter( {ANNOTATIONS: {'A': {'1'}}}, {'B': {'1'}} )) def test_all_filter_no_query(self): """Test that the all filter returns true when there's no argument""" graph = BELGraph() graph.add_increases(protein(n(), n()), protein(n(), n()), n(), n()) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_all_filter({}))) def test_all_filter_no_annotations(self): graph = BELGraph() graph.add_increases(protein(n(), n()), protein(n(), n()), n(), n()) self.assertEqual(0, count_passed_edge_filter(graph, build_annotation_dict_all_filter({'A': {'1'}}))) def test_all_filter_empty_annotations(self): graph = BELGraph() graph.add_increases(protein(n(), n()), protein(n(), n()), n(), n(), annotations={}) self.assertEqual(0, count_passed_edge_filter(graph, build_annotation_dict_all_filter({'A': {'1'}}))) def test_all_filter(self): graph = BELGraph() graph.add_increases(protein(n(), n()), protein(n(), n()), n(), n(), annotations={ 'A': {'1', '2', '3'} }) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_all_filter({'A': {'1'}}))) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_all_filter({'A': {'1', '2'}}))) self.assertEqual(1, count_passed_edge_filter(graph, 
build_annotation_dict_all_filter({'A': {'1', '2', '3'}}))) self.assertEqual(0, count_passed_edge_filter(graph, build_annotation_dict_all_filter({'A': {'1', '2', '3', '4'}}))) self.assertEqual(0, count_passed_edge_filter(graph, build_annotation_dict_all_filter({'A': {'4'}}))) def test_all_filter_dict(self): graph = BELGraph() graph.add_edge(1, 2, annotations={ 'A': {'1', '2', '3'} }) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_all_filter({'A': {'1': True}}))) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_all_filter({ 'A': {'1': True, '2': True} }))) self.assertEqual(1, count_passed_edge_filter(graph, build_annotation_dict_all_filter({ 'A': {'1': True, '2': True, '3': True} }))) self.assertEqual(0, count_passed_edge_filter(graph, build_annotation_dict_all_filter({ 'A': {'1': True, '2': True, '3': True, '4': True} }))) self.assertEqual(0, count_passed_edge_filter(graph, build_annotation_dict_all_filter({ 'A': {'4': True} }))) if __name__ == '__main__': unittest.main() pybel-0.12.1/tests/test_struct/test_grouping.py000066400000000000000000000110101334645200200216660ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for functions for grouping BEL graphs into sub-graphs.""" import unittest from pybel import BELGraph from pybel.constants import CITATION_TYPE_PUBMED, FUNCTION, PROTEIN from pybel.dsl import protein from pybel.struct.grouping import get_subgraphs_by_annotation, get_subgraphs_by_citation from pybel.testing.utils import n test_namespace_url = n() test_annotation_url = n() citation, evidence = n(), n() a, b, c, d = [protein(namespace='test', name=str(i)) for i in range(4)] class TestAnnotation(unittest.TestCase): """Tests for getting sub-graphs by annotation.""" def setUp(self): """Set up the test case with a pre-populated BEL graph.""" self.graph = BELGraph() self.graph.namespace_url['test'] = test_namespace_url self.graph.annotation_url['subgraph'] = test_annotation_url 
self.graph.add_increases(a, b, citation=citation, evidence=evidence, annotations={'subgraph': {'1', '2'}}) self.graph.add_increases(a, c, citation=citation, evidence=evidence, annotations={'subgraph': {'1'}}) self.graph.add_increases(b, d, citation=citation, evidence=evidence, annotations={'subgraph': {'1', '2'}}) self.graph.add_increases(a, d, citation=citation, evidence=evidence, annotations={'subgraph': {'2'}}) self.graph.add_increases(c, d, citation=citation, evidence=evidence) def test_get_subgraphs_by_annotation(self): subgraphs = get_subgraphs_by_annotation(self.graph, annotation='subgraph') self.assertEqual(2, len(subgraphs)) self.assertIn('1', subgraphs) self.assertIn('2', subgraphs) subgraph_1 = subgraphs['1'] self.assertIsInstance(subgraph_1, BELGraph) self.assertIn('test', subgraph_1.namespace_url) self.assertIn('subgraph', subgraph_1.annotation_url) self.assertIn(a, subgraph_1) self.assertIn(b, subgraph_1) self.assertIn(c, subgraph_1) self.assertIn(d, subgraph_1) self.assertIn(b, subgraph_1[a]) self.assertIn(c, subgraph_1[a]) self.assertIn(d, subgraph_1[b]) self.assertNotIn(d, subgraph_1[a]) self.assertNotIn(d, subgraph_1[c]) subgraph_2 = subgraphs['2'] self.assertIsInstance(subgraph_2, BELGraph) self.assertIn('test', subgraph_2.namespace_url) self.assertIn('subgraph', subgraph_2.annotation_url) self.assertIn(a, subgraph_2) self.assertIn(b, subgraph_2) self.assertNotIn(c, subgraph_2) self.assertIn(d, subgraph_2) self.assertIn(b, subgraph_2[a]) self.assertNotIn(c, subgraph_2[a]) self.assertIn(d, subgraph_2[b]) self.assertIn(d, subgraph_2[a]) def test_get_subgraphs_by_annotation_with_sentinel(self): sentinel = n() subgraphs = get_subgraphs_by_annotation(self.graph, annotation='subgraph', sentinel=sentinel) self.assertEqual(3, len(subgraphs)) self.assertIn('1', subgraphs) self.assertIn('2', subgraphs) self.assertIn(sentinel, subgraphs) class TestProvenance(unittest.TestCase): """Tests for getting sub-graphs by provenance information (citation, etc.)""" 
def test_get_subgraphs_by_citation(self): """Test getting sub-graphs by citation.""" graph = BELGraph() c1, c2, c3 = n(), n(), n() graph.add_increases(a, b, citation=c1, evidence=n()) graph.add_increases(a, b, citation=c2, evidence=n()) graph.add_increases(b, c, citation=c1, evidence=n()) graph.add_increases(c, d, citation=c1, evidence=n()) graph.add_increases(a, d, citation=c3, evidence=n()) subgraphs = get_subgraphs_by_citation(graph) # TODO tests for metadata c1_pair = (CITATION_TYPE_PUBMED, c1) self.assertIn(c1_pair, subgraphs) c1_subgraph = subgraphs[c1_pair] self.assertIn(a, c1_subgraph) self.assertIn(b, c1_subgraph) self.assertIn(c, c1_subgraph) self.assertIn(d, c1_subgraph) c2_pair = (CITATION_TYPE_PUBMED, c2) self.assertIn(c2_pair, subgraphs) c2_subgraph = subgraphs[c2_pair] self.assertIn(a, c2_subgraph) self.assertIn(b, c2_subgraph) self.assertNotIn(c, c2_subgraph) self.assertNotIn(d, c2_subgraph) c3_pair = (CITATION_TYPE_PUBMED, c3) self.assertIn(c3_pair, subgraphs) c3_subgraph = subgraphs[c3_pair] self.assertIn(a, c3_subgraph) self.assertNotIn(b, c3_subgraph) self.assertNotIn(c, c3_subgraph) self.assertIn(d, c3_subgraph) pybel-0.12.1/tests/test_struct/test_query/000077500000000000000000000000001334645200200206365ustar00rootroot00000000000000pybel-0.12.1/tests/test_struct/test_query/__init__.py000066400000000000000000000000001334645200200227350ustar00rootroot00000000000000pybel-0.12.1/tests/test_struct/test_query/test_mocks.py000066400000000000000000000016051334645200200233650ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for the mocks for the query builder.""" import unittest from pybel.examples import egf_graph from pybel.testing.mock_manager import MockQueryManager class TestMockManager(unittest.TestCase): """Tests for the mock query manager.""" def test_make(self): """Test instantiating the mock query manager.""" manager = MockQueryManager() self.assertEqual(0, manager.count_networks()) def test_make_with_graph(self): """Test counting 
networks in the mock query manager.""" manager = MockQueryManager(graphs=[egf_graph]) self.assertEqual(1, manager.count_networks()) def test_add_graph(self): """Test adding a graph with insert_graph.""" manager = MockQueryManager() graph = egf_graph.copy() manager.insert_graph(graph) self.assertEqual(1, manager.count_networks()) pybel-0.12.1/tests/test_struct/test_query/test_query.py000066400000000000000000000244731334645200200234260ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for the query builder.""" import logging import unittest from pybel import BELGraph, Pipeline from pybel.dsl import Protein from pybel.examples.egf_example import egf_graph, vcp from pybel.examples.homology_example import ( homology_graph, mouse_csf1_protein, mouse_csf1_rna, mouse_mapk1_protein, mouse_mapk1_rna, ) from pybel.examples.sialic_acid_example import (cd33_phosphorylated, dap12, shp1, shp2, sialic_acid_graph, syk, trem2) from pybel.struct import expand_node_neighborhood, expand_nodes_neighborhoods, get_subgraph_by_annotation_value from pybel.struct.mutation import collapse_to_genes, enrich_protein_and_rna_origins from pybel.struct.query import Query, QueryMissingNetworksError, Seeding from pybel.testing.generate import generate_random_graph from pybel.testing.mock_manager import MockQueryManager from pybel.testing.utils import n log = logging.getLogger(__name__) def add(query, manager, graph): network = manager.insert_graph(graph) query.append_network(network.id) class TestSeedingConstructor(unittest.TestCase): def test_none(self): """Test construction of a seeding container.""" seeding = Seeding() self.assertEqual(0, len(seeding)) self.assertEqual('[]', seeding.dumps()) def test_append_sample(self): seeding = Seeding() seeding.append_sample() self.assertEqual(1, len(seeding)) s = seeding.dumps() self.assertIsInstance(s, str) class TestQueryConstructor(unittest.TestCase): """Test the construction of a Query.""" def test_network_ids_none(self): query = Query() 
self.assertIsInstance(query.network_ids, list) self.assertIsInstance(query.seeding, Seeding) self.assertIsInstance(query.pipeline, Pipeline) self.assertEqual(0, len(query.network_ids)) def test_network_ids_single(self): query = Query(network_ids=1) self.assertIsInstance(query.network_ids, list) self.assertEqual(1, len(query.network_ids)) def test_network_ids_multiple(self): query = Query(network_ids=[1, 2, 3]) self.assertIsInstance(query.network_ids, list) self.assertEqual(3, len(query.network_ids)) def test_network_ids_type_error(self): with self.assertRaises(TypeError): Query(network_ids='a') def test_seeding(self): query = Query(seeding=Seeding()) self.assertEqual(0, len(query.seeding)) def test_pipeline(self): query = Query(pipeline=Pipeline()) self.assertEqual(0, len(query.pipeline)) class QueryTestEgf(unittest.TestCase): """Test querying the EGF subgraph""" def setUp(self): """Set up each test with a mock query manager.""" self.manager = MockQueryManager() self.query = Query() def add_query(self, graph): add(self.query, self.manager, graph) return self.query def run_query(self): return self.query.run(self.manager) def test_fail_run_with_no_networks(self): with self.assertRaises(QueryMissingNetworksError): self.run_query() def test_no_seeding_no_pipeline(self): graph = egf_graph.copy() self.add_query(graph) result = self.run_query() self.assertEqual(graph.number_of_nodes(), result.number_of_nodes()) self.assertEqual(graph.number_of_edges(), result.number_of_edges()) def test_seed_by_neighbor(self): graph = BELGraph() a, b, c, d = (Protein(namespace=n(), name=str(i)) for i in range(4)) graph.add_increases(a, b, n(), n()) graph.add_increases(b, c, n(), n()) graph.add_increases(c, d, n(), n()) self.add_query(graph).append_seeding_neighbors(b) result = self.run_query() self.assertIsInstance(result, BELGraph) # test nodes self.assertIn(a, result) self.assertIn(b, result) self.assertIn(c, result) self.assertNotIn(d, result) # test edges self.assertIn(b, result[a]) 
self.assertIn(c, result[b]) self.assertNotIn(d, result[c]) def test_seed_by_neighbors(self): graph = BELGraph() a, b, c, d, e = (Protein(namespace=n(), name=str(i)) for i in range(5)) graph.add_increases(a, b, n(), n()) graph.add_increases(b, c, n(), n()) graph.add_increases(c, d, n(), n()) graph.add_increases(d, e, n(), n()) self.add_query(graph).append_seeding_neighbors([b, c]) result = self.run_query() self.assertIsInstance(result, BELGraph) # test nodes self.assertIn(a, result) self.assertIn(b, result) self.assertIn(c, result) self.assertIn(d, result) self.assertNotIn(e, result) # test edges self.assertIn(b, result[a]) self.assertIn(c, result[b]) self.assertIn(d, result[c]) self.assertNotIn(e, result[d]) def test_random_sample(self): """Test generating multiple random samples and combining them.""" graph = generate_random_graph(50, 1000) query = self.add_query(graph) query.append_seeding_sample(number_edges=10) query.append_seeding_sample(number_edges=10) result = self.run_query() # this will fail randomly sometimes, lol self.assertIn(result.number_of_edges(), {18, 19, 20}) class QueryTest(unittest.TestCase): """Test the query""" def setUp(self): """Setup each test with an empty mock query manager.""" self.manager = MockQueryManager() def test_pipeline(self): graph = egf_graph.copy() enrich_protein_and_rna_origins(graph) self.assertEqual( 32, # 10 protein nodes already there + complex + bp + 2*10 (genes and rnas) graph.number_of_nodes() ) # 6 already there + 5 complex hasComponent edges + new 2*10 edges self.assertEqual(31, graph.number_of_edges()) network = self.manager.insert_graph(graph) pipeline = Pipeline() pipeline.append(collapse_to_genes) query = Query( network_ids=[network.id], pipeline=pipeline ) result_graph = query.run(self.manager) self.assertEqual(12, result_graph.number_of_nodes()) # same number of nodes than there were self.assertEqual(11, result_graph.number_of_edges()) # same number of edges than there were def test_pipeline_2(self): graph = 
egf_graph.copy() network = self.manager.insert_graph(graph) network_id = network.id query = Query(network_ids=[network_id]) query.append_seeding_neighbors(vcp) query.append_pipeline(get_subgraph_by_annotation_value, 'Species', '9606') result = query.run(self.manager) self.assertIsNotNone(result, msg='Query returned none') self.assertEqual(3, result.number_of_nodes()) def test_query_multiple_networks(self): sialic_acid_graph_id = self.manager.insert_graph(sialic_acid_graph.copy()).id egf_graph_id = self.manager.insert_graph(egf_graph.copy()).id query = Query() query.append_network(sialic_acid_graph_id) query.append_network(egf_graph_id) query.append_seeding_neighbors([syk]) query.append_pipeline(enrich_protein_and_rna_origins) result = query.run(self.manager) self.assertIsNotNone(result, msg='Query returned none') self.assertIn(shp1, result) self.assertIn(shp2, result) self.assertIn(trem2, result) self.assertIn(dap12, result) self.assertEqual(15, result.number_of_nodes()) self.assertEqual(14, result.number_of_edges()) def test_get_subgraph_by_annotation_value(self): graph = homology_graph.copy() result = get_subgraph_by_annotation_value(graph, 'Species', '10090') self.assertIsNotNone(result, msg='Query returned none') self.assertIsInstance(result, BELGraph) self.assertLess(0, result.number_of_nodes()) self.assertIn(mouse_mapk1_protein, result, msg='nodes:\n{}'.format(list(map(repr, graph)))) self.assertIn(mouse_csf1_protein, result) self.assertEqual(2, result.number_of_nodes()) self.assertEqual(1, result.number_of_edges()) def test_seeding_1(self): test_network_1 = self.manager.insert_graph(homology_graph.copy()) query = Query(network_ids=[test_network_1.id]) query.append_seeding_neighbors([mouse_csf1_rna, mouse_mapk1_rna]) result = query.run(self.manager) self.assertIsNotNone(result, msg='Query returned none') self.assertIsInstance(result, BELGraph) self.assertIn(mouse_mapk1_rna, result) self.assertIn(mouse_csf1_rna, result) self.assertIn(mouse_mapk1_protein, 
result) self.assertIn(mouse_csf1_protein, result) self.assertEqual(6, result.number_of_nodes()) self.assertEqual(4, result.number_of_edges()) def test_seeding_with_pipeline(self): test_network_1 = self.manager.insert_graph(sialic_acid_graph.copy()) query = Query(network_ids=[test_network_1.id]) query.append_seeding_neighbors([trem2, dap12, shp2]) query.append_pipeline(expand_nodes_neighborhoods, [trem2, dap12, shp2]) result = query.run(self.manager) self.assertIsNotNone(result, msg='Query returned none') self.assertIsInstance(result, BELGraph) self.assertIn(trem2, result) self.assertIn(dap12, result) self.assertIn(shp2, result) self.assertIn(syk, result) self.assertIn(cd33_phosphorylated, result) self.assertEqual(5, result.number_of_nodes()) self.assertEqual(4, result.number_of_edges()) def test_query_multiple_networks_with_api(self): test_network_1 = self.manager.insert_graph(homology_graph.copy()) pipeline = Pipeline() pipeline.append(expand_node_neighborhood, mouse_mapk1_protein) query = Query( network_ids=[test_network_1.id], pipeline=pipeline ) query.append_seeding_annotation('Species', {'10090'}) result = query.run(self.manager) self.assertIsNotNone(result, msg='Query returned none') self.assertEqual(3, result.number_of_nodes()) self.assertIn(mouse_mapk1_protein, result) self.assertIn(mouse_csf1_protein, result) self.assertEqual(2, result.number_of_edges()) pybel-0.12.1/tests/test_struct/test_query/test_seeding.py000066400000000000000000000056541334645200200236770ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for the query builder.""" import logging import unittest from pybel import BELGraph from pybel.dsl import Protein from pybel.examples.egf_example import egf_graph from pybel.struct.query import Seeding from pybel.testing.generate import generate_random_graph from pybel.testing.utils import n log = logging.getLogger(__name__) class TestSeedingConstructor(unittest.TestCase): def test_none(self): seeding = Seeding() self.assertEqual(0, 
len(seeding)) self.assertEqual('[]', seeding.dumps()) def test_append_sample(self): seeding = Seeding() seeding.append_sample() self.assertEqual(1, len(seeding)) s = seeding.dumps() self.assertIsInstance(s, str) def test_no_seeding(self): graph = egf_graph.copy() seeding = Seeding() result = seeding.run(graph) self.assertEqual(graph.number_of_nodes(), result.number_of_nodes()) self.assertEqual(graph.number_of_edges(), result.number_of_edges()) def test_seed_by_neighbor(self): graph = BELGraph() a, b, c, d = (Protein(namespace=n(), name=str(i)) for i in range(4)) graph.add_increases(a, b, n(), n()) graph.add_increases(b, c, n(), n()) graph.add_increases(c, d, n(), n()) seeding = Seeding() seeding.append_neighbors(b) result = seeding.run(graph) self.assertIsInstance(result, BELGraph) # test nodes self.assertIn(a, result) self.assertIn(b, result) self.assertIn(c, result) self.assertNotIn(d, result) # test edges self.assertIn(b, result[a]) self.assertIn(c, result[b]) self.assertNotIn(d, result[c]) def test_seed_by_neighbors(self): graph = BELGraph() a, b, c, d, e = (Protein(namespace=n(), name=str(i)) for i in range(5)) graph.add_increases(a, b, n(), n()) graph.add_increases(b, c, n(), n()) graph.add_increases(c, d, n(), n()) graph.add_increases(d, e, n(), n()) seeding = Seeding() seeding.append_neighbors([b, c]) result = seeding.run(graph) self.assertIsInstance(result, BELGraph) # test nodes self.assertIn(a, result) self.assertIn(b, result) self.assertIn(c, result) self.assertIn(d, result) self.assertNotIn(e, result) # test edges self.assertIn(b, result[a]) self.assertIn(c, result[b]) self.assertIn(d, result[c]) self.assertNotIn(e, result[d]) def test_random_sample(self): graph = generate_random_graph(50, 1000) seeding = Seeding() seeding.append_sample(number_edges=10) seeding.append_sample(number_edges=10) result = seeding.run(graph) # TODO this will fail randomly some times lol, so make allowed to be sort of wrong self.assertIn(result.number_of_edges(), {18, 19, 
20}) pybel-0.12.1/tests/test_struct/test_query/test_struct_pipeline.py000066400000000000000000000152371334645200200254700ustar00rootroot00000000000000# -*- coding: utf-8 -*- import logging import unittest from six.moves import StringIO from pybel import BELGraph from pybel.examples.egf_example import egf_graph from pybel.struct.mutation import enrich_protein_and_rna_origins from pybel.struct.pipeline import Pipeline, transformation from pybel.struct.pipeline.decorators import ( deprecated, get_transformation, in_place_map, mapped, register_deprecated, universe_map, ) from pybel.struct.pipeline.exc import DeprecationMappingError, MetaValueError, MissingPipelineFunctionError log = logging.getLogger(__name__) log.setLevel(10) class TestEgfExample(unittest.TestCase): """Random test for mutation functions""" def setUp(self): self.graph = egf_graph.copy() self.original_number_nodes = self.graph.number_of_nodes() self.original_number_edges = self.graph.number_of_edges() def check_original_unchanged(self): self.assertEqual(self.original_number_nodes, self.graph.number_of_nodes(), msg='original graph nodes should remain unchanged') self.assertEqual(self.original_number_edges, self.graph.number_of_edges(), msg='original graph edges should remain unchanged') class TestPipelineFailures(unittest.TestCase): def test_assert_failure(self): with self.assertRaises(MissingPipelineFunctionError): get_transformation('missing function') def test_assert_success(self): m = list(mapped) self.assertLess(0, len(m)) m = m[0] f = get_transformation(m) self.assertIsNotNone(f) def test_append_invalid(self): """Test when an invalid type is given to a :class:`pybel.struct.Pipeline`.""" p = Pipeline() with self.assertRaises(TypeError): p.append(4) def test_get_function_failure(self): p = Pipeline() with self.assertRaises(MissingPipelineFunctionError): p._get_function('nonsense name') def test_build_meta_failure(self): p1, p2 = Pipeline(), Pipeline() p = Pipeline._build_meta('wrong', [p1, p2]) with 
self.assertRaises(MetaValueError): p(BELGraph()) def test_fail_add(self): pipeline = Pipeline() with self.assertRaises(MissingPipelineFunctionError): pipeline.append('missing function') class TestPipeline(TestEgfExample): def test_deprecated_central_dogma_is_registered(self): """Tests that a deprecated function is properly registered""" self.assertIn('enrich_protein_and_rna_origins', mapped) self.assertIn('infer_central_dogma', mapped) self.assertEqual(mapped['enrich_protein_and_rna_origins'], mapped['infer_central_dogma']) def test_append(self): pipeline = Pipeline() self.assertEqual(0, len(pipeline)) pipeline.append('infer_central_dogma') self.assertEqual(1, len(pipeline)) def test_extend(self): p1 = Pipeline.from_functions(['infer_central_dogma']) self.assertEqual(1, len(p1)) p2 = Pipeline.from_functions(['remove_pathologies']) p1.extend(p2) self.assertEqual(2, len(p1)) def test_serialize_string(self): p = Pipeline.from_functions(['infer_central_dogma']) s = p.dumps() p_reconstituted = Pipeline.loads(s) self.assertEqual(p.protocol, p_reconstituted.protocol) def test_serialize_file(self): p = Pipeline.from_functions(['infer_central_dogma']) sio = StringIO() p.dump(sio) sio.seek(0) p_reconstituted = Pipeline.load(sio) self.assertEqual(p.protocol, p_reconstituted.protocol) def test_pipeline_by_string(self): pipeline = Pipeline.from_functions([ 'infer_central_dogma', ]) result = pipeline(self.graph) self.assertEqual(32, result.number_of_nodes()) for node in self.graph: self.assertIn(node, result) self.check_original_unchanged() def test_pipeline_by_function(self): pipeline = Pipeline.from_functions([ enrich_protein_and_rna_origins, ]) result = pipeline(self.graph) self.assertEqual(32, result.number_of_nodes()) for node in self.graph: self.assertIn(node, result) self.check_original_unchanged() class TestDeprecation(unittest.TestCase): def test_register_deprecation_remapping_error(self): """Test that a deprecation mapping doesn't override a pre-existing mapping.""" 
@transformation def test_function_1(): """Test doing nothing.""" self.assertNotIn('test_function_1', deprecated) self.assertIn('test_function_1', mapped) self.assertNotIn('test_function_1', universe_map) self.assertNotIn('test_function_1', in_place_map) with self.assertRaises(DeprecationMappingError): @register_deprecated('test_function_1') @transformation def test_function_1_new(): """Test bad uage of register_deprecated.""" self.assertNotIn('test_function_1', deprecated) def test_register_deprecated(self): """Test that a deprecation mapping doesn't override a pre-existing mapping.""" @register_deprecated('test_function_2_old') @transformation def test_function_2(): """Test usage of register_deprecated.""" self.assertNotIn('test_function_2', deprecated) self.assertIn('test_function_2', mapped) self.assertNotIn('test_function_2', universe_map) self.assertNotIn('test_function_2', in_place_map) self.assertIn('test_function_2_old', deprecated) self.assertIn('test_function_2_old', mapped) self.assertNotIn('test_function_2_old', universe_map) self.assertNotIn('test_function_2_old', in_place_map) self.assertEqual(mapped['test_function_2_old'], mapped['test_function_2']) def test_register_missing(self): """Test that a deprecation mapping fails if it's missing a transformation function.""" with self.assertRaises(MissingPipelineFunctionError): @register_deprecated('test_function_3_old') def test_function_3(): """Test bad usage of register_deprecated that throws a MissingPipelineFunctionError.""" self.assertNotIn('test_function_3', mapped) self.assertNotIn('test_function_3', universe_map) self.assertNotIn('test_function_3', in_place_map) self.assertNotIn('test_function_3_old', deprecated) self.assertNotIn('test_function_3_old', mapped) self.assertNotIn('test_function_3_old', universe_map) self.assertNotIn('test_function_3_old', in_place_map) pybel-0.12.1/tests/test_struct/test_struct_graph.py000066400000000000000000000160141334645200200225520ustar00rootroot00000000000000# 
-*- coding: utf-8 -*- """Tests for data structures in PyBEL.""" import unittest from six import StringIO, string_types from pybel import BELGraph from pybel.constants import CITATION_REFERENCE, CITATION_TYPE, CITATION_TYPE_PUBMED from pybel.dsl import hgvs, protein from pybel.testing.utils import n class TestGraphProperties(unittest.TestCase): """Test setting and access to graph properties.""" def setUp(self): """Make fake metadata for the graphs.""" ( self.name, self.version, self.description, self.authors, self.contact, self.licenses, self.copyrights, self.disclaimer ) = [n() for _ in range(8)] def help_test_metadata(self, graph): """Help test the right metadata got in the graph. :type graph: BELGraph """ self.assertEqual(self.name, graph.name) self.assertEqual(self.version, graph.version) self.assertEqual(self.description, graph.description) self.assertEqual(self.authors, graph.authors) self.assertEqual(self.contact, graph.contact) self.assertEqual(self.licenses, graph.license) self.assertEqual(self.copyrights, graph.copyright) self.assertEqual(self.disclaimer, graph.disclaimer) self.assertEqual('{name} v{version}'.format(name=self.name, version=self.version), str(graph)) def test_str_kwargs(self): """Test setting of metadata through keyword arguments.""" graph = BELGraph( name=self.name, version=self.version, description=self.description, authors=self.authors, contact=self.contact, license=self.licenses, copyright=self.copyrights, disclaimer=self.disclaimer ) self.help_test_metadata(graph) def test_name(self): """Test setting of metadata through attributes.""" graph = BELGraph() graph.name = self.name graph.version = self.version graph.description = self.description graph.authors = self.authors graph.contact = self.contact graph.license = self.licenses graph.copyright = self.copyrights graph.disclaimer = self.disclaimer self.help_test_metadata(graph) class TestStruct(unittest.TestCase): """Test the BEL graph data structure.""" def test_add_simple(self): """Test 
that a simple node can be added, but not duplicated.""" graph = BELGraph(name='Test',version='0.0.0') namespace, name = n(), n() graph.add_node_from_data(protein(namespace=namespace, name=name)) self.assertEqual(1, graph.number_of_nodes()) graph.add_node_from_data(protein(namespace=namespace, name=name)) self.assertEqual(1, graph.number_of_nodes()) sio = StringIO() graph.summarize(file=sio) test_str = """Test v0.0.0 Number of Nodes: 1 Number of Edges: 0 Network Density: 0.00E+00 Number of Components: 1""" self.assertEqual(test_str.strip(), sio.getvalue().strip()) def test_citation_type_error(self): """Test error handling on adding qualified edges.""" graph = BELGraph() with self.assertRaises(TypeError): graph.add_increases( protein(namespace='TEST', name='YFG1'), protein(namespace='TEST', name='YFG2'), evidence=n(), citation=5, ) class TestGetGraphProperties(unittest.TestCase): """The tests in this class check the getting and setting of node properties.""" def setUp(self): """Set up the test case with a fresh BEL graph.""" self.graph = BELGraph() def test_get_qualified_edge(self): """Test adding an edge to a graph.""" test_source = protein(namespace='TEST', name='YFG') test_target = protein(namespace='TEST', name='YFG2') self.graph.add_node_from_data(test_source) self.graph.add_node_from_data(test_target) test_evidence = n() test_pmid = n() test_key = self.graph.add_increases( test_source, test_target, citation=test_pmid, evidence=test_evidence, annotations={ 'Species': '9606', 'Confidence': 'Very High' }, ) citation = self.graph.get_edge_citation(test_source, test_target, test_key) self.assertIsNotNone(citation) self.assertIsInstance(citation, dict) self.assertIn(CITATION_TYPE, citation) self.assertEqual(CITATION_TYPE_PUBMED, citation[CITATION_TYPE]) self.assertIn(CITATION_REFERENCE, citation) self.assertEqual(test_pmid, citation[CITATION_REFERENCE]) evidence = self.graph.get_edge_evidence(test_source, test_target, test_key) self.assertIsNotNone(evidence) 
self.assertIsInstance(evidence, string_types) self.assertEqual(test_evidence, evidence) annotations = self.graph.get_edge_annotations(test_source, test_target, test_key) self.assertIsNotNone(annotations) self.assertIsInstance(annotations, dict) self.assertIn('Species', annotations) self.assertIn('9606', annotations['Species']) self.assertTrue(annotations['Species']['9606']) self.assertIn('Confidence', annotations) self.assertIn('Very High', annotations['Confidence']) self.assertTrue(annotations['Confidence']['Very High']) def test_get_unqualified_edge(self): """Test adding an unqualified edge.""" test_source = protein(namespace='TEST', name='YFG') test_target = protein(namespace='TEST', name='YFG2') key = self.graph.add_part_of(test_source, test_target) citation = self.graph.get_edge_citation(test_source, test_target, key) self.assertIsNone(citation) evidence = self.graph.get_edge_evidence(test_source, test_target, key) self.assertIsNone(evidence) annotations = self.graph.get_edge_annotations(test_source, test_target, key) self.assertIsNone(annotations) def test_get_node_properties(self): """Test looking up node properties.""" test_name = n() test_identifier = n() node = protein(namespace='TEST', name=test_name, identifier=test_identifier) self.graph.add_node_from_data(node) self.assertIsNone(self.graph.get_node_description(node)) test_description = n() self.graph.set_node_description(node, test_description) self.assertEqual(test_description, self.graph.get_node_description(node)) def test_add_node_with_variant(self): """Test that the identifier is carried through to the child.""" graph = BELGraph() namespace, name, identifier, variant_name = n(), n(), n(), n() node = protein(namespace=namespace, name=name, identifier=identifier, variants=hgvs(variant_name)) parent = node.get_parent() graph.add_node_from_data(node) self.assertEqual(2, graph.number_of_nodes()) 
pybel-0.12.1/tests/test_struct/test_struct_operations.py000066400000000000000000000207711334645200200236410ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for graph operations.""" import unittest from pybel import BELGraph from pybel.dsl import protein from pybel.struct.operations import ( left_full_join, left_node_intersection_join, left_outer_join, node_intersection, union, ) from pybel.testing.utils import n p1, p2, p3, p4, p5, p6, p7, p8 = (protein(namespace='HGNC', name=n()) for _ in range(8)) class TestLeftFullJoin(unittest.TestCase): """Tests the variants of the left full join, including the exhaustive vs. hash algorithms and calling by function or magic functions""" def setUp(self): """Set up tests for the left full join with two example graphs.""" g = BELGraph() g.add_increases(p1, p2, citation='PMID1', evidence='Evidence 1') self.tag = 'EXTRANEOUS' self.tag_value = 'MOST DEFINITELY' h = BELGraph() h.add_increases(p1, p2, citation='PMID1', evidence='Evidence 1') h.add_increases(p1, p2, citation='PMID2', evidence='Evidence 2') h.add_increases(p1, p3, citation='PMID1', evidence='Evidence 3') h.nodes[p1][self.tag] = self.tag_value h.nodes[p3][self.tag] = self.tag_value self.g = g self.h = h self.help_check_initial_g(self.g) self.help_check_initial_h(self.h) def help_check_initial_g(self, graph): """Test the initial G graph. :type graph: pybel.BELGraph """ self.assertEqual(2, graph.number_of_nodes(), msg='initial graph G had wrong number of nodes') self.assertEqual(1, graph.number_of_edges(), msg='initial graph G had wrong number of edges') def help_check_initial_h(self, graph): """Test the initial H graph. :type graph: pybel.BELGraph """ self.assertEqual(3, graph.number_of_nodes(), msg='initial graph H had wrong number of nodes') self.assertEqual(3, graph.number_of_edges(), msg='initial graph H had wrong number of edges') def help_check_result(self, j): """Help check the result of left joining H into G. 
:param pybel.BELGraph j: The resulting graph from G += H """ self.assertIn(self.tag, j.nodes[p1]) self.assertNotIn(self.tag, j.nodes[p2]) self.assertIn(self.tag, j.nodes[p3]) self.assertEqual(self.tag_value, j.nodes[p1][self.tag]) self.assertEqual(self.tag_value, j.nodes[p3][self.tag]) self.assertEqual(3, j.number_of_nodes()) self.assertEqual(3, j.number_of_edges(), msg="G edges:\n{}".format('\n'.join(map(str, j.edges(data=True))))) def test_function(self): """Test full joining two networks using the function.""" left_full_join(self.g, self.h) self.help_check_result(self.g) self.help_check_initial_h(self.h) def test_in_place_operator_failure(self): """Test that using the wrong type with the in-place addition operator raises an error.""" with self.assertRaises(TypeError): self.g += None def test_in_place_operator(self): """Test full joining two networks using the BELGraph in-place addition operator.""" self.g += self.h self.help_check_result(self.g) self.help_check_initial_h(self.h) def test_operator_failure(self): """Test that using the wrong type with the addition operator raises an error.""" with self.assertRaises(TypeError): self.g + None def test_operator(self): """Test full joining two networks using the BELGraph addition operator.""" j = self.g + self.h self.help_check_result(j) self.help_check_initial_g(self.g) self.help_check_initial_h(self.h) def test_union_failure(self): """Test that the union of no graphs raises a value error.""" with self.assertRaises(ValueError): union([]) def test_union_trivial(self): """Test that the union of a single graph returns that graph.""" res = union([self.g]) self.assertEqual(self.g, res) def test_union(self): """Test that the union of a pair of graphs is the same as the full join.""" j = union([self.g, self.h]) self.help_check_result(j) self.help_check_initial_g(self.g) self.help_check_initial_h(self.h) class TestLeftFullOuterJoin(unittest.TestCase): def setUp(self): g = BELGraph() g.add_edge(p1, p2) h = BELGraph() 
h.add_edge(p1, p3) h.add_edge(p1, p4) h.add_edge(p5, p6) h.add_node(p7) self.g = g self.h = h def help_check_initial_g(self, g): self.assertEqual(2, g.number_of_nodes()) self.assertEqual({p1, p2}, set(g)) self.assertEqual(1, g.number_of_edges()) self.assertEqual({(p1, p2)}, set(g.edges())) def help_check_initial_h(self, h): self.assertEqual(6, h.number_of_nodes()) self.assertEqual({p1, p3, p4, p5, p6, p7}, set(h)) self.assertEqual(3, h.number_of_edges()) self.assertEqual({(p1, p3), (p1, p4), (p5, p6)}, set(h.edges())) def help_check_result(self, j): """After H has been full outer joined into G, this is what it should be""" self.assertEqual(4, j.number_of_nodes()) self.assertEqual({p1, p2, p3, p4}, set(j)) self.assertEqual(3, j.number_of_edges()) self.assertEqual({(p1, p2), (p1, p3), (p1, p4)}, set(j.edges())) def test_in_place_type_failure(self): with self.assertRaises(TypeError): self.g &= None def test_type_failure(self): with self.assertRaises(TypeError): self.g & None def test_magic(self): # left_outer_join(g, h) self.g &= self.h self.help_check_initial_h(self.h) self.help_check_result(self.g) def test_operator(self): # left_outer_join(g, h) j = self.g & self.h self.help_check_initial_h(self.h) self.help_check_initial_g(self.g) self.help_check_result(j) def test_left_outer_join(self): left_outer_join(self.g, self.h) self.help_check_initial_h(self.h) self.help_check_result(self.g) def test_left_outer_exhaustive_join(self): self.g &= self.h left_outer_join(self.g, self.h) self.help_check_initial_h(self.h) self.help_check_result(self.g) class TestInnerJoin(unittest.TestCase): """Tests various graph merging procedures""" def setUp(self): g = BELGraph() g.add_edge(p1, p2) g.add_edge(p1, p3) g.add_edge(p8, p3) h = BELGraph() h.add_edge(p1, p3) h.add_edge(p1, p4) h.add_edge(p5, p6) h.add_node(p7) self.g = g self.h = h def help_check_initialize_g(self, graph): self.assertEqual(4, graph.number_of_nodes()) self.assertEqual(3, graph.number_of_edges()) def 
help_check_initialize_h(self, graph): self.assertEqual(6, graph.number_of_nodes()) self.assertEqual({p1, p3, p4, p5, p6, p7}, set(graph)) self.assertEqual(3, graph.number_of_edges()) self.assertEqual({(p1, p3), (p1, p4), (p5, p6)}, set(graph.edges())) def test_initialize(self): self.help_check_initialize_g(self.g) self.help_check_initialize_h(self.h) def help_check_join(self, j): self.assertEqual(2, j.number_of_nodes()) self.assertEqual({p1, p3}, set(j)) self.assertEqual(1, j.number_of_edges()) self.assertEqual({(p1, p3), }, set(j.edges())) def test_in_place_type_failure(self): with self.assertRaises(TypeError): self.g ^ None def test_type_failure(self): with self.assertRaises(TypeError): self.g ^= None def test_magic(self): j = self.g ^ self.h self.help_check_join(j) self.help_check_initialize_h(self.h) self.help_check_initialize_g(self.g) def test_left_node_intersection_join(self): j = left_node_intersection_join(self.g, self.h) self.help_check_join(j) self.help_check_initialize_h(self.h) self.help_check_initialize_g(self.g) def test_node_intersection(self): j = node_intersection([self.h, self.g]) self.help_check_join(j) self.help_check_initialize_h(self.h) self.help_check_initialize_g(self.g) def test_intersection_failure(self): with self.assertRaises(ValueError): node_intersection([]) def test_intersection_trivial(self): res = node_intersection([self.g]) self.assertEqual(self.g, res) if __name__ == '__main__': unittest.main() pybel-0.12.1/tests/test_struct/test_summary/000077500000000000000000000000001334645200200211665ustar00rootroot00000000000000pybel-0.12.1/tests/test_struct/test_summary/__init__.py000066400000000000000000000001061334645200200232740ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for :mod:`pybel.struct.summary`.""" pybel-0.12.1/tests/test_struct/test_summary/test_errors.py000066400000000000000000000043751334645200200241240ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for :mod:`pybel.struct.summary.errors`.""" 
import unittest from pybel import BELGraph from pybel.parser.exc import NakedNameWarning, UndefinedAnnotationWarning from pybel.struct.summary import count_error_types, count_naked_names, get_naked_names from pybel.testing.utils import n class TestErrors(unittest.TestCase): """Test :mod:`pybel.struct.summary.errors`.""" def test_count_error_types(self): """Test counting error types.""" graph = BELGraph() line_number = 30 position = 4 line = n() annotation = n() exception = UndefinedAnnotationWarning( line_number=line_number, line=line, position=position, annotation=annotation, ) graph.add_warning( line_number=line_number, line=line, exception=exception, ) error_types = count_error_types(graph) self.assertEqual(1, len(error_types)) self.assertIn(UndefinedAnnotationWarning.__name__, error_types) self.assertEqual(1, error_types[UndefinedAnnotationWarning.__name__]) def test_get_naked_names(self): """Retrieve the naked names from a graph.""" graph = BELGraph() n_names = 5 line_number = 30 position = 4 line = n() names = {n() for _ in range(n_names)} exceptions = [ NakedNameWarning( line_number=line_number, line=line, position=position, name=name, ) for name in names ] for exception in exceptions: graph.add_warning( line_number=line_number, line=line, exception=exception, ) graph.add_warning( line_number=line_number, line=line, exception=exceptions[0], ) self.assertEqual(6, len(graph.warnings)) naked_names = get_naked_names(graph) self.assertEqual(names, naked_names) naked_name_counter = count_naked_names(graph) self.assertEqual(n_names, len(naked_name_counter)) self.assertEqual(2, naked_name_counter[exceptions[0].name]) pybel-0.12.1/tests/test_struct/test_summary/test_provenance.py000066400000000000000000000006561334645200200247460ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for provenance summary functions.""" import unittest from pybel.examples import sialic_acid_graph from pybel.struct import count_citations class TestProvenance(unittest.TestCase): 
"""Tests for provenance summary functions.""" def test_count_citations(self): """Test counting citations.""" count = count_citations(sialic_acid_graph) self.assertEqual(1, count) pybel-0.12.1/tests/test_struct/test_summary/test_struct_summary_edges.py000066400000000000000000000063251334645200200270550ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Test summary functions for edges.""" import unittest from collections import Counter from pybel import BELGraph from pybel.dsl import protein from pybel.examples import sialic_acid_graph from pybel.struct.summary.edge_summary import ( count_annotations, get_annotation_values, get_annotation_values_by_annotation, get_annotations, get_unused_annotations, iter_annotation_value_pairs, iter_annotation_values, ) from pybel.testing.utils import n class TestEdgeSummary(unittest.TestCase): """Test summary functions for edges.""" def test_1(self): """Test iterating over annotation/value pairs.""" graph = BELGraph() u = protein('HGNC', name='U') v = protein('HGNC', name='V') w = protein('HGNC', name='W') graph.add_increases( u, v, evidence=n(), citation=n(), annotations={ 'A': {'1', '2'}, 'B': {'X'} } ) graph.add_increases( u, w, evidence=n(), citation=n(), annotations={ 'A': {'1', '3'}, 'C': {'a'} } ) graph.add_increases( w, v, evidence=n(), citation=n(), ) x = dict(Counter(iter_annotation_value_pairs(graph))) self.assertEqual({ ('A', '1'): 2, ('A', '2'): 1, ('A', '3'): 1, ('B', 'X'): 1, ('C', 'a'): 1, }, x) y = Counter(iter_annotation_values(graph, 'A')) self.assertEqual(x['A', '1'] + x['A', '2'] + x['A', '3'], sum(y.values())) y = Counter(iter_annotation_values(graph, 'B')) self.assertEqual(x['B', 'X'], sum(y.values())) y = Counter(iter_annotation_values(graph, 'C')) self.assertEqual(x['C', 'a'], sum(y.values())) def test_get_annotation_values(self): """Test getting annotation values.""" expected = { 'Confidence': {'High', 'Low'}, 'Species': {'9606'} } self.assertEqual({'Confidence', 'Species'}, 
get_annotations(sialic_acid_graph)) self.assertEqual({'Confidence': 8, 'Species': 8}, dict(count_annotations(sialic_acid_graph))) annotation_values_by_annotation = get_annotation_values_by_annotation(sialic_acid_graph) self.assertEqual(expected, annotation_values_by_annotation) annotation_values = get_annotation_values(sialic_acid_graph, 'Confidence') self.assertEqual(expected['Confidence'], annotation_values) def test_get_unused_annotation_url(self): graph = BELGraph() name = n() graph.annotation_url[name] = n() self.assertEqual({name}, get_unused_annotations(graph)) def test_get_unused_annotation_pattern(self): graph = BELGraph() name = n() graph.annotation_pattern[name] = n() self.assertEqual({name}, get_unused_annotations(graph)) def test_get_unused_annotation_list(self): graph = BELGraph() name = n() graph.annotation_pattern[name] = {n(), n(), n()} self.assertEqual({name}, get_unused_annotations(graph)) pybel-0.12.1/tests/test_struct/test_summary/test_summary_nodes.py000066400000000000000000000123611334645200200254670ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for summary functions for nodes.""" import unittest from collections import Counter from pybel import BELGraph from pybel.constants import ABUNDANCE, BIOPROCESS, COMPLEX, PROTEIN from pybel.dsl import fusion_range, pathology, protein, protein_fusion from pybel.examples import egf_graph, sialic_acid_graph from pybel.struct.summary.node_summary import ( count_functions, count_names_by_namespace, count_namespaces, count_pathologies, count_variants, get_functions, get_names_by_namespace, get_namespaces, get_top_hubs, get_top_pathologies, ) from pybel.testing.utils import n class TestSummary(unittest.TestCase): """Test node summary functions.""" def test_functions_sialic(self): """Test counting nodes and grouping by function on the sialic acid graph.""" result = { PROTEIN: 7, COMPLEX: 1, ABUNDANCE: 1 } self.assertEqual(set(result), get_functions(sialic_acid_graph)) 
self.assertEqual(Counter(result), count_functions(sialic_acid_graph)) def test_functions_egf(self): """Test counting nodes and grouping by function on the EGF graph.""" result = { PROTEIN: 10, COMPLEX: 1, BIOPROCESS: 1 } self.assertEqual(set(result), get_functions(egf_graph)) self.assertEqual(result, count_functions(egf_graph)) def test_namespaces_sialic(self): """Test getting and counting namespaces' contents on the sialic acid graph.""" result = { 'HGNC': 7, 'CHEBI': 1 } self.assertEqual(set(result), get_namespaces(sialic_acid_graph)) self.assertEqual(Counter(result), count_namespaces(sialic_acid_graph)) def test_namespaces_egf(self): """Test getting and counting namespaces' contents on the EGF graph.""" result = { 'HGNC': 10, 'GOBP': 1, } self.assertEqual(set(result), get_namespaces(egf_graph)) self.assertEqual(Counter(result), count_namespaces(egf_graph)) def test_names_sialic(self): """Test getting and counting names by namespace.""" result = { 'CD33': 3, # once as reference, once in complex, and once as variant 'TYROBP': 1, 'SYK': 1, 'PTPN6': 1, 'PTPN11': 1, 'TREM2': 1, } self.assertEqual(set(result), get_names_by_namespace(sialic_acid_graph, 'HGNC')) self.assertEqual(result, dict(count_names_by_namespace(sialic_acid_graph, 'HGNC'))) def test_names_fusions(self): """Test that names inside fusions are still found by the iterator.""" graph = BELGraph() graph.namespace_url['HGNC'] = 'http://dummy' n = protein_fusion( partner_5p=protein(name='A', namespace='HGNC'), range_5p=fusion_range('p', 1, 15), partner_3p=protein(name='B', namespace='HGNC'), range_3p=fusion_range('p', 1, 100) ) graph.add_node_from_data(n) result = { 'A': 1, 'B': 1, } self.assertEqual(set(result), get_names_by_namespace(graph, 'HGNC')) self.assertEqual(result, count_names_by_namespace(graph, 'HGNC')) def test_get_names_raise(self): """Test that an index error is raised when trying to get names from a namespace that isn't present.""" with self.assertRaises(IndexError): 
get_names_by_namespace(sialic_acid_graph, 'NOPE') def test_count_names_raise(self): """Test that an index error is raised when trying to count a namespace that isn't present.""" with self.assertRaises(IndexError): count_names_by_namespace(sialic_acid_graph, 'NOPE') def test_count_variants(self): """Test counting the number of variants in a graph.""" variants = count_variants(sialic_acid_graph) self.assertEqual(1, variants['pmod']) def test_count_pathologies(self): """Test counting pathologies in the graph.""" graph = BELGraph() a, b, c, d = protein(n(), n()), protein(n(), n()), pathology(n(), n()), pathology(n(), n()) graph.add_association(a, c, n(), n()) graph.add_association(a, d, n(), n()) graph.add_association(b, d, n(), n()) pathology_counter = count_pathologies(graph) self.assertIn(c, pathology_counter) self.assertIn(d, pathology_counter) self.assertEqual(1, pathology_counter[c]) self.assertEqual(2, pathology_counter[d]) top_pathology_counter = get_top_pathologies(graph, count=1) self.assertEqual(1, len(top_pathology_counter)) node, count = top_pathology_counter[0] self.assertEqual(d, node) self.assertEqual(2, count) def test_get_top_hubs(self): """Test counting pathologies in the graph.""" graph = BELGraph() a, b, c = protein(n(), n()), protein(n(), n()), pathology(n(), n()) graph.add_association(a, b, n(), n()) graph.add_association(a, c, n(), n()) top_hubs = get_top_hubs(graph, count=1) print(top_hubs[0]) self.assertEqual(1, len(top_hubs)) node, degree = top_hubs[0] self.assertEqual(a, node) self.assertEqual(2, degree) pybel-0.12.1/tests/test_struct/test_transformations/000077500000000000000000000000001334645200200227225ustar00rootroot00000000000000pybel-0.12.1/tests/test_struct/test_transformations/__init__.py000066400000000000000000000001071334645200200250310ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for :mod:`pybel.struct.mutation`.""" 
pybel-0.12.1/tests/test_struct/test_transformations/test_collapse.py000066400000000000000000000070351334645200200261420ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for collapse functions.""" import unittest from pybel import BELGraph from pybel.constants import DIRECTLY_INCREASES from pybel.dsl import gene, mirna, pathology, pmod, protein, rna from pybel.struct.mutation.collapse import collapse_all_variants, collapse_nodes, collapse_to_genes from pybel.testing.utils import n HGNC = 'HGNC' GO = 'GO' CHEBI = 'CHEBI' g1 = gene(HGNC, '1') r1 = rna(HGNC, '1') p1 = protein(HGNC, '1') p1_phosphorylated = protein(HGNC, '1', variants=[pmod('Ph')]) g2 = gene(HGNC, '2') r2 = rna(HGNC, '2') p2 = protein(HGNC, '2') g3 = gene(HGNC, '3') r3 = rna(HGNC, '3') p3 = protein(HGNC, '3') g4 = gene(HGNC, '4') m4 = mirna(HGNC, '4') p5 = pathology(GO, '5') class TestCollapse(unittest.TestCase): """Tests for collapse functions.""" def test_collapse_by_dict(self): """Test collapsing nodes by a dictionary.""" graph = BELGraph() graph.add_node_from_data(p1) graph.add_node_from_data(p2) graph.add_node_from_data(p3) graph.add_increases(p1, p3, citation=n(), evidence=n()) graph.add_qualified_edge(p2, p3, relation=DIRECTLY_INCREASES, citation=n(), evidence=n()) self.assertEqual(3, graph.number_of_nodes()) self.assertEqual(2, graph.number_of_edges()) d = { p1: {p2} } collapse_nodes(graph, d) self.assertEqual({p1, p3}, set(graph)) self.assertEqual({(p1, p3), (p1, p3)}, set(graph.edges())) self.assertEqual(2, graph.number_of_edges(), msg=graph.edges(data=True, keys=True)) def test_collapse_dogma_1(self): """Test collapsing to genes, only with translations.""" graph = BELGraph() graph.add_translation(r1, p1) self.assertEqual(2, graph.number_of_nodes()) self.assertEqual(1, graph.number_of_edges()) collapse_to_genes(graph) self.assertIn(g1, graph) self.assertEqual(1, graph.number_of_nodes()) self.assertEqual(0, graph.number_of_edges()) def test_collapse_dogma_2(self): """Test collapsing 
to genes with translations and transcriptions.""" graph = BELGraph() graph.add_transcription(g1, r1) graph.add_translation(r1, p1) self.assertEqual(3, graph.number_of_nodes()) self.assertEqual(2, graph.number_of_edges()) collapse_to_genes(graph) self.assertIn(g1, graph) self.assertEqual(1, graph.number_of_nodes()) self.assertEqual(0, graph.number_of_edges()) def test_collapse_dogma_3(self): """Test collapsing to genes, only with transcriptions.""" graph = BELGraph() graph.add_transcription(g1, r1) self.assertEqual(2, graph.number_of_nodes()) self.assertEqual(1, graph.number_of_edges()) collapse_to_genes(graph) self.assertIn(g1, graph) self.assertEqual(1, graph.number_of_nodes()) self.assertEqual(0, graph.number_of_edges()) def test_collapse_all_variants(self): """Test collapsing all variants to their reference nodes.""" graph = BELGraph() graph.add_node_from_data(p1_phosphorylated) graph.add_increases(p1_phosphorylated, p2, n(), n()) self.assertEqual(3, graph.number_of_nodes()) self.assertEqual(2, graph.number_of_edges()) collapse_all_variants(graph) self.assertEqual(2, graph.number_of_nodes()) self.assertEqual(1, graph.number_of_edges()) self.assertIn(p1, graph) self.assertNotIn(p1_phosphorylated, graph) self.assertIn(p2, graph) pybel-0.12.1/tests/test_struct/test_transformations/test_deletions.py000066400000000000000000000141371334645200200263270ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for node deletion functions.""" import unittest from pybel import BELGraph from pybel.constants import FUNCTION, POSITIVE_CORRELATION, PROTEIN, RELATION from pybel.dsl import gene, hgvs, pathology, protein, protein_fusion, rna, rna_fusion from pybel.struct.mutation import ( enrich_protein_and_rna_origins, prune_protein_rna_origins, remove_associations, remove_pathologies, ) from pybel.struct.mutation.utils import remove_isolated_nodes, remove_isolated_nodes_op from pybel.testing.utils import n trem2_gene = gene(namespace='HGNC', name='TREM2') trem2_rna = 
rna(namespace='HGNC', name='TREM2') trem2_protein = protein(namespace='HGNC', name='TREM2') class TestDeletions(unittest.TestCase): """Test cases for deletion functions.""" def test_remove_pathologies(self): """Test removal of pathologies.""" g = BELGraph() p1, p2, p3 = (protein(namespace='HGNC', name=n()) for _ in range(3)) d1, d2 = (pathology(namespace='MESH', name=n()) for _ in range(2)) g.add_increases(p1, p2, n(), n()) g.add_increases(p2, p3, n(), n()) g.add_qualified_edge(p1, d1, POSITIVE_CORRELATION, n(), n()) g.add_qualified_edge(p2, d1, POSITIVE_CORRELATION, n(), n()) g.add_association(p2, d1, n(), n()) g.add_qualified_edge(d1, d2, POSITIVE_CORRELATION, n(), n()) g.add_qualified_edge(d1, d2, POSITIVE_CORRELATION, n(), n()) self.assertEqual(5, g.number_of_nodes()) self.assertEqual(7, g.number_of_edges()) self.assertEqual(2, len(g[p2][d1])) remove_associations(g) relations = list(g[p2][d1].values()) self.assertEqual(1, len(relations)) self.assertEqual(POSITIVE_CORRELATION, relations[0][RELATION]) self.assertEqual(5, g.number_of_nodes()) self.assertEqual(6, g.number_of_edges()) self.assertEqual(5, g.number_of_nodes()) remove_pathologies(g) self.assertTrue(p1, g) self.assertTrue(p2, g) self.assertTrue(p3, g) self.assertEqual(3, g.number_of_nodes()) self.assertEqual(2, g.number_of_edges()) def test_remove_isolated_in_place(self): """Test removing isolated nodes (in-place).""" g = BELGraph() g.add_edge(1, 2) g.add_edge(2, 3) g.add_node(4) remove_isolated_nodes(g) self.assertEqual(3, g.number_of_nodes()) self.assertEqual(2, g.number_of_edges()) def test_remove_isolated_out_of_place(self): """Test removing isolated nodes (out-of-place).""" g = BELGraph() g.add_edge(1, 2) g.add_edge(2, 3) g.add_node(4) g = remove_isolated_nodes_op(g) self.assertEqual(3, g.number_of_nodes()) self.assertEqual(2, g.number_of_edges()) class TestProcessing(unittest.TestCase): """Test inference of the central dogma.""" def assert_in_graph(self, node, graph): """Assert the node is in the 
graph.

        :type node: pybel.dsl.BaseEntity
        :type graph: pybel.BELGraph
        """
        self.assertIn(node, graph)

    def assert_not_in_graph(self, node, graph):
        """Assert the node is not in the graph.

        :type node: pybel.dsl.BaseEntity
        :type graph: pybel.BELGraph
        """
        self.assertNotIn(node, graph)

    def test_infer_on_sialic_acid_example(self):
        """Test enriching and then pruning the RNA and gene origins of the TREM2 protein."""
        graph = BELGraph()
        graph.add_node_from_data(trem2_protein)

        # Before enrichment only the protein is present.
        self.assert_in_graph(trem2_protein, graph)
        self.assert_not_in_graph(trem2_gene, graph)
        self.assert_not_in_graph(trem2_rna, graph)

        enrich_protein_and_rna_origins(graph)

        self.assert_in_graph(trem2_gene, graph)
        self.assert_in_graph(trem2_rna, graph)

        prune_protein_rna_origins(graph)

        # Pruning removes the gene and RNA again but keeps the protein.
        self.assert_not_in_graph(trem2_gene, graph)
        self.assert_not_in_graph(trem2_rna, graph)
        self.assert_in_graph(trem2_protein, graph)

    def test_no_infer_on_protein_variants(self):
        """Test that expansion doesn't occur on protein variants."""
        p = protein('HGNC', n(), variants=[hgvs(n())])

        graph = BELGraph()
        graph.add_node_from_data(p)

        self.assertEqual(2, graph.number_of_nodes())
        self.assertEqual(1, graph.number_of_edges())

        enrich_protein_and_rna_origins(graph)

        # NOTE(review): two nodes and two edges are added here; presumably they
        # are the RNA and gene of the reference protein, not of the variant -- confirm.
        self.assertEqual(4, graph.number_of_nodes())
        self.assertEqual(3, graph.number_of_edges())

    def test_no_infer_on_rna_variants(self):
        """Test that expansion doesn't occur on RNA variants."""
        r = rna('HGNC', n(), variants=[hgvs(n())])

        graph = BELGraph()
        graph.add_node_from_data(r)

        self.assertEqual(2, graph.number_of_nodes())
        self.assertEqual(1, graph.number_of_edges())

        enrich_protein_and_rna_origins(graph)

        # NOTE(review): one node and one edge are added here; presumably the gene
        # of the reference RNA -- confirm.
        self.assertEqual(3, graph.number_of_nodes())
        self.assertEqual(2, graph.number_of_edges())

    def test_no_infer_protein_fusion(self):
        """Test that no RNA nor gene is inferred from a protein fusion node."""
        partner5p = protein(n(), n())
        partner3p = protein(n(), n())

        p = protein_fusion(partner_3p=partner3p, partner_5p=partner5p)

        graph = BELGraph()
        graph.add_node_from_data(p)

        self.assertEqual(1,
graph.number_of_nodes())
        self.assertEqual(0, graph.number_of_edges())

        enrich_protein_and_rna_origins(graph)

        # The node and edge counts are unchanged by the enrichment.
        self.assertEqual(1, graph.number_of_nodes())
        self.assertEqual(0, graph.number_of_edges())

    def test_no_infer_rna_fusion(self):
        """Test that no gene is inferred from an RNA fusion node."""
        partner5p = rna(n(), n())
        partner3p = rna(n(), n())

        p = rna_fusion(partner_3p=partner3p, partner_5p=partner5p)

        graph = BELGraph()
        graph.add_node_from_data(p)

        self.assertEqual(1, graph.number_of_nodes())
        self.assertEqual(0, graph.number_of_edges())

        enrich_protein_and_rna_origins(graph)

        # The node and edge counts are unchanged by the enrichment.
        self.assertEqual(1, graph.number_of_nodes())
        self.assertEqual(0, graph.number_of_edges())
pybel-0.12.1/tests/test_struct/test_transformations/test_expansion.py000066400000000000000000000056611334645200200263470ustar00rootroot00000000000000# -*- coding: utf-8 -*-
"""Tests for expansion functions."""
import unittest

from pybel import BELGraph
from pybel.constants import COMPLEX, FUNCTION
from pybel.examples.sialic_acid_example import (
    cd33, cd33_phosphorylated, shp1, shp2, sialic_acid, sialic_acid_cd33_complex, sialic_acid_graph, syk,
)
from pybel.struct.mutation.expansion.neighborhood import (
    expand_node_neighborhood, expand_node_predecessors, expand_node_successors, expand_nodes_neighborhoods,
)


class TestExpansion(unittest.TestCase):
    """Test expansion functions."""

    def test_neighborhood(self):
        """Test expansion around the neighborhood of a given node."""
        graph = BELGraph()
        graph.add_node_from_data(cd33)
        self.assertEqual(1, graph.number_of_nodes())

        # Expand ``graph`` around cd33 using the sialic acid graph as the universe.
        expand_node_neighborhood(sialic_acid_graph, graph, cd33)

        self.assertEqual(3, graph.number_of_nodes())
        self.assertIn(sialic_acid_cd33_complex, graph)
        self.assertIn(cd33_phosphorylated, graph)

    def test_neighborhood_with_predecessors(self):
        """Test expansion on the predecessors of a given node."""
        graph = BELGraph()
        graph.add_node_from_data(cd33)
        graph.add_node_from_data(sialic_acid_cd33_complex)
        # NOTE(review): two calls yield three nodes; presumably adding the complex
        # also adds a member that is not yet in the graph -- confirm.
        self.assertEqual(3, graph.number_of_nodes())
expand_node_predecessors(sialic_acid_graph, graph, cd33) self.assertEqual(4, graph.number_of_nodes()) self.assertIn(sialic_acid, graph) self.assertIn(sialic_acid_cd33_complex, graph) self.assertIn(cd33_phosphorylated, graph) def test_neighborhood_with_successors(self): """Test expansion on the successors of a given node.""" graph = BELGraph() graph.add_node_from_data(cd33) graph.add_node_from_data(cd33_phosphorylated) self.assertEqual(2, graph.number_of_nodes()) expand_node_successors(sialic_acid_graph, graph, cd33) self.assertEqual(3, graph.number_of_nodes()) self.assertIn(sialic_acid_cd33_complex, graph) self.assertIn(cd33_phosphorylated, graph) def test_neighborhoods(self): """Test expansion on the neighborhood of given nodes. The edge between PTPN6/CD33ph should not be added. """ graph = BELGraph() graph.add_node_from_data(cd33) graph.add_node_from_data(syk) self.assertEqual(2, graph.number_of_nodes()) expand_nodes_neighborhoods(sialic_acid_graph, graph, [cd33, syk]) self.assertNotIn(shp1, graph[cd33_phosphorylated]) self.assertNotIn(shp2, graph[cd33_phosphorylated]) self.assertEqual(8, graph.number_of_nodes(), msg='wrong number of nodes') self.assertEqual(8, graph.number_of_edges(), msg='wrong number of edges') self.assertIn(sialic_acid_cd33_complex, graph) self.assertIn(cd33_phosphorylated, graph) # TODO test that if new nodes with metadata that's missing (namespace_url definition, etc) then that gets added too pybel-0.12.1/tests/test_struct/test_transformations/test_induction.py000066400000000000000000000357441334645200200263440ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for PyBEL induction functions.""" import string import unittest from pybel import BELGraph from pybel.constants import ( ASSOCIATION, CITATION_AUTHORS, CITATION_REFERENCE, CITATION_TYPE, CITATION_TYPE_PUBMED, DECREASES, INCREASES, ) from pybel.dsl import BaseEntity, gene, protein, rna from pybel.struct.mutation.expansion import expand_upstream_causal from 
pybel.struct.mutation.induction import get_subgraph_by_annotation_value from pybel.struct.mutation.induction.citation import get_subgraph_by_authors, get_subgraph_by_pubmed from pybel.struct.mutation.induction.paths import get_nodes_in_all_shortest_paths, get_subgraph_by_all_shortest_paths from pybel.struct.mutation.induction.upstream import get_upstream_causal_subgraph from pybel.struct.mutation.induction.utils import get_subgraph_by_induction from pybel.testing.utils import n trem2_gene = gene(namespace='HGNC', name='TREM2') trem2_rna = rna(namespace='HGNC', name='TREM2') trem2_protein = protein(namespace='HGNC', name='TREM2') class TestGraphMixin(unittest.TestCase): """A mixin to enable testing nodes and edge membership in the graph.""" def assert_in_edge(self, source, target, graph): """Assert the edge is in the graph. :param source: :param target: :type graph: pybel.BELGraph :rtype: bool """ self.assertIn(target, graph[source]) def assert_all_nodes_are_base_entities(self, graph): """Assert that all nodes are base entities.""" for node in graph: self.assertIsInstance(node, BaseEntity) class TestInduction(TestGraphMixin): """Test induction functions.""" def test_get_subgraph_by_induction(self): """Test get_subgraph_by_induction.""" graph = BELGraph() keyword, url = n(), n() graph.namespace_url[keyword] = url a, b, c, d = [protein(namespace='test', name=str(i)) for i in range(4)] graph.add_directly_increases(a, b, n(), n()) graph.add_directly_increases(b, c, n(), n()) graph.add_directly_increases(c, d, n(), n()) graph.add_increases(a, d, n(), n()) nodes = [b, c] subgraph = get_subgraph_by_induction(graph, nodes) self.assertIsInstance(subgraph, BELGraph) self.assert_all_nodes_are_base_entities(subgraph) self.assertNotEqual(0, len(subgraph.namespace_url), msg='improperly found metadata: {}'.format(subgraph.graph)) self.assertIn(keyword, subgraph.namespace_url) self.assertEqual(url, subgraph.namespace_url[keyword]) self.assertNotIn(a, subgraph) self.assertIn(b, 
subgraph) self.assertIn(c, subgraph) self.assertNotIn(d, subgraph) def test_get_subgraph_by_all_shortest_paths(self): """Test get_subgraph_by_all_shortest_paths.""" graph = BELGraph() keyword, url = n(), n() graph.namespace_url[keyword] = url a, b, c, d, e, f = [protein(namespace='test', name=n()) for _ in range(6)] graph.add_increases(a, b, n(), n()) graph.add_increases(a, c, n(), n()) graph.add_increases(b, d, n(), n()) graph.add_increases(c, d, n(), n()) graph.add_increases(a, e, n(), n()) graph.add_increases(e, f, n(), n()) graph.add_increases(f, d, n(), n()) query_nodes = [a, d] shortest_paths_nodes = get_nodes_in_all_shortest_paths(graph, query_nodes) self.assertIn(a, shortest_paths_nodes) self.assertIn(b, shortest_paths_nodes) self.assertIn(c, shortest_paths_nodes) self.assertIn(d, shortest_paths_nodes) subgraph = get_subgraph_by_all_shortest_paths(graph, query_nodes) self.assert_all_nodes_are_base_entities(subgraph) self.assertIsInstance(subgraph, BELGraph) self.assertIn(keyword, subgraph.namespace_url) self.assertEqual(url, subgraph.namespace_url[keyword]) self.assertIn(a, subgraph) self.assertIn(b, subgraph) self.assertIn(c, subgraph) self.assertIn(d, subgraph) self.assertNotIn(e, subgraph) self.assertNotIn(f, subgraph) def test_get_upstream_causal_subgraph(self): """Test get_upstream_causal_subgraph.""" a, b, c, d, e, f = [protein(namespace='test', name=n()) for _ in range(6)] citation, evidence = '', '' universe = BELGraph() universe.namespace_pattern['test'] = 'test-url' universe.add_qualified_edge(a, b, INCREASES, citation, evidence) universe.add_qualified_edge(b, c, INCREASES, citation, evidence) universe.add_qualified_edge(d, a, ASSOCIATION, citation, evidence) universe.add_qualified_edge(e, a, INCREASES, citation, evidence) universe.add_qualified_edge(f, b, DECREASES, citation, evidence) subgraph = get_upstream_causal_subgraph(universe, [a, b]) self.assertIsInstance(subgraph, BELGraph) self.assert_all_nodes_are_base_entities(subgraph) 
self.assertIn('test', subgraph.namespace_pattern) self.assertEqual('test-url', subgraph.namespace_pattern['test']) self.assertIn(a, subgraph) self.assertIn(b, subgraph) self.assertNotIn(c, subgraph) self.assertNotIn(d, subgraph) self.assertIn(e, subgraph) self.assertIn(f, subgraph) self.assertEqual(4, subgraph.number_of_nodes()) self.assert_in_edge(e, a, subgraph) self.assert_in_edge(a, b, subgraph) self.assert_in_edge(f, b, subgraph) self.assertEqual(3, subgraph.number_of_edges()) def test_expand_upstream_causal_subgraph(self): """Test expanding on the upstream causal subgraph.""" a, b, c, d, e, f = [protein(namespace='test', name=i) for i in string.ascii_lowercase[:6]] universe = BELGraph() universe.add_qualified_edge(a, b, INCREASES, n(), n()) universe.add_qualified_edge(b, c, INCREASES, n(), n()) universe.add_qualified_edge(d, a, ASSOCIATION, n(), n()) universe.add_qualified_edge(e, a, INCREASES, n(), n()) universe.add_qualified_edge(f, b, DECREASES, n(), n()) subgraph = BELGraph() subgraph.add_qualified_edge(a, b, INCREASES, n(), n()) expand_upstream_causal(universe, subgraph) self.assertIsInstance(subgraph, BELGraph) self.assert_all_nodes_are_base_entities(subgraph) self.assertIn(a, subgraph) self.assertIn(b, subgraph) self.assertNotIn(c, subgraph) self.assertNotIn(d, subgraph) self.assertIn(e, subgraph) self.assertIn(f, subgraph) self.assertEqual(4, subgraph.number_of_nodes()) self.assert_in_edge(e, a, subgraph) self.assert_in_edge(a, b, subgraph) self.assert_in_edge(f, b, subgraph) self.assertEqual(2, len(subgraph[a][b])) self.assertEqual(4, subgraph.number_of_edges(), msg='\n'.join(map(str, subgraph.edges()))) class TestEdgePredicateBuilders(TestGraphMixin): """Tests for edge predicate builders.""" def test_build_pmid_inclusion_filter(self): """Test getting a sub-graph by a single PubMed identifier.""" a, b, c, d = [protein(namespace='test', name=n()) for _ in range(4)] p1, p2, p3, p4 = n(), n(), n(), n() graph = BELGraph() keyword, url = n(), n() 
graph.namespace_url[keyword] = url graph.add_increases(a, b, n(), citation=p1) graph.add_increases(a, b, n(), citation=p2) graph.add_increases(b, c, n(), citation=p1) graph.add_increases(b, c, n(), citation=p3) graph.add_increases(c, d, n(), citation=p3) subgraph = get_subgraph_by_pubmed(graph, p1) self.assertIsInstance(subgraph, BELGraph) self.assert_all_nodes_are_base_entities(subgraph) self.assertIn(keyword, subgraph.namespace_url) self.assertEqual(url, subgraph.namespace_url[keyword]) self.assertIn(a, subgraph) self.assertIn(b, subgraph) self.assertIn(c, subgraph) self.assertNotIn(d, subgraph) empty_subgraph = get_subgraph_by_pubmed(graph, p4) self.assertIn(keyword, subgraph.namespace_url) self.assertEqual(url, subgraph.namespace_url[keyword]) self.assertEqual(0, empty_subgraph.number_of_nodes()) def test_build_pmid_set_inclusion_filter(self): """Test getting a sub-graph by a set of PubMed identifiers.""" a, b, c, d, e, f = [protein(namespace='test', name=n()) for _ in range(6)] p1, p2, p3, p4, p5, p6 = n(), n(), n(), n(), n(), n() graph = BELGraph() keyword, url = n(), n() graph.namespace_url[keyword] = url graph.add_increases(a, b, n(), citation=p1) graph.add_increases(a, b, n(), citation=p2) graph.add_increases(b, c, n(), citation=p1) graph.add_increases(b, c, n(), citation=p3) graph.add_increases(c, d, n(), citation=p3) graph.add_increases(e, f, n(), citation=p4) subgraph = get_subgraph_by_pubmed(graph, [p1, p4]) self.assertIsInstance(subgraph, BELGraph) self.assert_all_nodes_are_base_entities(subgraph) self.assertIn(keyword, subgraph.namespace_url) self.assertEqual(url, subgraph.namespace_url[keyword]) self.assertIn(a, subgraph) self.assertIn(b, subgraph) self.assertIn(c, subgraph) self.assertNotIn(d, subgraph) self.assertIn(e, subgraph) self.assertIn(f, subgraph) empty_subgraph = get_subgraph_by_pubmed(graph, [p5, p6]) self.assertIn(keyword, subgraph.namespace_url) self.assertEqual(url, subgraph.namespace_url[keyword]) self.assertEqual(0, 
empty_subgraph.number_of_nodes()) def test_build_author_inclusion_filter(self): """Test getting a sub-graph by a single author.""" a, b, c, d = [protein(namespace='test', name=n()) for _ in range(4)] a1, a2, a3, a4, a5 = n(), n(), n(), n(), n() c1 = { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: n(), CITATION_AUTHORS: [a1, a2, a3] } c2 = { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: n(), CITATION_AUTHORS: [a1, a4] } graph = BELGraph() keyword, url = n(), n() graph.namespace_url[keyword] = url graph.add_increases(a, b, n(), citation=c1) graph.add_increases(a, b, n(), citation=c2) graph.add_increases(b, c, n(), citation=c1) graph.add_increases(c, d, n(), citation=c2) subgraph1 = get_subgraph_by_authors(graph, a1) self.assertIsInstance(subgraph1, BELGraph) self.assert_all_nodes_are_base_entities(subgraph1) self.assertIn(keyword, subgraph1.namespace_url) self.assertEqual(url, subgraph1.namespace_url[keyword]) self.assertIn(a, subgraph1) self.assertIn(b, subgraph1) self.assertIn(c, subgraph1) self.assertIn(d, subgraph1) subgraph2 = get_subgraph_by_authors(graph, a2) self.assertIsInstance(subgraph2, BELGraph) self.assert_all_nodes_are_base_entities(subgraph2) self.assertIn(keyword, subgraph2.namespace_url) self.assertEqual(url, subgraph2.namespace_url[keyword]) self.assertIn(a, subgraph2) self.assertIn(b, subgraph2) self.assertIn(c, subgraph2) self.assertNotIn(d, subgraph2) subgraph3 = get_subgraph_by_authors(graph, a5) self.assertIsInstance(subgraph3, BELGraph) self.assert_all_nodes_are_base_entities(subgraph3) self.assertIn(keyword, subgraph3.namespace_url) self.assertEqual(url, subgraph3.namespace_url[keyword]) self.assertEqual(0, subgraph3.number_of_nodes()) def test_build_author_set_inclusion_filter(self): """Test getting a sub-graph by a set of authors.""" a, b, c, d = [protein(namespace='test', name=n()) for _ in range(4)] a1, a2, a3, a4 = n(), n(), n(), n() c1 = { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: n(), 
CITATION_AUTHORS: [a1, a2, a3] } c2 = { CITATION_TYPE: CITATION_TYPE_PUBMED, CITATION_REFERENCE: n(), CITATION_AUTHORS: [a1, a4] } graph = BELGraph() keyword, url = n(), n() graph.namespace_url[keyword] = url graph.add_increases(a, b, n(), citation=c1) graph.add_increases(a, b, n(), citation=c2) graph.add_increases(b, c, n(), citation=c1) graph.add_increases(c, d, n(), citation=c2) subgraph1 = get_subgraph_by_authors(graph, [a1, a2]) self.assertIsInstance(subgraph1, BELGraph) self.assert_all_nodes_are_base_entities(subgraph1) self.assertIn(keyword, subgraph1.namespace_url) self.assertEqual(url, subgraph1.namespace_url[keyword]) self.assertIn(a, subgraph1) self.assertIn(b, subgraph1) self.assertIn(c, subgraph1) self.assertIn(d, subgraph1) class TestEdgeInduction(unittest.TestCase): """Test induction over edges.""" def test_get_subgraph_by_annotation_value(self): """Test getting a subgraph by a single annotation value.""" graph = BELGraph() a, b, c, d = [protein(namespace='test', name=n()) for _ in range(4)] k1 = graph.add_increases(a, b, citation=n(), evidence=n(), annotations={ 'Subgraph': {'A'} }) k2 = graph.add_increases(a, b, citation=n(), evidence=n(), annotations={ 'Subgraph': {'B'} }) k3 = graph.add_increases(a, b, citation=n(), evidence=n(), annotations={ 'Subgraph': {'A', 'C', 'D'} }) subgraph = get_subgraph_by_annotation_value(graph, 'Subgraph', 'A') self.assertIsInstance(subgraph, BELGraph) self.assertIn(a, subgraph) self.assertIn(b, subgraph) self.assertIn(b, subgraph[a]) self.assertIn(k1, subgraph[a][b]) self.assertNotIn(k2, subgraph[a][b]) self.assertIn(k3, subgraph[a][b]) def test_get_subgraph_by_annotation_values(self): """Test getting a subgraph by multiple annotation value.""" graph = BELGraph() a, b, c, d = [protein(namespace='test', name=n()) for _ in range(4)] k1 = graph.add_increases(a, b, citation=n(), evidence=n(), annotations={ 'Subgraph': {'A'} }) k2 = graph.add_increases(a, b, citation=n(), evidence=n(), annotations={ 'Subgraph': {'B'} }) 
k3 = graph.add_increases(a, b, citation=n(), evidence=n(), annotations={ 'Subgraph': {'A', 'C', 'D'} }) k4 = graph.add_increases(a, b, citation=n(), evidence=n(), annotations={ 'Subgraph': {'C', 'D'} }) subgraph = get_subgraph_by_annotation_value(graph, 'Subgraph', {'A', 'C'}) self.assertIsInstance(subgraph, BELGraph) self.assertIn(a, subgraph) self.assertIn(b, subgraph) self.assertIn(b, subgraph[a]) self.assertIn(k1, subgraph[a][b]) self.assertNotIn(k2, subgraph[a][b]) self.assertIn(k3, subgraph[a][b]) self.assertIn(k4, subgraph[a][b]) pybel-0.12.1/tests/test_struct/test_transformations/test_metadata.py000066400000000000000000000042301334645200200261120ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for metadata transforations.""" import unittest from pybel import BELGraph from pybel.constants import ANNOTATIONS, INCREASES from pybel.dsl import protein from pybel.examples import sialic_acid_graph from pybel.struct.mutation import add_annotation_value, remove_annotation_value, strip_annotations from pybel.testing.utils import n class TestMetadata(unittest.TestCase): """Test metadata transformations.""" def test_strip_annotations(self): """Test the strip_annotation function.""" x = protein(namespace='HGNC', name='X') y = protein(namespace='HGNC', name='X') graph = BELGraph() key = graph.add_qualified_edge( x, y, relation=INCREASES, citation='123456', evidence='Fake', annotations={ 'A': {'B': True} }, ) self.assertIn(ANNOTATIONS, graph[x][y][key]) strip_annotations(graph) self.assertNotIn(ANNOTATIONS, graph[x][y][key]) def test_add_and_remove_annotation(self): """Test adding and removing annotations. See: :func:`pybel.struct.mutation.add_annotation_value` and :func:`pybel.struct.mutation.remove_annotation_value` functions. 
""" graph = sialic_acid_graph.copy() annotation = 'test-annotation' value = 'test-value' url = n() graph.annotation_url[annotation] = url add_annotation_value(graph, annotation, value) for u, v, d in graph.edges(data=True): annotations = d.get(ANNOTATIONS) if annotations is None: continue self.assertIn(annotation, annotations) self.assertIn(value, annotations[annotation]) remove_annotation_value(graph, annotation, value) for u, v, d in graph.edges(data=True): annotations = d.get(ANNOTATIONS) if annotations is None: continue annotation_values = annotations.get(annotation) if annotation_values is None: continue self.assertNotIn(value, annotation_values) pybel-0.12.1/tests/test_struct/test_transformations/test_random.py000066400000000000000000000112301334645200200256100ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Test for functions for inducing random sub-graphs.""" import random import sys import unittest from collections import Counter import networkx as nx from pybel.examples import sialic_acid_graph, statin_graph from pybel.struct.mutation.induction.paths import get_random_path from pybel.struct.mutation.induction.random_subgraph import ( _helper, get_graph_with_random_edges, get_random_node, get_random_subgraph, ) from pybel.testing.generate import generate_random_graph @unittest.skipIf(sys.version_info < (3,), 'Will not support random operations on python2') class TestRandom(unittest.TestCase): """Test random graph induction functions.""" def setUp(self): """Set the random seed before each test.""" random.seed(127) # love that number def test_random_edges(self): """Test getting a graph by random edges.""" n_nodes, n_edges, n_sample_edges = 15, 80, 40 graph = generate_random_graph(n_nodes=n_nodes, n_edges=n_edges) subgraph = get_graph_with_random_edges(graph, n_edges=n_sample_edges) self.assertEqual(n_sample_edges, subgraph.number_of_edges()) def test_random_nodes(self): """Test getting random nodes.""" graph = nx.MultiDiGraph() graph.add_edge(1, 2) 
graph.add_edge(1, 3) graph.add_edge(1, 4) graph.add_edge(1, 5) n = 30000 r = Counter( get_random_node(graph, set(), invert_degrees=False) for _ in range(n) ) degree_sum = 4 + 1 + 1 + 1 + 1 self.assertAlmostEqual(4 / degree_sum, r[1] / n, places=2) self.assertAlmostEqual(1 / degree_sum, r[2] / n, places=2) self.assertAlmostEqual(1 / degree_sum, r[3] / n, places=2) self.assertAlmostEqual(1 / degree_sum, r[4] / n, places=2) self.assertAlmostEqual(1 / degree_sum, r[5] / n, places=2) def test_random_nodes_inverted(self): """Test getting random nodes.""" graph = nx.MultiDiGraph() graph.add_edge(1, 2) graph.add_edge(1, 3) graph.add_edge(1, 4) graph.add_edge(1, 5) n = 30000 r = Counter( get_random_node(graph, set(), invert_degrees=True) for _ in range(n) ) degree_sum = (1 / 4) + (1 / 1) + (1 / 1) + (1 / 1) + (1 / 1) self.assertAlmostEqual((1 / 4) / degree_sum, r[1] / n, places=2) self.assertAlmostEqual((1 / 1) / degree_sum, r[2] / n, places=2) self.assertAlmostEqual((1 / 1) / degree_sum, r[3] / n, places=2) self.assertAlmostEqual((1 / 1) / degree_sum, r[4] / n, places=2) self.assertAlmostEqual((1 / 1) / degree_sum, r[5] / n, places=2) def test_random_sample(self): """Test randomly sampling a graph.""" n_nodes, n_edges = 50, 500 graph = generate_random_graph(n_nodes=n_nodes, n_edges=n_edges) self.assertEqual(n_edges, graph.number_of_edges()) sg_1 = get_random_subgraph(graph, number_edges=250, number_seed_edges=5, invert_degrees=False) self.assertEqual(250, sg_1.number_of_edges()) sg_2 = get_random_subgraph(graph, number_edges=250, number_seed_edges=5, invert_degrees=True) self.assertEqual(250, sg_2.number_of_edges()) def test_random_sample_small(self): """Test a graph that is too small to sample.""" n_nodes, n_edges = 11, 25 graph = generate_random_graph(n_nodes, n_edges) self.assertEqual(n_edges, graph.number_of_edges()) sg_1 = get_random_subgraph(graph, number_edges=250, number_seed_edges=5, invert_degrees=False) self.assertEqual(graph.number_of_edges(), 
sg_1.number_of_edges(), msg='since graph is too small, the subgraph should contain the whole thing') sg_2 = get_random_subgraph(graph, number_edges=250, number_seed_edges=5, invert_degrees=True) self.assertEqual(graph.number_of_edges(), sg_2.number_of_edges(), msg='since graph is too small, the subgraph should contain the whole thing') def test_helper_failure(self): graph = nx.MultiDiGraph() graph.add_edge(1, 2) graph.add_edge(2, 3) result = nx.MultiDiGraph() result.add_edge(1, 2) _helper( result, graph, number_edges_remaining=5, no_grow={1, 2, 3}, ) self.assertNotIn(3, result) class TestRandomPath(unittest.TestCase): """Test getting random paths.""" def test_get_random_path(self): """Test getting random paths doesn't crash.""" for graph in (sialic_acid_graph, statin_graph): for _ in range(100): get_random_path(graph) pybel-0.12.1/tests/test_struct/test_transformations/test_transfer.py000066400000000000000000000074511334645200200261660ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for transfer of knowledge and inference functions.""" import unittest from pybel.examples.statin_example import ( avorastatin, ec_11134, ec_11188, fluvastatin, hmgcr, hmgcr_inhibitor, mevinolinic_acid, statin, statin_graph, synthetic_statin, ) from pybel.struct.mutation import infer_child_relations from pybel.struct.mutation.transfer import iter_children class TestTransfer(unittest.TestCase): """Tests for transfer of knowledge and inference functions.""" def test_get_children(self): """Test iterating over the children of a node.""" children = list(iter_children(statin_graph, hmgcr_inhibitor)) self.assertNotEqual(0, len(children), msg='no children found') self.assertIn(mevinolinic_acid, children, msg='direct child not found') def test_infer(self): """Test inferring child relations.""" graph = statin_graph.copy() self.assertEqual(9, graph.number_of_nodes()) self.assertEqual(8, graph.number_of_edges()) self.assertNotIn(ec_11134, graph[fluvastatin]) self.assertNotIn(ec_11188, 
graph[fluvastatin]) self.assertNotIn(ec_11134, graph[avorastatin]) self.assertNotIn(ec_11188, graph[avorastatin]) self.assertNotIn(ec_11134, graph[synthetic_statin]) self.assertNotIn(ec_11188, graph[synthetic_statin]) self.assertNotIn(ec_11134, graph[statin]) self.assertNotIn(ec_11188, graph[statin]) self.assertNotIn(ec_11134, graph[mevinolinic_acid]) self.assertNotIn(ec_11188, graph[mevinolinic_acid]) self.assertIn(ec_11134, graph[hmgcr_inhibitor]) self.assertIn(ec_11188, graph[hmgcr_inhibitor]) infer_child_relations(graph, hmgcr_inhibitor) self.assertIn(ec_11134, graph[fluvastatin]) self.assertIn(ec_11188, graph[fluvastatin]) self.assertIn(ec_11134, graph[avorastatin]) self.assertIn(ec_11188, graph[avorastatin]) self.assertIn(ec_11134, graph[synthetic_statin]) self.assertIn(ec_11188, graph[synthetic_statin]) self.assertIn(ec_11134, graph[statin]) self.assertIn(ec_11188, graph[statin]) self.assertIn(ec_11134, graph[mevinolinic_acid]) self.assertIn(ec_11188, graph[mevinolinic_acid]) self.assertIn(ec_11134, graph[hmgcr_inhibitor]) self.assertIn(ec_11188, graph[hmgcr_inhibitor]) self.assertEqual(9, graph.number_of_nodes()) self.assertEqual(18, graph.number_of_edges()) infer_child_relations(graph, ec_11134) self.assertIn(hmgcr, graph[fluvastatin]) self.assertIn(hmgcr, graph[avorastatin]) self.assertIn(hmgcr, graph[synthetic_statin]) self.assertIn(hmgcr, graph[statin]) self.assertIn(hmgcr, graph[mevinolinic_acid]) self.assertIn(hmgcr, graph[hmgcr_inhibitor]) self.assertEqual(9, graph.number_of_nodes()) self.assertEqual(24, graph.number_of_edges()) self.assertEqual(9, statin_graph.number_of_nodes(), msg='original graph nodes should not be modified') self.assertEqual(8, statin_graph.number_of_edges(), msg='original graph edges should not be modified') def test_does_not_redo(self): """Test that :func:`propagate_node_relations` does not add the same edges twice.""" graph = statin_graph.copy() self.assertEqual(9, graph.number_of_nodes()) self.assertEqual(8, 
graph.number_of_edges()) infer_child_relations(graph, hmgcr_inhibitor) self.assertEqual(9, graph.number_of_nodes()) self.assertEqual(18, graph.number_of_edges()) infer_child_relations(graph, hmgcr_inhibitor) self.assertEqual(9, graph.number_of_nodes()) self.assertEqual(18, graph.number_of_edges(), msg='edges should not be added again') if __name__ == '__main__': unittest.main() pybel-0.12.1/tests/test_utils.py000066400000000000000000000074351334645200200166310ustar00rootroot00000000000000# -*- coding: utf-8 -*- """Tests for PyBEL utilities.""" import unittest import time from six import string_types from pybel.parser.exc import PlaceholderAminoAcidWarning from pybel.parser.modifiers.constants import amino_acid from pybel.parser.utils import nest from pybel.resources.definitions import get_bel_resource from pybel.resources.exc import EmptyResourceError from pybel.resources.utils import get_iso_8601_date from pybel.testing.constants import test_an_1, test_ns_empty from pybel.testing.mocks import mock_bel_resources from pybel.utils import expand_dict, flatten_dict, tokenize_version class TestTokenizeVersion(unittest.TestCase): """Test tokenization of version strings.""" def test_simple(self): """Test the simplest version string case.""" version_str = '0.1.2' version_tuple = 0, 1, 2 self.assertEqual(version_tuple, tokenize_version(version_str)) def test_long(self): """Test when the version pieces have more than 1 digit.""" version_str = '0.12.20' version_tuple = 0, 12, 20 self.assertEqual(version_tuple, tokenize_version(version_str)) def test_dev(self): """Test when there's a dash after.""" version_str = '0.1.2-dev' version_tuple = 0, 1, 2 self.assertEqual(version_tuple, tokenize_version(version_str)) class TestRandom(unittest.TestCase): def test_nest_failure(self): with self.assertRaises(ValueError): nest() def test_bad_aminoAcid(self): with self.assertRaises(PlaceholderAminoAcidWarning): amino_acid.parseString('X') def test_get_date(self): d = get_iso_8601_date() 
self.assertIsInstance(d, string_types) self.assertEqual(d[:4], time.strftime('%Y')) self.assertEqual(d[4:6], time.strftime('%m')) self.assertEqual(d[6:8], time.strftime('%d')) class TestUtils(unittest.TestCase): def test_download_url(self): """Test downloading a resource by URL.""" with mock_bel_resources: res = get_bel_resource(test_an_1) expected_values = { 'TestAnnot1': 'O', 'TestAnnot2': 'O', 'TestAnnot3': 'O', 'TestAnnot4': 'O', 'TestAnnot5': 'O' } self.assertEqual(expected_values, res['Values']) def test_download_raises_on_empty(self): """Test that an error is thrown if an empty resource is downloaded.""" with mock_bel_resources, self.assertRaises(EmptyResourceError): get_bel_resource(test_ns_empty) def test_expand_dict(self): flat_dict = { 'k1': 'v1', 'k2_k2a': 'v2', 'k2_k2b': 'v3', 'k2_k2c_k2ci': 'v4', 'k2_k2c_k2cii': 'v5' } expected_dict = { 'k1': 'v1', 'k2': { 'k2a': 'v2', 'k2b': 'v3', 'k2c': { 'k2ci': 'v4', 'k2cii': 'v5' } } } self.assertEqual(expected_dict, expand_dict(flat_dict)) def test_flatten_dict(self): d = { 'A': 5, 'B': 'b', 'C': { 'D': 'd', 'E': 'e' } } expected = { 'A': 5, 'B': 'b', 'C_D': 'd', 'C_E': 'e' } self.assertEqual(expected, flatten_dict(d)) def test_flatten_dict_withLists(self): d = { 'A': 5, 'B': 'b', 'C': { 'D': ['d', 'delta'], 'E': 'e' } } expected = { 'A': 5, 'B': 'b', 'C_D': 'd,delta', 'C_E': 'e' } self.assertEqual(expected, flatten_dict(d)) pybel-0.12.1/tox.ini000066400000000000000000000073421334645200200142260ustar00rootroot00000000000000# Tox (http://tox.testrun.org/) is a tool for running tests # in multiple virtualenvs. This configuration file will run the # test suite on all supported python versions. To use it, "pip install tox" # and then run "tox" from this directory. 
[tox]
# Environments run by a bare "tox" invocation. The coverage environments
# bracket the test runs: stale data is erased first and the per-run data
# files are combined and reported last.
envlist =
    # always keep coverage-clean first
    coverage-clean
    # code linters/stylers
    manifest
    #flake8
    #pylint
    pyroma
    # documentation linters/checkers
    doc8
    readme
    docs
    # the actual tests
    py27
    py
    # always keep coverage-report last
    coverage-report

# Base settings shared by the test environments (py27, py) and inherited by
# the named [testenv:*] sections unless they override a key.
[testenv]
# "coverage run -p" writes a separate data file per run so that the
# coverage-report environment can combine them. {posargs:tests} forwards any
# arguments given after "tox --" to pytest and defaults to the tests directory.
commands = coverage run -p -m pytest --durations=20 {posargs:tests}
# Environment variables forwarded from the invoking shell into the test run.
passenv = PYBEL_TEST_CONNECTOR PYBEL_TEST_CONNECTION TRAVIS CI DB
# The last entry expands to the value of PYBEL_TEST_CONNECTOR, or to nothing
# when that variable is unset.
deps =
    coverage
    pytest
    mock
    pathlib
    {env:PYBEL_TEST_CONNECTOR:}
# Non-virtualenv executables that commands are allowed to call.
whitelist_externals =
    /bin/cat
    /bin/cp
    /bin/mkdir

# Remove coverage data left over from previous runs.
[testenv:coverage-clean]
deps = coverage
skip_install = true
commands = coverage erase

# Run check-manifest against the project.
[testenv:manifest]
deps = check-manifest
skip_install = true
commands = check-manifest

# Commented out of the envlist above; run explicitly with "tox -e flake8".
[testenv:flake8]
basepython = python3
skip_install = true
deps =
    flake8
    flake8-bandit
    flake8-colors
    flake8-docstrings
    flake8-import-order
    pep8-naming
commands =
    flake8 src/pybel/ setup.py
description = Run the flake8 tool with several plugins (bandit, docstrings, import order, pep8 naming).

# Commented out of the envlist above; run explicitly with "tox -e pylint".
# Unlike the other linters this one does not set skip_install.
[testenv:pylint]
basepython = python3
deps =
    pyflakes
    pylint
commands =
    pylint src/pybel

# Not in the envlist; run explicitly with "tox -e radon".
[testenv:radon]
deps = radon
skip_install = true
commands = radon mi .
description = Run the radon tool to calculate the maintainability indices of the project sources.

# Not in the envlist; run explicitly with "tox -e vulture".
[testenv:vulture]
deps = vulture
skip_install = true
commands = vulture src/pybel
description = Run the vulture tool to look for dead code.

# Not in the envlist; run explicitly with "tox -e xenon".
[testenv:xenon]
deps = xenon
skip_install = true
commands = xenon --max-average A --max-modules A --max-absolute B .
description = Run the xenon tool to monitor code complexity.

[testenv:pyroma]
deps =
    pygments
    pyroma
skip_install = true
commands = pyroma --min=10 .
description = Run the pyroma tool to check the project's package friendliness.

[testenv:doc8]
basepython = python3
skip_install = true
deps =
    sphinx
    doc8
commands =
    doc8 docs/source/ README.rst
description = Run the doc8 tool to check the style of the RST files in the project docs.
# Lint README.rst with rst-lint.
[testenv:readme]
commands = rst-lint README.rst
skip_install = true
deps =
    restructuredtext_lint
    pygments

# Build the Sphinx documentation from a copy of docs/source placed in the
# environment's temporary directory. -W makes sphinx-build treat warnings as
# errors. The second build uses the coverage builder and its reports are
# printed with cat.
[testenv:docs]
changedir = docs
deps =
    sphinx
    sphinx_rtd_theme
    sphinx-click
commands =
    mkdir -p {envtmpdir}
    cp -r source {envtmpdir}/source
    sphinx-build -W -b html -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/html
    sphinx-build -W -b coverage -d {envtmpdir}/build/doctrees {envtmpdir}/source {envtmpdir}/build/coverage
    cat {envtmpdir}/build/coverage/c.txt
    cat {envtmpdir}/build/coverage/python.txt

# Merge the per-run data files written by "coverage run -p" and print the report.
[testenv:coverage-report]
deps = coverage
skip_install = true
commands =
    coverage combine
    coverage report

####################
# Deployment tools #
####################

# Thin wrapper: forwards the arguments given after "tox -e bumpversion --" to bumpversion.
[testenv:bumpversion]
commands = bumpversion {posargs}
skip_install = true
deps =
    bumpversion

# Build the source distribution and wheel.
[testenv:build]
basepython = python3
skip_install = true
deps =
    wheel
    setuptools
commands =
    python setup.py -q sdist bdist_wheel

# Reuses the build environment's deps and commands, then uploads with twine.
[testenv:release]
basepython = python3
skip_install = true
deps =
    {[testenv:build]deps}
    twine >= 1.5.0
commands =
    {[testenv:build]commands}
    twine upload --skip-existing dist/*

# Full release flow: bump the "release" version part, build and upload,
# push, then bump the patch part.
# NOTE(review): {[testenv:release]deps} already expands to include the build
# deps, so the explicit {[testenv:build]deps} entry looks redundant.
[testenv:finish]
basepython = python3
skip_install = true
whitelist_externals =
    /bin/git
    /usr/local/bin/git
deps =
    {[testenv:build]deps}
    {[testenv:release]deps}
    bumpversion
commands =
    bumpversion release
    {[testenv:release]commands}
    git push
    bumpversion patch