==> rdflib-endpoint-0.5.1/.github/workflows/release.yml <==
name: Release
on:
  workflow_dispatch:
  release:
    types: [published]

jobs:
  tests:
    uses: vemonet/rdflib-endpoint/.github/workflows/test.yml@main
    secrets: inherit

  publish:
    needs: tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          pip install build
      - name: Build package
        run: python -m build
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_TOKEN }}

==> rdflib-endpoint-0.5.1/.github/workflows/test.yml <==
name: Tests
on: [push, pull_request, workflow_call, workflow_dispatch]

jobs:
  CodeQL-Analysis:
    name: CodeQL analysis
    runs-on: ubuntu-latest
    permissions:
      security-events: write
      packages: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: python
      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:python"

  tests:
    name: Run tests
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
    steps:
      - uses: actions/checkout@v4
      - name: 🐍 Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: 📥️ Install dependencies
        run: |
          pipx install hatch
      - name: ☑️ Test with coverage
        run: |
          hatch run test --cov-report xml
      - name: ☂️ Upload coverage to Coveralls
        uses: coverallsapp/github-action@v2

==> rdflib-endpoint-0.5.1/.gitignore <==
# Python build artefacts
build/
*.egg-info
.eggs/
dist/
__pycache__/
.*_cache/
.coverage
coverage.xml
htmlcov/

==> rdflib-endpoint-0.5.1/CONTRIBUTING.md (fragment) <==
1. Make sure a `PYPI_TOKEN` secret is defined in your repository settings (Settings > Secrets > Actions). You can get an API token from PyPI at [pypi.org/manage/account](https://pypi.org/manage/account).

2. Increment the `version` number following semantic versioning, selecting between `fix`, `minor`, or `major`:

   ```bash
   hatch version fix
   ```

3. Commit the new version, and **create a new release on GitHub**, which will automatically trigger the workflow to publish the new release to [PyPI](https://pypi.org/project/rdflib-endpoint/). You can also manually trigger the workflow from the Actions tab of your GitHub repository if needed.
==> rdflib-endpoint-0.5.1/LICENSE.txt <==
MIT License

Copyright (c) 2022-present Vincent Emonet

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

==> rdflib-endpoint-0.5.1/README.md <==
# 💫 SPARQL endpoint for RDFLib

[![PyPI - Version](https://img.shields.io/pypi/v/rdflib-endpoint.svg?logo=pypi&label=PyPI&logoColor=silver)](https://pypi.org/project/rdflib-endpoint/)
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/rdflib-endpoint.svg?logo=python&label=Python&logoColor=silver)](https://pypi.org/project/rdflib-endpoint/)
[![Test package](https://github.com/vemonet/rdflib-endpoint/actions/workflows/test.yml/badge.svg)](https://github.com/vemonet/rdflib-endpoint/actions/workflows/test.yml)
[![Publish package](https://github.com/vemonet/rdflib-endpoint/actions/workflows/release.yml/badge.svg)](https://github.com/vemonet/rdflib-endpoint/actions/workflows/release.yml)
[![Coverage Status](https://coveralls.io/repos/github/vemonet/rdflib-endpoint/badge.svg?branch=main)](https://coveralls.io/github/vemonet/rdflib-endpoint?branch=main)
[![license](https://img.shields.io/pypi/l/rdflib-endpoint.svg?color=%2334D058)](https://github.com/vemonet/rdflib-endpoint/blob/main/LICENSE.txt)
[![code style - black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
[![types - Mypy](https://img.shields.io/badge/types-mypy-blue.svg)](https://github.com/python/mypy)
`rdflib-endpoint` is a SPARQL endpoint based on RDFLib to **easily serve RDF files locally**, machine learning models, or any other logic implemented in Python via **custom SPARQL functions**.

It aims to enable Python developers to easily deploy functions that can be queried in a federated fashion using SPARQL. For example: using a Python function to resolve labels for specific identifiers, or running a classifier on entities retrieved using a `SERVICE` query to another SPARQL endpoint.

> Feel free to create an [issue](/issues), or send a pull request if you run into problems or would like to see a feature implemented.

## ℹ️ How it works

`rdflib-endpoint` can be used directly from the terminal to quickly serve RDF files through a SPARQL endpoint automatically deployed locally.

It can also be used to define custom SPARQL functions: the user defines and registers custom SPARQL functions, and/or populates the RDFLib Graph using Python, then the endpoint is started using `uvicorn`/`gunicorn`.

The deployed SPARQL endpoint can be used as a `SERVICE` in a federated SPARQL query from regular triplestore SPARQL endpoints. Tested with OpenLink Virtuoso and Ontotext GraphDB (RDF4J based). The endpoint is CORS enabled by default to allow querying it from client-side JavaScript (this can be turned off).

Built with [RDFLib](https://github.com/RDFLib/rdflib) and [FastAPI](https://fastapi.tiangolo.com/).

## 📦️ Installation

This package requires Python >=3.8, install it from [PyPI](https://pypi.org/project/rdflib-endpoint/) with:

```shell
pip install rdflib-endpoint
```

The `uvicorn` and `gunicorn` dependencies are not included by default; if you want to install them, use the optional dependency `web`:

```bash
pip install "rdflib-endpoint[web]"
```

If you want to use `rdflib-endpoint` as a CLI, you can install it with the optional dependency `cli`:

```bash
pip install "rdflib-endpoint[web,cli]"
```

If you want to use [oxigraph](https://github.com/oxigraph/oxigraph) as the backend triplestore, you can install it with the optional dependency `oxigraph`:

```bash
pip install "rdflib-endpoint[web,cli,oxigraph]"
```

> [!WARNING]
> Oxigraph and `oxrdflib` do not support custom functions, so they can only be used to deploy graphs without custom functions.

## ⌨️ Use the CLI

`rdflib-endpoint` can be used from the command line interface to perform basic utility tasks, such as serving or converting RDF files locally.

Make sure you installed `rdflib-endpoint` with the `cli` optional dependencies:

```bash
pip install "rdflib-endpoint[cli]"
```

### ⚡️ Quickly serve RDF files through a SPARQL endpoint

Use `rdflib-endpoint` as a command line interface (CLI) in your terminal to quickly serve one or multiple RDF files as a SPARQL endpoint.
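Under the hood, the `serve` command simply loads each file into an RDFLib `ConjunctiveGraph` and hands it to `uvicorn` (see `src/rdflib_endpoint/__main__.py`). A rough Python equivalent, as a minimal sketch assuming the `web` extra is installed and with a placeholder file name:

```python
import uvicorn
from rdflib import ConjunctiveGraph

from rdflib_endpoint import SparqlEndpoint

# Load the RDF file(s) to expose into a graph
g = ConjunctiveGraph()
g.parse("myfile.ttl")  # placeholder: any RDF file RDFLib can parse

# Wrap the graph in a FastAPI app and serve it on http://localhost:8000
app = SparqlEndpoint(graph=g)
uvicorn.run(app, host="localhost", port=8000)
```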
You can use wildcards and provide multiple files. For example, to serve all Turtle, JSON-LD, and N-Quads files in the current folder you could run:

```bash
rdflib-endpoint serve *.ttl *.jsonld *.nq
```

> Then access the YASGUI SPARQL editor at http://localhost:8000

If you installed the Oxigraph optional dependency you can use it as the backend triplestore; it is faster and supports some functions that are not supported by the RDFLib query engine (such as `COALESCE()`):

```bash
rdflib-endpoint serve --store Oxigraph "*.ttl" "*.jsonld" "*.nq"
```

### 🔄 Convert RDF files to another format

`rdflib-endpoint` can also be used to quickly merge and convert files from multiple formats to a specific format:

```bash
rdflib-endpoint convert "*.ttl" "*.jsonld" "*.nq" --output "merged.trig"
```

## ✨ Deploy your SPARQL endpoint

`rdflib-endpoint` enables you to easily define and deploy SPARQL endpoints based on a RDFLib `Graph`, `ConjunctiveGraph`, or `Dataset`. Additionally, it provides helpers to define custom functions in the endpoint.

Checkout the [`example`](https://github.com/vemonet/rdflib-endpoint/tree/main/example) folder for a complete working app example to get started, including a docker deployment. A good way to create a new SPARQL endpoint is to copy this `example` folder and start from it.

### 🚨 Deploy as a standalone API

Deploy your SPARQL endpoint as a standalone API:

```python
from rdflib import ConjunctiveGraph
from rdflib_endpoint import SparqlEndpoint

# Start the SPARQL endpoint based on a RDFLib Graph and register your custom functions
g = ConjunctiveGraph()
# TODO: Add triples in your graph

# Then use either SparqlEndpoint or SparqlRouter, they take the same arguments
app = SparqlEndpoint(
    graph=g,
    path="/",
    cors_enabled=True,
    # Metadata used for the SPARQL service description and Swagger UI:
    title="SPARQL endpoint for RDFLib graph",
    description="A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python. \n[Source code](https://github.com/vemonet/rdflib-endpoint)",
    version="0.1.0",
    public_url='https://your-endpoint-url/',
    # Example query displayed in the YASGUI default tab
    example_query="""PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT ?concat ?concatLength WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}""",
    # Additional example queries displayed in additional YASGUI tabs
    example_queries={
        "Bio2RDF query": {
            "endpoint": "https://bio2rdf.org/sparql",
            "query": """SELECT DISTINCT * WHERE {
    ?s a ?o .
} LIMIT 10""",
        },
        "Custom function": {
            "query": """PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT ?concat ?concatLength WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}""",
        },
    },
)
```

Finally, deploy this app using `uvicorn` (see below).

### 🛣️ Deploy as a router to include in an existing API

Deploy your SPARQL endpoint as an `APIRouter` to include in an existing `FastAPI` API. The `SparqlRouter` constructor takes the same arguments as `SparqlEndpoint`, apart from `cors_enabled`, which needs to be enabled at the API level.

```python
from fastapi import FastAPI
from rdflib import ConjunctiveGraph
from rdflib_endpoint import SparqlRouter

g = ConjunctiveGraph()
sparql_router = SparqlRouter(
    graph=g,
    path="/",
    # Metadata used for the SPARQL service description and Swagger UI:
    title="SPARQL endpoint for RDFLib graph",
    description="A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python. \n[Source code](https://github.com/vemonet/rdflib-endpoint)",
    version="0.1.0",
    public_url='https://your-endpoint-url/',
)

app = FastAPI()
app.include_router(sparql_router)
```

> TODO: add docs to integrate to a Flask app

### 📝 Define custom SPARQL functions

This option makes it easier to define functions in your SPARQL endpoint, e.g. `BIND(myfunction:custom_concat("start", "end") AS ?concat)`. It can be used with the `SparqlEndpoint` and `SparqlRouter` classes.

Create a `app/main.py` file in your project folder with your custom SPARQL functions and endpoint parameters:

````python
import rdflib
from rdflib import ConjunctiveGraph
from rdflib.plugins.sparql.evalutils import _eval
from rdflib_endpoint import SparqlEndpoint

def custom_concat(query_results, ctx, part, eval_part):
    """Concatenate 2 strings in both orders, and return the length as an additional Length variable
    """
    # Retrieve the 2 input arguments
    argument1 = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
    argument2 = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
    evaluation = []
    scores = []
    # Prepare the 2 result strings, 1 for eval, 1 for scores
    evaluation.append(argument1 + argument2)
    evaluation.append(argument2 + argument1)
    scores.append(len(argument1 + argument2))
    scores.append(len(argument2 + argument1))
    # Append the results for our custom function
    for i, result in enumerate(evaluation):
        query_results.append(eval_part.merge({
            part.var: rdflib.Literal(result),
            # With an additional custom var for the length
            rdflib.term.Variable(part.var + 'Length'): rdflib.Literal(scores[i])
        }))
    return query_results, ctx, part, eval_part

# Start the SPARQL endpoint based on a RDFLib Graph and register your custom functions
g = ConjunctiveGraph()

# Use either SparqlEndpoint or SparqlRouter, they take the same arguments
app = SparqlEndpoint(
    graph=g,
    path="/",
    # Register the functions:
    functions={
        'https://w3id.org/um/sparql-functions/custom_concat': custom_concat
    },
    cors_enabled=True,
    # Metadata used for the SPARQL service description and Swagger UI:
    title="SPARQL endpoint for RDFLib graph",
    description="A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python. \n[Source code](https://github.com/vemonet/rdflib-endpoint)",
    version="0.1.0",
    public_url='https://your-endpoint-url/',
    # Example queries displayed in the Swagger UI to help users try your function
    example_query="""PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT ?concat ?concatLength WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}"""
)
````

### ✒️ Or directly define the custom evaluation

You can also directly provide the custom evaluation function; this will override the `functions`. Refer to the [RDFLib documentation](https://rdflib.readthedocs.io/en/stable/_modules/examples/custom_eval.html) to define the custom evaluation function.
Then provide it when instantiating the SPARQL endpoint:

```python
import rdflib
from rdflib.plugins.sparql.evaluate import evalBGP
from rdflib.namespace import FOAF, RDF, RDFS

def custom_eval(ctx, part):
    """Rewrite triple patterns to get super-classes"""
    if part.name == "BGP":
        # rewrite triples
        triples = []
        for t in part.triples:
            if t[1] == RDF.type:
                bnode = rdflib.BNode()
                triples.append((t[0], t[1], bnode))
                triples.append((bnode, RDFS.subClassOf, t[2]))
            else:
                triples.append(t)
        # delegate to normal evalBGP
        return evalBGP(ctx, triples)
    raise NotImplementedError()

app = SparqlEndpoint(
    graph=g,
    custom_eval=custom_eval
)
```

### 🦄 Run the SPARQL endpoint

You can then run the SPARQL endpoint server from the folder where your script is defined with `uvicorn` on http://localhost:8000 (`uvicorn` is installed with the `web` or `cli` optional dependencies):

```bash
uvicorn main:app --app-dir example/app --reload
```

> Check out `example/README.md` for more details, such as deploying it with docker.

## 📂 Projects using rdflib-endpoint

Here are some projects using `rdflib-endpoint` to deploy custom SPARQL endpoints with Python:

* [The Bioregistry](https://bioregistry.io/), an open source, community curated registry, meta-registry, and compact identifier resolver.
* [proycon/codemeta-server](https://github.com/proycon/codemeta-server), server for codemeta: in-memory triple store, SPARQL endpoint, and simple web-based visualisation for end users.

## 🛠️ Contributing

To run the project in development and make a contribution, check out the [contributing page](https://github.com/vemonet/rdflib-endpoint/blob/main/CONTRIBUTING.md).

==> rdflib-endpoint-0.5.1/example/Dockerfile <==
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.8

COPY ./requirements.txt /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt && \
    rm /tmp/requirements.txt

# cf. https://fastapi.tiangolo.com/deployment/docker/
COPY ./app /app

# EXPOSE 80
# ENTRYPOINT ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "80"]

==> rdflib-endpoint-0.5.1/example/README.md <==
# Example SPARQL endpoint for Python function

A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python: serve drug/disease predictions using the OpenPredict classifier.

Built with [RDFLib](https://github.com/RDFLib/rdflib) and [FastAPI](https://fastapi.tiangolo.com/), CORS enabled.
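Once the endpoint is running (see the install and run sections below), you can also query it from Python. A minimal sketch using `requests` (a test dependency of this repo), assuming the server runs locally on port 8000 — the same request pattern the test suite uses:

```python
import requests

# Send a SPARQL query to the endpoint and ask for JSON results
response = requests.get(
    "http://localhost:8000/",
    params={"query": "SELECT * WHERE { ?s ?p ?o . } LIMIT 10"},
    headers={"accept": "application/json"},
)
# Results follow the SPARQL 1.1 JSON results format
for row in response.json()["results"]["bindings"]:
    print(row)
```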
## Example queries 📬

### Get predictions

Concatenate the 2 given strings, and also return the length as an additional Length variable:

```SPARQL
PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT ?concat ?concatLength WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}
```

### Try a federated query

Use this federated query to retrieve predicted treatments for a drug or disease (OMIM or DRUGBANK) from any other SPARQL endpoint supporting federated queries (note that this query uses our test SPARQL endpoints, which might not always be up).

**From another SPARQL endpoint:**

```SPARQL
PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT * WHERE {
    SERVICE <https://service.translator.137.120.31.102.nip.io/> {
        SELECT ?concat ?concatLength WHERE {
            BIND("First" AS ?first)
            BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
        }
    }
}
```

**From the RDFLib SPARQL endpoint**

⚠️ RDFLib has a few limitations related to federated queries:

* Unfortunately, the `PREFIX` keyword can crash with federated queries in RDFLib, so we need to write the full URIs
* The latest version of RDFLib (`5.0.0`) only recognizes a **lowercase `service`**. This will be fixed in the next versions.

Run this federated query on the RDFLib endpoint https://service.translator.137.120.31.102.nip.io to resolve drug/disease labels retrieved from the Nanopublication network:

```SPARQL
SELECT DISTINCT ?label ?subject ?object ((str(?subject)) AS ?subjectLabel) ((str(?object)) AS ?objectLabel)
WHERE {
    service <...> {
        SELECT * WHERE {
            GRAPH ?np_assertion {
                ?association <http://www.w3.org/2000/01/rdf-schema#label> ?label ;
                    <https://w3id.org/biolink/vocab/subject> ?subject ;
                    <https://w3id.org/biolink/vocab/predicate> ?predicate ;
                    <https://w3id.org/biolink/vocab/object> ?object .
                optional { ?association <https://w3id.org/biolink/vocab/relation> ?relation . }
                optional { ?association <https://w3id.org/biolink/vocab/provided_by> ?provided_by . }
                optional { ?association <...> ?association_type . }
                ?subject <https://w3id.org/biolink/vocab/category> ?subject_category .
                ?object <https://w3id.org/biolink/vocab/category> ?object_category .
            }
            filter ( ?subject_category = <https://w3id.org/biolink/vocab/Drug> || ?subject_category = <https://w3id.org/biolink/vocab/ChemicalSubstance> )
            filter ( ?object_category = <https://w3id.org/biolink/vocab/Disease> )
            GRAPH ?np_head {
                ?np_uri <http://www.nanopub.org/nschema#hasAssertion> ?np_assertion .
            }
            ?np_uri <...> <...> .
            filter NOT EXISTS { ?creator <...> ?np_uri }
        } LIMIT 5
    }
}
```

### Insert data

Insert data in the in-memory rdflib graph:

```SPARQL
INSERT DATA {
    <http://subject> <http://predicate> <http://object> .
}
```

## Install and run ✨️

1. Install dependencies

```bash
pip install -r requirements.txt
```

2. Run the server on http://localhost:8000

```bash
uvicorn main:app --reload --app-dir app
```

## Or run with docker 🐳

Checkout the `Dockerfile` to see how the image is built, and run it with the `docker-compose.yml`:

```bash
docker-compose up -d --build
```

Or build and run with docker:

```bash
docker build -t rdflib-endpoint .
```

Run on http://localhost:8080

```bash
docker run -p 8080:80 rdflib-endpoint
```

==> rdflib-endpoint-0.5.1/example/app/__init__.py <==

==> rdflib-endpoint-0.5.1/example/app/custom_eval.py <==
"""
This example shows how a custom evaluation function can be added to
handle certain SPARQL Algebra elements.

A custom function is added that adds ``rdfs:subClassOf`` "inference" when
asking for ``rdf:type`` triples.

Here the custom eval function is added manually, normally you would use
setuptools and entry_points to do it: i.e.
in your setup.py::

    entry_points = {
        'rdf.plugins.sparqleval': [
            'myfunc = mypackage:MyFunction',
        ],
    }
"""
# EvalBGP https://rdflib.readthedocs.io/en/stable/_modules/rdflib/plugins/sparql/evaluate.html
# Custom fct for rdf:type with auto infer super-classes: https://github.com/RDFLib/rdflib/blob/master/examples/custom_eval.py
# BGP = Basic Graph Pattern
# Docs rdflib custom fct: https://rdflib.readthedocs.io/en/stable/intro_to_sparql.html
# StackOverflow: https://stackoverflow.com/questions/43976691/custom-sparql-functions-in-rdflib/66988421#66988421
# Another project: https://github.com/bas-stringer/scry/blob/master/query_handler.py
# https://www.w3.org/TR/sparql11-service-description/#example-turtle
# Federated query: https://www.w3.org/TR/2013/REC-sparql11-federated-query-20130321/#defn_service
# XML method: https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.plugins.sparql.results.html#module-rdflib.plugins.sparql.results.xmlresults

import rdflib
from rdflib import Literal, URIRef
from rdflib.plugins.sparql import parser
from rdflib.plugins.sparql.algebra import pprintAlgebra, translateQuery
from rdflib.plugins.sparql.evaluate import evalBGP

# inferredSubClass = rdflib.RDFS.subClassOf * "*"  # any number of rdfs.subClassOf
biolink = URIRef("https://w3id.org/biolink/vocab/")


class Result:
    pass


def add_to_graph(ctx, drug, disease, score):
    bnode = rdflib.BNode()
    ctx.graph.add((bnode, rdflib.RDF.type, rdflib.RDF.Statement))
    ctx.graph.add((bnode, rdflib.RDF.subject, drug))
    ctx.graph.add((bnode, rdflib.RDF.predicate, biolink + "treats"))
    ctx.graph.add((bnode, rdflib.RDF.object, disease))
    ctx.graph.add((bnode, biolink + "category", biolink + "ChemicalToDiseaseOrPhenotypicFeatureAssociation"))
    ctx.graph.add((bnode, biolink + "has_confidence_level", score))


def get_triples(disease):
    drug = URIRef("http://bio2rdf.org/drugbank:DB00001")
    score = Literal("1.0")
    r = Result()
    r.drug = drug
    r.disease = disease
    r.score = score
    results = []
    results.append(r)
    return results


# def parseRelationalExpr(expr):


def custom_eval(ctx, part):
    """ """
    # print(part.name)
    if part.name == "Project":
        ctx.myvars = []

    # search extend for variable binding
    if part.name == "Extend" and hasattr(part, "expr") and not isinstance(part.expr, list):
        ctx.myvars.append(part.expr)

    # search for filter
    if part.name == "Filter" and hasattr(part, "expr"):
        if hasattr(part.expr, "expr"):
            if part.expr.expr["op"] == "=":
                part.expr.expr["expr"]
                d = part.expr.expr["other"]
                ctx.myvars.append(d)
        else:
            if part.expr["op"] == "=":
                part.expr["expr"]
                d = part.expr["other"]
                ctx.myvars.append(d)

    # search the BGP for the variable of interest
    if part.name == "BGP":
        triples = []
        for t in part.triples:
            if t[1] == rdflib.RDF.object:
                disease = t[2]
                # check first if the disease term is specified in the bgp triple
                if isinstance(disease, rdflib.term.URIRef):
                    ctx.myvars.append(disease)
            # fetch instances
            for d in ctx.myvars:
                results = get_triples(d)
                for r in results:
                    add_to_graph(ctx, r.drug, r.disease, r.score)
            triples.append(t)
        return evalBGP(ctx, triples)
    raise NotImplementedError()


if __name__ == "__main__":
    # add function directly, normally we would use setuptools and entry_points
    rdflib.plugins.sparql.CUSTOM_EVALS["exampleEval"] = custom_eval
    g = rdflib.Graph()

    q = """PREFIX openpredict: <https://w3id.org/um/openpredict/>
    PREFIX biolink: <https://w3id.org/biolink/vocab/>
    PREFIX omim: <http://bio2rdf.org/omim:>
    SELECT ?disease ?drug ?score {
        ?association a rdf:Statement ;
            rdf:subject ?drug ;
            rdf:predicate ?predicate ;
            #rdf:object omim:246300 ;
            rdf:object ?disease ;
            biolink:category biolink:ChemicalToDiseaseOrPhenotypicFeatureAssociation ;
            biolink:has_confidence_level ?score .
        #?disease dcat:identifier "OMIM:246300" .
        BIND(omim:1 AS ?disease)
        #FILTER(?disease = omim:2 || ?disease = omim:3)
        #VALUES ?disease { omim:5 omim:6 omim:7 }
    }"""

    pq = parser.parseQuery(q)
    tq = translateQuery(pq)
    pprintAlgebra(tq)

    # Find all FOAF Agents
    for x in g.query(q):
        print(x)

==> rdflib-endpoint-0.5.1/example/app/main.py <==
import rdflib
from rdflib import RDF, RDFS, ConjunctiveGraph, Literal, URIRef
from rdflib.plugins.sparql.evalutils import _eval

from rdflib_endpoint import SparqlEndpoint


def custom_concat(query_results, ctx, part, eval_part):
    """
    Concatenate 2 strings and return the length as an additional Length variable
    \f
    :param query_results: An array with the query results objects
    :param ctx:
    :param part: Part of the query processed (e.g. Extend or BGP)
    :param eval_part: Part currently evaluated
    :return: the same query_results provided in input param, with additional results
    """
    argument1 = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
    argument2 = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
    evaluation = []
    scores = []
    concat_string = argument1 + argument2
    reverse_string = argument2 + argument1
    # Append the concatenated strings to the results
    evaluation.append(concat_string)
    evaluation.append(reverse_string)
    # Append the scores for each row of results
    scores.append(len(concat_string))
    scores.append(len(reverse_string))
    # Append our results to the query_results
    for i, result in enumerate(evaluation):
        query_results.append(
            eval_part.merge({part.var: Literal(result), rdflib.term.Variable(part.var + "Length"): Literal(scores[i])})
        )
    return query_results, ctx, part, eval_part


def most_similar(query_results, ctx, part, eval_part):
    """
    Get the most similar entities for a given entity

    PREFIX openpredict: <https://w3id.org/um/openpredict/>
    SELECT ?drugOrDisease ?mostSimilar ?mostSimilarScore WHERE {
        BIND("OMIM:246300" AS ?drugOrDisease)
        BIND(openpredict:most_similar(?drugOrDisease) AS ?mostSimilar)
    }
    """
    # argumentEntity = str(_eval(part.expr.expr[0], eval_part.forget(ctx, _except=part.expr._vars)))
    # try:
    #     argumentLimit = str(_eval(part.expr.expr[1], eval_part.forget(ctx, _except=part.expr._vars)))
    # except:
    #     argumentLimit = None

    # Using stub data
    similarity_results = [{"mostSimilar": "DRUGBANK:DB00001", "score": 0.42}]

    evaluation = []
    scores = []
    for most_similar in similarity_results:
        evaluation.append(most_similar["mostSimilar"])
        scores.append(most_similar["score"])

    # Append our results to the query_results
    for i, result in enumerate(evaluation):
        query_results.append(
            eval_part.merge({part.var: Literal(result), rdflib.term.Variable(part.var + "Score"): Literal(scores[i])})
        )
    return query_results, ctx, part, eval_part


example_query = """PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT DISTINCT * WHERE {
    ?s ?p ?o .
} LIMIT 100"""

example_queries = {
    "Bio2RDF query": {
        "endpoint": "https://bio2rdf.org/sparql",
        "query": """SELECT DISTINCT * WHERE {
    ?s a ?o .
} LIMIT 10""", }, "Custom function": { "query": """PREFIX myfunctions: SELECT ?concat ?concatLength WHERE { BIND("First" AS ?first) BIND(myfunctions:custom_concat(?first, "last") AS ?concat) }""", }, } # Use ConjunctiveGraph to support nquads and graphs in SPARQL queries # identifier is the default graph g = ConjunctiveGraph( # store="Oxigraph", identifier=URIRef("https://w3id.org/um/sparql-functions/graph/default"), ) # Example to add a nquad to the exposed graph g.add((URIRef("http://subject"), RDF.type, URIRef("http://object"), URIRef("http://graph"))) g.add((URIRef("http://subject"), RDFS.label, Literal("foo"), URIRef("http://graph"))) # Start the SPARQL endpoint based on the RDFLib Graph app = SparqlEndpoint( graph=g, functions={ "https://w3id.org/um/openpredict/most_similar": most_similar, "https://w3id.org/um/sparql-functions/custom_concat": custom_concat, }, title="SPARQL endpoint for RDFLib graph", description="A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python. \n[Source code](https://github.com/vemonet/rdflib-endpoint)", version="0.1.0", path="/", public_url="https://your-website-url/", cors_enabled=True, example_query=example_query, example_queries=example_queries, enable_update=True, ) ## Uncomment to run it directly with python app/main.py # if __name__ == "__main__": # import uvicorn # uvicorn.run(app, host="0.0.0.0", port=8000) rdflib-endpoint-0.5.1/example/docker-compose.yml000066400000000000000000000005771462402450000216730ustar00rootroot00000000000000version: '3' services: rdflib-endpoint: build: . restart: unless-stopped ports: - 8000:80 # environment: # # We recommend to use a reverse nginx-proxy: https://github.com/nginx-proxy/nginx-proxy # VIRTUAL_HOST: service.openpredict.137.120.31.102.nip.io # LETSENCRYPT_HOST: service.openpredict.137.120.31.102.nip.io # VIRTUAL_PORT: 80 rdflib-endpoint-0.5.1/example/requirements.txt000066400000000000000000000001041462402450000215040ustar00rootroot00000000000000rdflib-endpoint@git+https://github.com/vemonet/rdflib-endpoint@main rdflib-endpoint-0.5.1/pyproject.toml000066400000000000000000000111341462402450000175060ustar00rootroot00000000000000[build-system] requires = ["hatchling"] build-backend = "hatchling.build" [project] requires-python = ">=3.8" name = "rdflib-endpoint" description = "A package to deploy SPARQL endpoint to serve local RDF files, machine learning models, or any other logic implemented in Python, using RDFLib and FastAPI." 
readme = "README.md" license = { file = "LICENSE.txt" } authors = [ { name = "Vincent Emonet", email = "vincent.emonet@gmail.com" }, ] maintainers = [ { name = "Vincent Emonet", email = "vincent.emonet@gmail.com" }, ] keywords = [ "Python", "SPARQL", "RDF", "RDFLib", "endpoint", ] classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] dynamic = ["version"] dependencies = [ "rdflib >=6.0.0", "fastapi >=0.51.0", ] [project.scripts] rdflib-endpoint = "rdflib_endpoint.__main__:cli" [project.optional-dependencies] cli =[ "uvicorn[standard] >=0.12.0", "click", ] web =[ "uvicorn[standard] >=0.12.0", "gunicorn", ] oxigraph =[ "oxrdflib", ] test = [ "pytest >=7.1.3", "pytest-cov >=3.0.0", "pre-commit", "mypy >=1.4.1", "requests", "httpx", "types-PyYAML", "types-setuptools", "types-ujson", "types-click", ] [project.urls] Homepage = "https://github.com/vemonet/rdflib-endpoint" Documentation = "https://github.com/vemonet/rdflib-endpoint" History = "https://github.com/vemonet/rdflib-endpoint/releases" Tracker = "https://github.com/vemonet/rdflib-endpoint/issues" Source = "https://github.com/vemonet/rdflib-endpoint" # ENVIRONMENTS AND SCRIPTS [tool.hatch.envs.default] features = [ "cli", "web", "oxigraph", "test", ] post-install-commands = [ "pre-commit install", ] [tool.hatch.envs.default.scripts] dev = "uvicorn example.app.main:app --reload {args}" fmt = [ "pre-commit run --all --all-files", "mypy", ] test = [ "fmt", "pytest --cov-fail-under=85 {args}", ] cov = [ "pytest --cov-report html {args}", "python -c 'import webbrowser; webbrowser.open(\"http://0.0.0.0:3000\")'", "python -m http.server 3000 --directory ./htmlcov", ] [[tool.hatch.envs.all.matrix]] python = ["3.8", "3.9", "3.10", "3.11", "3.12"] # TOOLS [tool.hatch.build.targets.wheel] packages = ["src/rdflib_endpoint"] [tool.hatch.version] path = "src/rdflib_endpoint/__init__.py" [tool.coverage.run] source = ["src/rdflib_endpoint"] branch = false [tool.coverage.report] omit = ["tests/*"] exclude_lines = [ "no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:", ] show_missing = true [tool.pytest.ini_options] addopts = [ "-vvv", "--cov=src", "--color=yes", "--cov-report=term-missing", ] filterwarnings = [ "ignore::DeprecationWarning:httpx.*:" ] [tool.mypy] files = ["src/"] strict = true implicit_reexport = true follow_imports = "normal" ignore_missing_imports = true pretty = true show_column_numbers = true warn_no_return = true warn_unused_ignores = true warn_redundant_casts = true disallow_untyped_defs = true disallow_any_generics = true disallow_untyped_calls = false # needed due to _eval() not being typed in rdflib # https://docs.astral.sh/ruff/rules/ [tool.ruff] src = ["src", "tests"] target-version = "py38" line-length = 120 [tool.ruff.lint] select = [ "I", # isort "N", # pep8-naming "S", # bandit "A", # flake8-builtins "YTT", # flake8-2020 "B", # flake8-bugbear "C", # flake8-comprehensions "ICN", # flake8-import-conventions "SIM", # flake8-simplify "TID", # flake8-tidy-imports "Q", # flake8-quotes # "FBT", # flake8-boolean-trap "F", # pyflakes "UP", # pyupgrade "E", # pycodestyle errors "W", # pycodestyle warnings "PLC", # pylint convention "PLE", # pylint error # "PLR", # pylint refactor Magic value used in comparison, consider replacing 400 with a 
    "PLW",  # pylint warning
    "RUF",  # ruff specific
    "T",
]
ignore = [
    "B008",  # do not perform function calls in argument defaults (required for FastAPI afaik)
    "E501",  # line too long
    "C901",  # too complex
    "S101",  # Use of `assert` detected
    "T201", "T203",  # remove print and pprint
]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["I", "F401"]  # module imported but unused

# [tool.hatch.build]
# sources = ["src"]

==> rdflib-endpoint-0.5.1/src/rdflib_endpoint/__init__.py <==
"""A package to deploy SPARQL endpoint to serve local RDF files, machine learning models, or any other logic implemented in Python, using RDFLib and FastAPI."""

__version__ = "0.5.1"

from .sparql_router import SparqlRouter
from .sparql_endpoint import SparqlEndpoint

==> rdflib-endpoint-0.5.1/src/rdflib_endpoint/__main__.py <==
import glob
import sys
from typing import List

import click
import uvicorn
from rdflib import ConjunctiveGraph

from rdflib_endpoint import SparqlEndpoint


@click.group()
def cli() -> None:
    """Quickly serve RDF files as SPARQL endpoint with RDFLib Endpoint"""


@cli.command(help="Serve a local RDF file as a SPARQL endpoint by default on http://0.0.0.0:8000/sparql")
@click.argument("files", nargs=-1)
@click.option("--host", default="localhost", help="Host of the SPARQL endpoint")
@click.option("--port", default=8000, help="Port of the SPARQL endpoint")
@click.option("--store", default="default", help="Store used by RDFLib: default or Oxigraph")
@click.option("--enable-update", is_flag=True, help="Enable SPARQL updates")
def serve(files: List[str], host: str, port: int, store: str, enable_update: bool) -> None:
    run_serve(files, host, port, store, enable_update)


def run_serve(files: List[str], host: str, port: int, store: str = "default", enable_update: bool = False) -> None:
    if store == "oxigraph":
        store = store.capitalize()
    g = ConjunctiveGraph(store=store)
    for glob_file in files:
        file_list = glob.glob(glob_file)
        for file in file_list:
            g.parse(file)
            click.echo(
                click.style("INFO", fg="green")
                + ": 📥️ Loaded triples from "
                + click.style(str(file), bold=True)
                + ", for a total of "
                + click.style(str(len(g)), bold=True)
            )
    app = SparqlEndpoint(
        graph=g,
        enable_update=enable_update,
        example_query="""PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT * WHERE {
    GRAPH ?g {
        ?s ?p ?o .
} } LIMIT 100""", ) uvicorn.run(app, host=host, port=port) @cli.command(help="Merge and convert local RDF files to another format easily") @click.argument("files", nargs=-1) @click.option("--output", default="localhost", help="Host of the SPARQL endpoint") @click.option("--store", default="default", help="Store used by RDFLib: default or Oxigraph") def convert(files: List[str], output: str, store: str) -> None: run_convert(files, output, store) def run_convert(files: List[str], output: str, store: str = "default") -> None: if store == "oxigraph": store = store.capitalize() g = ConjunctiveGraph(store=store) for glob_file in files: file_list = glob.glob(glob_file) for file in file_list: g.parse(file) click.echo( click.style("INFO", fg="green") + ": ๐Ÿ“ฅ๏ธ Loaded triples from " + click.style(str(file), bold=True) + ", for a total of " + click.style(str(len(g)), bold=True) ) out_format = "ttl" if output.endswith(".nt"): out_format = "nt" elif output.endswith(".xml") or output.endswith(".rdf"): out_format = "xml" elif output.endswith(".json") or output.endswith(".jsonld"): out_format = "json-ld" elif output.endswith(".trig"): out_format = "trig" g.serialize(output, format=out_format) if __name__ == "__main__": sys.exit(cli()) rdflib-endpoint-0.5.1/src/rdflib_endpoint/py.typed000066400000000000000000000000001462402450000222100ustar00rootroot00000000000000rdflib-endpoint-0.5.1/src/rdflib_endpoint/sparql_endpoint.py000066400000000000000000000055421462402450000243050ustar00rootroot00000000000000import time from typing import Any, Callable, Dict, Optional, Union from fastapi import FastAPI, Request, Response from fastapi.middleware.cors import CORSMiddleware from rdflib import ConjunctiveGraph, Dataset, Graph from rdflib.query import Processor from rdflib_endpoint.sparql_router import ( DEFAULT_DESCRIPTION, DEFAULT_EXAMPLE, DEFAULT_FAVICON, DEFAULT_PUBLIC_URL, DEFAULT_TITLE, DEFAULT_VERSION, SparqlRouter, ) __all__ = [ "SparqlEndpoint", ] class SparqlEndpoint(FastAPI): """ Class to deploy a SPARQL endpoint using a RDFLib Graph. """ def __init__( self, *args: Any, path: str = "/", title: str = DEFAULT_TITLE, description: str = DEFAULT_DESCRIPTION, version: str = DEFAULT_VERSION, graph: Union[None, Graph, ConjunctiveGraph, Dataset] = None, functions: Optional[Dict[str, Callable[..., Any]]] = None, processor: Union[str, Processor] = "sparql", custom_eval: Optional[Callable[..., Any]] = None, enable_update: bool = False, cors_enabled: bool = True, public_url: str = DEFAULT_PUBLIC_URL, example_query: str = DEFAULT_EXAMPLE, example_queries: Optional[Dict[str, Dict[str, str]]] = None, favicon: str = DEFAULT_FAVICON, **kwargs: Any, ) -> None: """ Constructor of the SPARQL endpoint, everything happens here. 
        FastAPI calls are defined in this constructor
        """
        self.title = title
        self.description = description
        self.version = version

        # Instantiate FastAPI
        super().__init__(
            *args,
            title=title,
            description=description,
            version=version,
            **kwargs,
        )

        sparql_router = SparqlRouter(
            path=path,
            title=title,
            description=description,
            version=version,
            graph=graph,
            functions=functions,
            processor=processor,
            custom_eval=custom_eval,
            enable_update=enable_update,
            public_url=public_url,
            example_query=example_query,
            example_queries=example_queries,
            favicon=favicon,
        )
        self.include_router(sparql_router)

        if cors_enabled:
            self.add_middleware(
                CORSMiddleware,
                allow_origins=["*"],
                allow_credentials=True,
                allow_methods=["*"],
                allow_headers=["*"],
            )

        @self.middleware("http")
        async def add_process_time_header(request: Request, call_next: Any) -> Response:
            start_time = time.time()
            response: Response = await call_next(request)
            response.headers["Server-Timing"] = f"total;dur={time.time() - start_time}"
            return response

==> rdflib-endpoint-0.5.1/src/rdflib_endpoint/sparql_router.py <==
import logging
import os
import re
from importlib import resources
from typing import Any, Callable, Dict, List, Optional, Union
from urllib import parse

import rdflib
from fastapi import APIRouter, Query, Request, Response
from fastapi.responses import JSONResponse
from rdflib import RDF, ConjunctiveGraph, Dataset, Graph, Literal, URIRef
from rdflib.plugins.sparql import prepareQuery, prepareUpdate
from rdflib.plugins.sparql.evaluate import evalPart
from rdflib.plugins.sparql.evalutils import _eval
from rdflib.plugins.sparql.parserutils import CompValue
from rdflib.plugins.sparql.sparql import QueryContext, SPARQLError
from rdflib.query import Processor

__all__ = [
    "SparqlRouter",
]

DEFAULT_TITLE: str = "SPARQL endpoint for RDFLib graph"
DEFAULT_DESCRIPTION: str = "A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python. \n[Source code](https://github.com/vemonet/rdflib-endpoint)"
DEFAULT_VERSION: str = "0.1.0"
DEFAULT_PUBLIC_URL: str = "https://your-endpoint/sparql"
DEFAULT_FAVICON: str = "https://rdflib.readthedocs.io/en/stable/_static/RDFlib.png"
DEFAULT_EXAMPLE = """\
PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>

SELECT ?concat ?concatLength WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}
""".rstrip()

SERVICE_DESCRIPTION_TTL_FMT = """\
@prefix sd: <http://www.w3.org/ns/sparql-service-description#> .
@prefix ent: <http://www.w3.org/ns/entailment/> .
@prefix prof: <http://www.w3.org/ns/owl-profile/> .
@prefix void: <http://rdfs.org/ns/void#> .
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

<{public_url}> a sd:Service ;
    rdfs:label "{title}" ;
    dc:description "{description}" ;
    sd:endpoint <{public_url}> ;
    sd:supportedLanguage sd:SPARQL11Query ;
    sd:resultFormat <http://www.w3.org/ns/formats/SPARQL_Results_JSON>, <http://www.w3.org/ns/formats/SPARQL_Results_CSV> ;
    sd:feature sd:DereferencesURIs ;
    sd:defaultEntailmentRegime ent:RDFS ;
    sd:defaultDataset [
        a sd:Dataset ;
        sd:defaultGraph [
            a sd:Graph ;
        ]
    ] .
""".rstrip() api_responses: Optional[Dict[Union[int, str], Dict[str, Any]]] = { 200: { "description": "SPARQL query results", "content": { "application/sparql-results+json": {"example": {"results": {"bindings": []}, "head": {"vars": []}}}, "application/json": {"example": {"results": {"bindings": []}, "head": {"vars": []}}}, "text/csv": {"example": "s,p,o"}, "application/sparql-results+csv": {"example": "s,p,o"}, "application/sparql-results+xml": {"example": ""}, "application/xml": {"example": ""}, "text/turtle": {"example": " ."}, "application/n-triples": {"example": " ."}, "text/n3": {"example": " ."}, "application/n-quads": {"example": " ."}, "application/trig": { "example": "GRAPH { .}" }, "application/trix": {"example": ""}, "application/ld+json": { "example": [ { "@id": "http://subject", "@type": ["http://object"], "http://www.w3.org/2000/01/rdf-schema#label": [{"@value": "foo"}], } ] }, # "application/rdf+xml": { # "example": ' ' # }, }, }, 400: { "description": "Bad Request", }, 403: { "description": "Forbidden", }, 422: { "description": "Unprocessable Entity", }, } #: This is default for federated queries DEFAULT_CONTENT_TYPE = "application/xml" #: A mapping from content types to the keys used for serializing #: in :meth:`rdflib.Graph.serialize` and other serialization functions CONTENT_TYPE_TO_RDFLIB_FORMAT = { # https://www.w3.org/TR/sparql11-results-json/ "application/sparql-results+json": "json", "application/json": "json", "text/json": "json", # https://www.w3.org/TR/rdf-sparql-XMLres/ "application/sparql-results+xml": "xml", "application/xml": "xml", # for compatibility "application/rdf+xml": "xml", # for compatibility "text/xml": "xml", # not standard "application/ld+json": "json-ld", # https://www.w3.org/TR/sparql11-results-csv-tsv/ "application/sparql-results+csv": "csv", "text/csv": "csv", # for compatibility # Extras "text/turtle": "ttl", "text/n3": "n3", "application/n-triples": "nt", "text/plain": "nt", "application/trig": "trig", "application/trix": "trix", "application/n-quads": "nquads", } def parse_accept_header(accept: str) -> List[str]: """ Given an accept header string, return a list of media types in order of preference. :param accept: Accept header value :return: Ordered list of media type preferences """ def _parse_preference(qpref: str) -> float: qparts = qpref.split("=") try: return float(qparts[1].strip()) except (ValueError, IndexError): pass return 1.0 preferences = [] types = accept.split(",") dpref = 2.0 for mtype in types: parts = mtype.split(";") parts = [part.strip() for part in parts] pref = dpref try: for part in parts[1:]: if part.startswith("q="): pref = _parse_preference(part) break except IndexError: pass # preserve order of appearance in the list dpref = dpref - 0.01 preferences.append((parts[0], pref)) preferences.sort(key=lambda x: -x[1]) return [pref[0] for pref in preferences] class SparqlRouter(APIRouter): """ Class to deploy a SPARQL endpoint using a RDFLib Graph. 
""" def __init__( self, *args: Any, path: str = "/", title: str = DEFAULT_TITLE, description: str = DEFAULT_DESCRIPTION, version: str = DEFAULT_VERSION, graph: Union[None, Graph, ConjunctiveGraph, Dataset] = None, processor: Union[str, Processor] = "sparql", custom_eval: Optional[Callable[..., Any]] = None, functions: Optional[Dict[str, Callable[..., Any]]] = None, enable_update: bool = False, public_url: str = DEFAULT_PUBLIC_URL, favicon: str = DEFAULT_FAVICON, example_query: str = DEFAULT_EXAMPLE, example_queries: Optional[Dict[str, Dict[str, str]]] = None, **kwargs: Any, ) -> None: """ Constructor of the SPARQL endpoint, everything happens here. FastAPI calls are defined in this constructor """ self.graph = graph if graph is not None else ConjunctiveGraph() self.functions = functions if functions is not None else {} self.processor = processor self.title = title self.description = description self.version = version self.path = path self.public_url = public_url self.example_query = example_query self.example_queries = example_queries self.example_markdown = f"Example query:\n\n```\n{example_query}\n```" self.enable_update = enable_update self.favicon = favicon # Instantiate APIRouter super().__init__( *args, responses=api_responses, **kwargs, ) # Save custom function in custom evaluation dictionary # Handle multiple functions directly in the evalCustomFunctions function if custom_eval: rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = custom_eval elif len(self.functions) > 0: rdflib.plugins.sparql.CUSTOM_EVALS["evalCustomFunctions"] = self.eval_custom_functions async def handle_sparql_request( request: Request, query: Optional[str] = None, update: Optional[str] = None ) -> Response: """Handle SPARQL requests to the GET and POST endpoints""" if query and update: return JSONResponse( status_code=400, content={"message": "Cannot do both query and update"}, ) if not query and not update: if str(request.headers["accept"]).startswith("text/html"): return self.serve_yasgui() # If not asking HTML, return the SPARQL endpoint service description service_graph = self.get_service_graph() # Return the service description RDF as turtle or XML if request.headers["accept"] == "text/turtle": return Response( service_graph.serialize(format="turtle"), media_type="text/turtle", ) else: return Response( service_graph.serialize(format="xml"), media_type="application/xml", ) # Pretty print the query object # from rdflib.plugins.sparql.algebra import pprintAlgebra # parsed_query = parser.parseQuery(query) # tq = algebraTranslateQuery(parsed_query) # pprintAlgebra(tq) graph_ns = dict(self.graph.namespaces()) if query: try: parsed_query = prepareQuery(query, initNs=graph_ns) query_results = self.graph.query(parsed_query, processor=self.processor) # Format and return results depending on Accept mime type in request header mime_types = parse_accept_header(request.headers.get("accept", DEFAULT_CONTENT_TYPE)) # Handle cases that are more complicated, like it includes multiple # types, extra information, etc. 
                    output_mime_type = DEFAULT_CONTENT_TYPE
                    for mime_type in mime_types:
                        if mime_type in CONTENT_TYPE_TO_RDFLIB_FORMAT:
                            output_mime_type = mime_type
                            # Use the first mime_type that matches
                            break

                    query_operation = re.sub(r"(\w)([A-Z])", r"\1 \2", parsed_query.algebra.name)

                    # Handle mime type for construct queries
                    if query_operation == "Construct Query":
                        if output_mime_type == "text/csv":
                            output_mime_type = "text/turtle"
                        elif output_mime_type == "application/json":
                            output_mime_type = "application/ld+json"
                        elif output_mime_type == "application/xml":
                            output_mime_type = "application/rdf+xml"
                        else:
                            pass

                    try:
                        rdflib_format = CONTENT_TYPE_TO_RDFLIB_FORMAT.get(output_mime_type, output_mime_type)
                        response = Response(
                            query_results.serialize(format=rdflib_format),
                            media_type=output_mime_type,
                        )
                    except Exception as e:
                        logging.error(f"Error serializing the SPARQL query results with RDFLib: {e}")
                        return JSONResponse(
                            status_code=422,
                            content={"message": f"Error serializing the SPARQL query results with RDFLib: {e}"},
                        )
                    else:
                        return response
                except Exception as e:
                    logging.error(f"Error executing the SPARQL query on the RDFLib Graph: {e}")
                    return JSONResponse(
                        status_code=400,
                        content={"message": f"Error executing the SPARQL query on the RDFLib Graph: {e}"},
                    )
            else:
                # Update
                if not self.enable_update:
                    return JSONResponse(
                        status_code=403, content={"message": "INSERT and DELETE queries are not allowed."}
                    )
                if rdflib_apikey := os.environ.get("RDFLIB_APIKEY"):
                    authorized = False
                    if auth_header := request.headers.get("Authorization"):  # noqa: SIM102
                        if auth_header.startswith("Bearer ") and auth_header[7:] == rdflib_apikey:
                            authorized = True
                    if not authorized:
                        return JSONResponse(status_code=403, content={"message": "Invalid API KEY."})
                try:
                    prechecked_update: str = update  # type: ignore
                    parsed_update = prepareUpdate(prechecked_update, initNs=graph_ns)
                    self.graph.update(parsed_update, "sparql")
                    return Response(status_code=204)
                except Exception as e:
                    logging.error(f"Error executing the SPARQL update on the RDFLib Graph: {e}")
                    return JSONResponse(
                        status_code=400,
                        content={"message": f"Error executing the SPARQL update on the RDFLib Graph: {e}"},
                    )

        # TODO: use add_api_route? https://github.com/tiangolo/fastapi/blob/d666ccb62216e45ca78643b52c235ba0d2c53986/fastapi/routing.py#L548
        @self.get(
            self.path,
            name="SPARQL endpoint",
            description=self.example_markdown,
            responses=api_responses,
        )
        async def get_sparql_endpoint(
            request: Request,
            query: Optional[str] = Query(None),
        ) -> Response:
            """
            Send a SPARQL query to be executed through HTTP GET operation.

            :param request: The HTTP GET request
            :param query: SPARQL query input.
            """
            return await handle_sparql_request(request, query=query)

        @self.post(
            path,
            name="SPARQL endpoint",
            description=self.example_markdown,
            responses=api_responses,
        )
        async def post_sparql_endpoint(request: Request) -> Response:
            """Send a SPARQL query to be executed through HTTP POST operation.
            :param request: The HTTP POST request with a .body()
            """
            request_body = await request.body()
            body = request_body.decode("utf-8")
            content_type = request.headers.get("content-type")
            if content_type == "application/sparql-query":
                query = body
                update = None
            elif content_type == "application/sparql-update":
                query = None
                update = body
            elif content_type == "application/x-www-form-urlencoded":
                request_params = parse.parse_qsl(body)
                query_params = [kvp[1] for kvp in request_params if kvp[0] == "query"]
                query = parse.unquote(query_params[0]) if query_params else None
                update_params = [kvp[1] for kvp in request_params if kvp[0] == "update"]
                update = parse.unquote(update_params[0]) if update_params else None
                # TODO: handle params `using-graph-uri` and `using-named-graph-uri`
                # https://www.w3.org/TR/sparql11-protocol/#update-operation
            else:
                # Respond with the service description
                query = None
                update = None
            return await handle_sparql_request(request, query, update)

    def eval_custom_functions(self, ctx: QueryContext, part: CompValue) -> List[Any]:
        """Retrieve variables from a SPARQL query, then execute the registered SPARQL functions.
        The results are then stored in Literal objects and added to the query results.

        :param ctx:
        :param part:
        :return:
        """
        # This part holds the basic implementation for adding new functions
        if part.name == "Extend":
            query_results: List[Any] = []

            # Information is retrieved and stored and passed through a generator
            for eval_part in evalPart(ctx, part.p):
                # Checks if the function is a URI (custom function)
                if hasattr(part.expr, "iri"):
                    # Iterate through the custom functions passed in the constructor
                    for function_uri, custom_function in self.functions.items():
                        # Check if the URI corresponds to a registered custom function
                        if part.expr.iri == URIRef(function_uri):
                            # Execute each function
                            query_results, ctx, part, _ = custom_function(query_results, ctx, part, eval_part)
                else:
                    # For built-in SPARQL functions (that are not URIs)
                    evaluation: List[Any] = [_eval(part.expr, eval_part.forget(ctx, _except=part._vars))]
                    if isinstance(evaluation[0], SPARQLError):
                        raise evaluation[0]
                    # Append results for built-in SPARQL functions
                    for result in evaluation:
                        query_results.append(eval_part.merge({part.var: Literal(result)}))

            return query_results
        raise NotImplementedError()

    def serve_yasgui(self) -> Response:
        """Serve the YASGUI interface"""
        import json

        with resources.open_text("rdflib_endpoint", "yasgui.html") as f:
            html_str = f.read()
        html_str = html_str.replace("$TITLE", self.title)
        html_str = html_str.replace("$DESCRIPTION", self.description)
        html_str = html_str.replace("$FAVICON", self.favicon)
        html_str = html_str.replace("$EXAMPLE_QUERY", self.example_query)
        html_str = html_str.replace("$EXAMPLE_QUERIES", json.dumps(self.example_queries))
        return Response(content=html_str, media_type="text/html")

    def get_service_graph(self) -> Graph:
        # Service description returned when no query provided
        service_description_ttl = SERVICE_DESCRIPTION_TTL_FMT.format(
            public_url=self.public_url,
            title=self.title,
            description=self.description.replace("\n", ""),
        )
        graph = Graph()
        graph.parse(data=service_description_ttl, format="ttl")
        # service_graph.parse('app/service-description.ttl', format="ttl")

        # Add custom functions URI to the service description
        for custom_function_uri in self.functions:
            graph.add(
                (
                    URIRef(custom_function_uri),
                    RDF.type,
                    URIRef("http://www.w3.org/ns/sparql-service-description#Function"),
                )
            )
            graph.add(
                (
                    URIRef(self.public_url),
                    URIRef("http://www.w3.org/ns/sparql-service-description#extensionFunction"),
                    URIRef(custom_function_uri),
                )
            )
        return graph

==> rdflib-endpoint-0.5.1/src/rdflib_endpoint/yasgui.html <==
<!-- HTML page embedding the YASGUI editor; the markup is not recoverable from this dump, only the $TITLE placeholder remains -->
$TITLE
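To make the content negotiation in `sparql_router.py` above concrete, here is a small usage sketch of its `parse_accept_header` helper, matching the behavior asserted in `tests/test_parse_accept.py` below:

```python
from rdflib_endpoint.sparql_router import parse_accept_header

# Media types are ordered by q-value, ties broken by order of appearance
prefs = parse_accept_header("text/html, application/xml;q=0.9, */*;q=0.8")
assert prefs == ["text/html", "application/xml", "*/*"]

# The endpoint then picks the first type it knows how to serialize,
# falling back to DEFAULT_CONTENT_TYPE (application/xml)
print(prefs[0])  # -> "text/html"
```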
==> rdflib-endpoint-0.5.1/tests/__init__.py <==

==> rdflib-endpoint-0.5.1/tests/resources/another.jsonld <==
[
    {
        "@id": "http://another/s",
        "http://another/p": [
            {"@id": "http://another/o"}
        ]
    },
    {"@id": "http://another/o"}
]

==> rdflib-endpoint-0.5.1/tests/resources/custom_eval_todo.py <==
import rdflib
from fastapi.testclient import TestClient
from rdflib import URIRef
from rdflib.namespace import RDF, RDFS
from rdflib.plugins.sparql.evaluate import evalBGP

from rdflib_endpoint import SparqlEndpoint

# TODO: not used due to a bug with FastAPI TestClient when using different apps in the tests


def custom_eval(ctx, part):
    """Rewrite triple patterns to get super-classes"""
    if part.name == "BGP":
        # rewrite triples
        triples = []
        for t in part.triples:
            if t[1] == RDF.type:
                bnode = rdflib.BNode()
                triples.append((t[0], t[1], bnode))
                triples.append((bnode, RDFS.subClassOf, t[2]))
            else:
                triples.append(t)
        # delegate to normal evalBGP
        return evalBGP(ctx, triples)
    raise NotImplementedError()


g = rdflib.Graph()
g.add((URIRef("http://human"), RDFS.subClassOf, URIRef("http://mammal")))
g.add((URIRef("http://alice"), RDF.type, URIRef("http://human")))

eval_app = SparqlEndpoint(graph=g, custom_eval=custom_eval, functions={})
eval_endpoint = TestClient(eval_app)


def test_custom_eval():
    # eval_app = SparqlEndpoint(
    #     graph=g,
    #     custom_eval=custom_eval,
    #     functions={}
    # )
    # eval_endpoint = TestClient(eval_app)
    response = eval_endpoint.get("/?query=" + select_parent, headers={"accept": "application/json"})
    print(response.json())
    assert response.status_code == 200
    print(response.json()["results"]["bindings"])
    assert str(response.json()["results"]["bindings"][0]["s"]["value"]) == "http://alice"

    response = eval_endpoint.post("/", data="query=" + select_parent, headers={"accept": "application/json"})
    assert response.status_code == 200
    assert str(response.json()["results"]["bindings"][0]["s"]["value"]) == "http://alice"


select_parent = """SELECT * WHERE {
    ?s a <http://mammal> .
}"""

==> rdflib-endpoint-0.5.1/tests/resources/test.nq <==
<...> <...> <...> <...> .

==> rdflib-endpoint-0.5.1/tests/resources/test2.ttl <==
@prefix ns0: <...> .

<...> ns0:p ns0:o .
==> rdflib-endpoint-0.5.1/tests/test_example_app.py <==
from example.app.main import app
from fastapi.testclient import TestClient

# Use the app defined in the example folder
endpoint = TestClient(app)


def test_service_description():
    response = endpoint.get("/", headers={"accept": "text/turtle"})
    # print(response.text.strip())
    assert response.status_code == 200
    assert response.text.strip() == service_description

    response = endpoint.post("/", headers={"accept": "text/turtle"})
    assert response.status_code == 200
    assert response.text.strip() == service_description

    # Check for application/xml
    response = endpoint.post("/", headers={"accept": "application/xml"})
    assert response.status_code == 200


def test_custom_concat():
    response = endpoint.get("/", params={"query": custom_concat_query}, headers={"accept": "application/json"})
    # print(response.json())
    assert response.status_code == 200
    assert response.json()["results"]["bindings"][0]["concat"]["value"] == "Firstlast"

    response = endpoint.post(
        "/",
        data={"query": custom_concat_query},
        headers={"accept": "application/json"},
    )
    assert response.status_code == 200
    assert response.json()["results"]["bindings"][0]["concat"]["value"] == "Firstlast"


def test_bad_request():
    response = endpoint.get("/?query=figarofigarofigaro", headers={"accept": "application/json"})
    assert response.status_code == 400


custom_concat_query = """PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT ?concat ?concatLength WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}"""

service_description = """@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix ent: <http://www.w3.org/ns/entailment/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sd: <http://www.w3.org/ns/sparql-service-description#> .

<https://w3id.org/um/openpredict/most_similar> a sd:Function .

<https://w3id.org/um/sparql-functions/custom_concat> a sd:Function .

<https://your-website-url/> a sd:Service ;
    rdfs:label "SPARQL endpoint for RDFLib graph" ;
    dc:description "A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python. [Source code](https://github.com/vemonet/rdflib-endpoint)" ;
[Source code](https://github.com/vemonet/rdflib-endpoint)" ; sd:defaultDataset [ a sd:Dataset ; sd:defaultGraph [ a sd:Graph ] ] ; sd:defaultEntailmentRegime ent:RDFS ; sd:endpoint ; sd:extensionFunction , ; sd:feature sd:DereferencesURIs ; sd:resultFormat , ; sd:supportedLanguage sd:SPARQL11Query .""" rdflib-endpoint-0.5.1/tests/test_oxrdflib.py000066400000000000000000000103571462402450000211640ustar00rootroot00000000000000from fastapi.testclient import TestClient from rdflib import RDF, RDFS, Graph, Literal, URIRef from rdflib_endpoint import SparqlEndpoint g = Graph(store="Oxigraph") g.add((URIRef("http://subject"), RDF.type, URIRef("http://object"))) g.add((URIRef("http://subject"), RDFS.label, Literal("test value"))) app = SparqlEndpoint(graph=g) endpoint = TestClient(app) def test_service_description(): response = endpoint.get("/", headers={"accept": "text/turtle"}) print(response.text.strip()) assert response.status_code == 200 # assert response.text.strip() == service_description response = endpoint.post("/", headers={"accept": "text/turtle"}) assert response.status_code == 200 # assert response.text.strip() == service_description # Check for application/xml response = endpoint.post("/", headers={"accept": "application/xml"}) assert response.status_code == 200 def test_custom_concat_json(): response = endpoint.get("/?query=", params={"query": label_select}, headers={"accept": "application/json"}) assert response.status_code == 200 assert response.json()["results"]["bindings"][0]["label"]["value"] == "test value" response = endpoint.post("/", data={"query": label_select}, headers={"accept": "application/json"}) assert response.status_code == 200 assert response.json()["results"]["bindings"][0]["label"]["value"] == "test value" def test_select_noaccept_xml(): response = endpoint.post("/", data={"query": label_select}) assert response.status_code == 200 # assert response.json()['results']['bindings'][0]['concat']['value'] == "Firstlast" def test_select_csv(): response = endpoint.post("/", data={"query": label_select}, headers={"accept": "text/csv"}) assert response.status_code == 200 # assert response.json()['results']['bindings'][0]['concat']['value'] == "Firstlast" def test_fail_select_turtle(): response = endpoint.post("/", data={"query": label_select}, headers={"accept": "text/turtle"}) assert response.status_code == 422 # assert response.json()['results']['bindings'][0]['concat']['value'] == "Firstlast" def test_concat_construct_turtle(): # expected to return turtle response = endpoint.post( "/", data="query=" + label_construct, headers={"accept": "application/json"}, ) assert response.status_code == 200 # assert response.json()['results']['bindings'][0]['concat']['value'] == "Firstlast" def test_concat_construct_xml(): # expected to return turtle response = endpoint.post( "/", data="query=" + label_construct, headers={"accept": "application/xml"}, ) assert response.status_code == 200 def test_bad_request(): response = endpoint.get("/?query=figarofigarofigaro", headers={"accept": "application/json"}) assert response.status_code == 400 label_select = """PREFIX rdfs: SELECT ?label WHERE { ?s rdfs:label ?label . }""" label_construct = """PREFIX rdfs: CONSTRUCT { ?label . } WHERE { ?s rdfs:label ?label . }""" # service_description = """@prefix dc: . # @prefix ent: . # @prefix rdfs: . # @prefix sd: . # a sd:Service ; # rdfs:label "SPARQL endpoint for RDFLib graph" ; # dc:description "A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python. 
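
test_oxrdflib.py above only passes when the oxrdflib package is available: `Graph(store="Oxigraph")` looks up the store plugin that oxrdflib registers under that name, so triples live in an embedded Oxigraph store instead of rdflib's default in-memory store. A minimal standalone sketch, assuming `pip install oxrdflib` has been run:

```python
# Sketch: an rdflib Graph backed by the embedded Oxigraph store.
# Assumes the oxrdflib package is installed; "Oxigraph" is the store
# plugin name it registers with rdflib.
from rdflib import Graph, Literal, URIRef
from rdflib.namespace import RDFS

g = Graph(store="Oxigraph")
g.add((URIRef("http://subject"), RDFS.label, Literal("test value")))

q = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?label WHERE { ?s rdfs:label ?label }"""
for row in g.query(q):
    print(row.label)  # -> test value
```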
rdflib-endpoint-0.5.1/tests/test_parse_accept.py
import pytest

import rdflib_endpoint.sparql_router

accept_cases = [
    ("text/xml", "text/xml"),
    ("text/rdf+xml, text/xml, */*", "text/rdf+xml"),
    ("text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", "text/html"),
    ("text/html, application/xhtml+xml, application/xml;q=0.9, */*;q=", "text/html"),
    ("text/html;q=0.3, application/xhtml+xml, application/xml;q=0.9, */*;q=0.8", "application/xhtml+xml"),
    (
        'text/turtle;q=0.9;profile="urn:example:profile-1", text/turtle;q=0.7;profile="urn:example:profile-2"',
        "text/turtle",
    ),
]


@pytest.mark.parametrize("accept,expected", accept_cases)
def test_accept_preference(accept, expected):
    pref = rdflib_endpoint.sparql_router.parse_accept_header(accept)
    assert pref[0] == expected
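
The orderings asserted above amount to standard RFC 7231 content negotiation: media types are ranked by descending q-value, ties keep the header's original order, and a malformed q parameter (the `*/*;q=` case) falls back to the default weight of 1. The sketch below is not the library's `parse_accept_header` implementation, only a behavioral re-implementation (with a hypothetical `parse_accept` name) that satisfies the same cases:

```python
# Sketch: q-value based Accept parsing consistent with the cases above.
# This is NOT rdflib_endpoint.sparql_router.parse_accept_header, just a
# minimal re-implementation for illustration.
from typing import List


def parse_accept(accept: str) -> List[str]:
    entries = []
    for i, raw in enumerate(accept.split(",")):
        pieces = raw.strip().split(";")
        media_type = pieces[0].strip()
        q = 1.0
        for param in pieces[1:]:
            name, _, value = param.strip().partition("=")
            if name == "q":
                try:
                    q = float(value)
                except ValueError:
                    q = 1.0  # tolerate a malformed q, e.g. "*/*;q="
        # Sort by descending q-value, then by position in the header
        entries.append((-q, i, media_type))
    return [media_type for _, _, media_type in sorted(entries)]


assert parse_accept("text/html;q=0.3, application/xml;q=0.9, */*;q=0.8")[0] == "application/xml"
```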
rdflib-endpoint-0.5.1/tests/test_rdflib_endpoint.py
import pytest
from example.app.main import custom_concat
from fastapi.testclient import TestClient
from rdflib import RDFS, Graph, Literal, URIRef

from rdflib_endpoint import SparqlEndpoint

graph = Graph()


@pytest.fixture(autouse=True)
def clear_graph():
    # Workaround to clear graph without putting
    # graph, app and endpoint into a fixture
    # and modifying the test fixture usage.
    for triple in graph:
        graph.remove(triple)


app = SparqlEndpoint(
    graph=graph,
    functions={
        "https://w3id.org/um/sparql-functions/custom_concat": custom_concat,
    },
    enable_update=True,
)
endpoint = TestClient(app)


def test_service_description():
    response = endpoint.get("/", headers={"accept": "text/turtle"})
    assert response.status_code == 200
    assert response.text.strip() == service_description

    response = endpoint.post("/", headers={"accept": "text/turtle"})
    assert response.status_code == 200
    assert response.text.strip() == service_description

    # Check for application/xml
    response = endpoint.post("/", headers={"accept": "application/xml"})
    assert response.status_code == 200


def test_custom_concat_json():
    response = endpoint.get("/", params={"query": concat_select}, headers={"accept": "application/json"})
    # print(response.json())
    assert response.status_code == 200
    assert response.json()["results"]["bindings"][0]["concat"]["value"] == "Firstlast"

    response = endpoint.post("/", data={"query": concat_select}, headers={"accept": "application/json"})
    assert response.status_code == 200
    assert response.json()["results"]["bindings"][0]["concat"]["value"] == "Firstlast"

    response = endpoint.post(
        "/", data=concat_select, headers={"accept": "application/json", "content-type": "application/sparql-query"}
    )
    assert response.status_code == 200
    assert response.json()["results"]["bindings"][0]["concat"]["value"] == "Firstlast"


def test_select_noaccept_xml():
    response = endpoint.post("/", data={"query": concat_select})
    assert response.status_code == 200
    assert response.text.startswith("<?xml ")


label_patch = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
DELETE { ?subject rdfs:label "foo" }
INSERT { ?subject rdfs:label "bar" }
WHERE { ?subject rdfs:label "foo" }
"""


@pytest.mark.parametrize(
    "api_key,key_provided,param_method",
    [
        (api_key, key_provided, param_method)
        for api_key in [None, "key"]
        for key_provided in [True, False]
        for param_method in ["body_form", "body_direct"]
    ],
)
def test_sparql_update(api_key, key_provided, param_method, monkeypatch):
    if api_key:
        monkeypatch.setenv("RDFLIB_APIKEY", api_key)
    subject = URIRef("http://server.test/subject")
    headers = {}
    if key_provided:
        headers["Authorization"] = "Bearer key"
    graph.add((subject, RDFS.label, Literal("foo")))
    if param_method == "body_form":
        request_args = {"data": {"update": label_patch}}
    else:  # direct
        headers["content-type"] = "application/sparql-update"
        request_args = {"data": label_patch}
    response = endpoint.post("/", headers=headers, **request_args)
    if api_key is None or key_provided:
        assert response.status_code == 204
        assert (subject, RDFS.label, Literal("foo")) not in graph
        assert (subject, RDFS.label, Literal("bar")) in graph
    else:
        assert response.status_code == 403
        assert (subject, RDFS.label, Literal("foo")) in graph
        assert (subject, RDFS.label, Literal("bar")) not in graph


def test_sparql_query_update_fail():
    response = endpoint.post("/", data={"update": label_patch, "query": label_patch})
    assert response.status_code == 400


def test_multiple_accept_return_json():
    response = endpoint.get(
        "/",
        params={"query": concat_select},
        headers={"accept": "text/html;q=0.3, application/xml;q=0.9, application/json, */*;q=0.8"},
    )
    assert response.status_code == 200
    assert response.json()["results"]["bindings"][0]["concat"]["value"] == "Firstlast"


def test_multiple_accept_return_json2():
    response = endpoint.get(
        "/",
        params={"query": concat_select},
        headers={"accept": "text/html;q=0.3, application/json, application/xml;q=0.9, */*;q=0.8"},
    )
    assert response.status_code == 200
    assert response.json()["results"]["bindings"][0]["concat"]["value"] == "Firstlast"


def test_fail_select_turtle():
    response = endpoint.post("/", data={"query": concat_select}, headers={"accept": "text/turtle"})
    assert response.status_code == 422


def test_concat_construct_turtle():
    response = endpoint.post(
        "/",
        data={"query": custom_concat_construct},
        headers={"accept": "text/turtle"},
    )
    assert response.status_code == 200
    assert response.text.startswith("@prefix ")


def test_concat_construct_csv():
    response = endpoint.post(
        "/",
        data={"query": custom_concat_construct},
        headers={"accept": "text/csv"},
    )
    assert response.status_code == 200
    assert response.text.startswith("@prefix ")


def test_concat_construct_jsonld():
    response = endpoint.post(
        "/",
        data={"query": custom_concat_construct},
        headers={"accept": "application/json"},
    )
    assert response.status_code == 200
    assert response.json()[0]["@id"] == "http://example.com/test"


def test_concat_construct_xml():
    # expected to return XML
    response = endpoint.post(
        "/",
        data={"query": custom_concat_construct},
        headers={"accept": "application/xml"},
    )
    assert response.status_code == 200
    assert response.text.startswith("<?xml ")


concat_select = """PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
SELECT ?concat ?concatLength WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}"""

custom_concat_construct = """PREFIX myfunctions: <https://w3id.org/um/sparql-functions/>
CONSTRUCT {
     ?concat, ?concatLength .
} WHERE {
    BIND("First" AS ?first)
    BIND(myfunctions:custom_concat(?first, "last") AS ?concat)
}"""

service_description = """@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix ent: <http://www.w3.org/ns/entailment/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix sd: <http://www.w3.org/ns/sparql-service-description#> .

<https://w3id.org/um/sparql-functions/custom_concat> a sd:Function .

 a sd:Service ;
    rdfs:label "SPARQL endpoint for RDFLib graph" ;
    dc:description "A SPARQL endpoint to serve machine learning models, or any other logic implemented in Python.
[Source code](https://github.com/vemonet/rdflib-endpoint)" ;
    sd:defaultDataset [ a sd:Dataset ;
            sd:defaultGraph [ a sd:Graph ] ] ;
    sd:defaultEntailmentRegime ent:RDFS ;
    sd:endpoint  ;
    sd:extensionFunction <https://w3id.org/um/sparql-functions/custom_concat> ;
    sd:feature sd:DereferencesURIs ;
    sd:resultFormat , ;
    sd:supportedLanguage sd:SPARQL11Query ."""
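
For reference, the update tests above pin down the wire protocol: an update is accepted either as an `update=` form field or as a raw `application/sparql-update` body, a server started with the `RDFLIB_APIKEY` environment variable requires a matching `Bearer` token, a successful update returns 204, and a rejected one 403. A client-side sketch against a hypothetical locally running endpoint (the URL and key are illustrative placeholders, not part of the test suite):

```python
# Sketch: sending an authorized SPARQL update to a running endpoint.
# Assumes a server started with RDFLIB_APIKEY=my-secret and
# enable_update=True, reachable at http://localhost:8000/ (placeholders).
import requests

update = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
INSERT DATA { <http://example.org/s> rdfs:label "hello" }"""

resp = requests.post(
    "http://localhost:8000/",
    data=update,
    headers={
        "Content-Type": "application/sparql-update",
        "Authorization": "Bearer my-secret",  # must match RDFLIB_APIKEY
    },
)
assert resp.status_code == 204  # update applied, no content returned
```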
rdflib-endpoint-0.5.1/tests/test_serve_cli.py
import glob
import os
import tempfile
from unittest.mock import MagicMock, patch

from click.testing import CliRunner

from rdflib_endpoint.__main__ import cli

runner = CliRunner()

out_formats = ["ttl", "nt", "xml", "jsonld", "trig"]


def test_convert():
    for out_format in out_formats:
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            out_file = str(f"{tmp_file}.{out_format}")
            result = runner.invoke(
                cli,
                ["convert", "tests/resources/test2.ttl", "--output", out_file],
            )
            assert result.exit_code == 0
            with open(out_file) as file:
                content = file.read()
                assert len(content) > 1
    # Fix issue with python creating unnecessary temp files on disk
    for f in glob.glob("<tempfile._TemporaryFileWrapper*"):
        os.remove(f)


def test_convert_oxigraph():
    for out_format in out_formats:
        with tempfile.NamedTemporaryFile(delete=True) as tmp_file:
            out_file = str(f"{tmp_file}.{out_format}")
            result = runner.invoke(
                cli,
                ["convert", "tests/resources/test2.ttl", "--store", "oxigraph", "--output", out_file],
            )
            assert result.exit_code == 0
            with open(out_file) as file:
                content = file.read()
                assert len(content) > 1
    # Fix issue with python creating unnecessary temp files on disk
    for f in glob.glob("<tempfile._TemporaryFileWrapper*"):
        os.remove(f)


@patch("rdflib_endpoint.__main__.uvicorn.run")
def test_serve(mock_run: MagicMock) -> None:
    """Test serve, mock uvicorn.run to prevent API hanging"""
    mock_run.return_value = None
    result = runner.invoke(
        cli,
        [
            "serve",
            "tests/resources/test.nq",
            "tests/resources/test2.ttl",
            "tests/resources/another.jsonld",
        ],
    )
    assert result.exit_code == 0


@patch("rdflib_endpoint.__main__.uvicorn.run")
def test_serve_oxigraph(mock_run: MagicMock) -> None:
    """Test serve oxigraph, mock uvicorn.run to prevent API hanging"""
    mock_run.return_value = None
    result = runner.invoke(
        cli,
        [
            "serve",
            "--store",
            "oxigraph",
            "tests/resources/test.nq",
            "tests/resources/test2.ttl",
            "tests/resources/another.jsonld",
        ],
    )
    assert result.exit_code == 0
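
The CLI tests above drive `rdflib_endpoint.__main__.cli`, whose `serve` command loads the given files into a graph and hands the resulting app to `uvicorn.run` (which is why the tests mock it). A rough programmatic equivalent is sketched below, assuming rdflib can guess each file's format from its extension; host and port are illustrative placeholders:

```python
# Sketch: programmatic equivalent of the "serve" command tested above.
import uvicorn
from rdflib import Graph

from rdflib_endpoint import SparqlEndpoint

g = Graph()
for path in [
    "tests/resources/test.nq",
    "tests/resources/test2.ttl",
    "tests/resources/another.jsonld",
]:
    g.parse(path)  # format is guessed from the file extension

app = SparqlEndpoint(graph=g)

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```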