Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add wikidata query endpoint #126

Merged
merged 8 commits into from
Apr 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

.DS_Store
.env
/keypair
.venv/**
venv
__pycache__/**
*.pyc
**/mydb/**
73 changes: 73 additions & 0 deletions backend/project/app/qlever.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This helper class is used to query Wikidata through the QLever SPARQL endpoint
import requests
import json
from typing import List

class QleverAPI:
    """Small client for the QLever SPARQL endpoint that serves Wikidata.

    Every public method returns the decoded JSON document sent back by the
    endpoint, or ``None`` when the request could not be completed.
    """

    # Seconds to wait for the endpoint. ``requests`` has no default timeout,
    # so without one a stalled server would block the worker indefinitely.
    timeout = 30

    def __init__(self):
        self.endpoint_url = "https://qlever.cs.uni-freiburg.de/api/wikidata"
        # NOTE(review): not read by any method of this class; kept so that
        # external code relying on the attribute keeps working.
        self.params = {
            "action": "wbsearchentities",
            "format": "json",
            "language": "en",
        }

    @staticmethod
    def _escape_sparql_string(value):
        """Escape *value* for use inside a double-quoted SPARQL literal."""
        return value.translate(str.maketrans({
            "\\": "\\\\",
            '"': '\\"',
            "\n": "\\n",
            "\r": "\\r",
            "\t": "\\t",
        }))

    # Send a SPARQL query to the QLever endpoint
    def execute_query(self, query):
        """Run *query* against the endpoint.

        Args:
            query: SPARQL query text.

        Returns:
            The decoded JSON response, or ``None`` if the HTTP request failed,
            timed out, returned an error status, or the body was not JSON.
        """
        import logging

        logger = logging.getLogger(__name__)
        try:
            response = requests.get(
                self.endpoint_url,
                params={'query': query, 'format': 'json'},
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            logger.error("QLever query failed: %s", e)
            return None

    def film_pattern_query(self, pattern, limit):
        """Search English film labels for *pattern*.

        Matching ignores case and spaces. Films whose label starts with the
        pattern are ordered before films that merely contain it.

        Args:
            pattern: Free text to look for; treated literally, not as a regex.
            limit: Maximum number of rows to request. Negative values are
                clamped to 0.

        Returns:
            The decoded JSON response from ``execute_query``, or ``None`` if
            the request failed or *limit* is not an integer.
        """
        import logging

        logger = logging.getLogger(__name__)

        try:
            # Coerce to int so nothing but a number is spliced after LIMIT.
            limit = max(int(limit), 0)
        except (TypeError, ValueError):
            logger.error("Invalid limit for film pattern query: %r", limit)
            return None

        # Lower-case and drop spaces, mirroring what the query does to labels,
        # then escape so quotes/backslashes cannot break out of the literal.
        needle = self._escape_sparql_string(pattern.lower().replace(" ", ""))

        SPARQL = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT DISTINCT ?film ?filmLabel ?filmId WHERE {{
          {{
            SELECT ?film ?filmLabel ?filmId (1 as ?order) WHERE {{
              ?film wdt:P31 wd:Q11424;
                    rdfs:label ?filmLabel.
              FILTER(LANG(?filmLabel) = "en")
              FILTER(STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "{needle}"))
            }}
          }}
          UNION
          {{
            SELECT ?film ?filmLabel ?filmId (2 as ?order) WHERE {{
              ?film wdt:P31 wd:Q11424;
                    rdfs:label ?filmLabel.
              FILTER(LANG(?filmLabel) = "en")
              BIND(REPLACE(LCASE(?filmLabel), " ", "") AS ?formattedLabel)
              FILTER(CONTAINS(?formattedLabel, "{needle}"))
              FILTER (!STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "{needle}"))
            }}
          }}
        }}
        ORDER BY ?order
        LIMIT {limit}
        """

        logger.debug("Film pattern SPARQL: %s", SPARQL)

        return self.execute_query(SPARQL)





7 changes: 7 additions & 0 deletions backend/project/app/serializers.py
Original file line number Diff line number Diff line change
@@ -86,3 +86,10 @@ class Meta:
model = Actor
fields = ['name', 'surname', 'description']


class WikidataQuerySerializer(serializers.Serializer):
    """Request body carrying a single query string under ``query``."""

    query = serializers.CharField()


class FilmPatternWithLimitQuerySerializer(serializers.Serializer):
    """Request body for a film search: a text pattern and a result limit."""

    pattern = serializers.CharField()
    # The limit is spliced into a SPARQL ``LIMIT`` clause, where a negative
    # number is not valid, so reject it at validation time.
    limit = serializers.IntegerField(min_value=0)
10 changes: 7 additions & 3 deletions backend/project/app/urls.py
Original file line number Diff line number Diff line change
@@ -3,17 +3,21 @@
from app import views
from django.urls import path
from drf_spectacular.views import SpectacularAPIView, SpectacularSwaggerView
from .views import film_api, film_detail_api, RegisterView, MyObtainTokenPairView, LogoutView, VerifyEmail
from .views import film_api, film_detail_api, RegisterView, execute_query, query_film_pattern, MyObtainTokenPairView, LogoutView, VerifyEmail
from rest_framework_simplejwt.views import TokenRefreshView


# URL routes for the app. Each path is registered once; the duplicated
# 'register/' and 'email-verify/' entries were redundant because Django
# dispatches to the first matching pattern.
urlpatterns = [
    # OpenAPI schema and Swagger UI
    path('film/schema/', SpectacularAPIView.as_view(), name='schema'),
    path('film/schema/swagger-ui/', SpectacularSwaggerView.as_view(url_name='schema'), name='swagger-ui'),
    # Film endpoints
    path('film/', film_api, name='film-list'),
    path('film/<int:id>/', film_detail_api, name='film-detail'),
    # Authentication
    path('login/', MyObtainTokenPairView.as_view(), name='token_obtain_pair'),
    path('login/refresh/', TokenRefreshView.as_view(), name='token_refresh'),
    path('register/', RegisterView.as_view(), name='auth_register'),
    # Query endpoints
    path('wikidata-query/', execute_query, name='wikidata-query'),
    path('query-film-pattern/', query_film_pattern, name='query-film-pattern'),
    path('logout/', LogoutView.as_view(), name='logout'),
    path('email-verify/', VerifyEmail.as_view(), name='email-verify'),
]

80 changes: 77 additions & 3 deletions backend/project/app/views.py
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@
from django.conf import settings
from django.http.response import JsonResponse
from django.contrib.auth.models import User
from app.models import Film, Genre, Director, Actor
from app.serializers import *
#from app.serializers import UserSerializer, FilmSerializer, GenreSerializer, DirectorSerializer, ActorSerializer,WikidataQuerySerializer, FilmPatternWithLimitQuerySerializer, MyTokenObtainPairSerializer, LogoutSerializer
from rest_framework import permissions, status , viewsets, generics
from rest_framework.response import Response
from rest_framework_simplejwt.tokens import RefreshToken
@@ -14,9 +17,13 @@
from rest_framework.permissions import AllowAny, IsAuthenticated
import jwt
from rest_framework_simplejwt.views import TokenObtainPairView

from app.models import Film, Genre, Director, Actor
from app.serializers import UserSerializer, FilmSerializer, GenreSerializer, DirectorSerializer, ActorSerializer, RegisterSerializer, LogoutSerializer, MyTokenObtainPairSerializer
from rest_framework.permissions import IsAuthenticated
from .serializers import RegisterSerializer
from rest_framework import generics
from app.wikidata import WikidataAPI
from app.qlever import QleverAPI
from rest_framework.response import Response
from rest_framework import status
from .utils import Util


@@ -169,3 +176,70 @@ def film_detail_api(request, id):
return JsonResponse("Film Deleted Successfully", safe=False)


# A simple endpoint for sending a semantic query to the Wikidata API
@extend_schema(
    description="API endpoint for sending a semantic query to the Wikidata API.",
    methods=['POST'],
    request=WikidataQuerySerializer,
)
@api_view(['POST'])
def execute_query(request):
    """
    Allow users to send a semantic query to the Wikidata API.

    Responses:
        200 with the JSON returned by ``WikidataAPI.execute_query``.
        400 with the serializer errors when the request body is invalid.
        502 when ``WikidataAPI.execute_query`` returns ``None``, i.e. the
        upstream request failed.
    """
    # ``api_view(['POST'])`` already restricts the view to POST, so no
    # additional method check is needed here.
    serializer = WikidataQuerySerializer(data=request.data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    query_text = serializer.validated_data.get('query')

    # Execute the query using the WikidataAPI class
    results = WikidataAPI().execute_query(query_text)
    if results is None:
        # The helper signals failure with None; surface it as an error
        # instead of answering 200 with an empty body.
        return Response(
            {'detail': 'The Wikidata query could not be completed.'},
            status=status.HTTP_502_BAD_GATEWAY,
        )

    return Response(results)


# Find films with a pattern string and a limit value
@extend_schema(
    description="API endpoint for finding films with a pattern string and a limit value.",
    methods=['POST'],
    request=FilmPatternWithLimitQuerySerializer,
)
@api_view(['POST'])
def query_film_pattern(request):
    """
    Find films with a pattern string and a limit value using Qlever.

    Responses:
        200 with a list of ``{'id': ..., 'label': ...}`` objects built from
        the ``film`` and ``filmLabel`` bindings of the query result.
        400 with the serializer errors when the request body is invalid.
        502 when the Qlever request failed or its payload has no
        ``results.bindings`` section.
    """
    # ``api_view(['POST'])`` already restricts the view to POST.
    serializer = FilmPatternWithLimitQuerySerializer(data=request.data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    pattern = serializer.validated_data.get('pattern')
    limit = serializer.validated_data.get('limit')

    # Execute the query using the Qlever class
    results = QleverAPI().film_pattern_query(pattern, limit)

    upstream_error = Response(
        {'detail': 'The film search could not be completed.'},
        status=status.HTTP_502_BAD_GATEWAY,
    )

    # The helper returns None on failure; indexing it would raise TypeError
    # and turn an upstream outage into an unhandled 500.
    if results is None:
        return upstream_error
    try:
        bindings = results['results']['bindings']
    except (KeyError, TypeError):
        return upstream_error

    # Change response format: keep only film ids and labels. Rows missing
    # either variable are skipped rather than raising KeyError.
    films = [
        {
            'id': row['film']['value'],
            'label': row['filmLabel']['value'],
        }
        for row in bindings
        if 'film' in row and 'filmLabel' in row
    ]
    return Response(films)



37 changes: 37 additions & 0 deletions backend/project/app/wikidata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# This helper class is used to interact with the Wikidata API
import requests
import json
from typing import List

# Sample SPARQL query: selects every ?item that is an instance of (P31)
# film (Q11424) together with its label, description and alt-label variables,
# using the wikibase label service with English as the fallback language.
# NOTE(review): not referenced by the code visible in this module — confirm
# whether it is still needed.
QUERY = """
SELECT ?item ?itemLabel ?itemDescription ?itemAltLabel WHERE {
?item wdt:P31 wd:Q11424.
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""

class WikidataAPI:
    """Small client for the Wikidata SPARQL query service.

    ``execute_query`` returns the decoded JSON document sent back by the
    service, or ``None`` when the request could not be completed.
    """

    # Seconds to wait for the service. ``requests`` has no default timeout,
    # so without one a stalled server would block the worker indefinitely.
    timeout = 30

    def __init__(self):
        self.endpoint_url = "https://query.wikidata.org/sparql"
        # NOTE(review): not read by any method of this class; kept so that
        # external code relying on the attribute keeps working.
        self.params = {
            "action": "wbsearchentities",
            "format": "json",
            "language": "en",
        }

    # Send a semantic query to the Wikidata API
    def execute_query(self, query):
        """Run *query* against the SPARQL endpoint.

        Args:
            query: SPARQL query text.

        Returns:
            The decoded JSON response, or ``None`` if the HTTP request failed,
            timed out, returned an error status, or the body was not JSON.
        """
        import logging

        logger = logging.getLogger(__name__)
        try:
            response = requests.get(
                self.endpoint_url,
                params={'query': query, 'format': 'json'},
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            logger.error("Wikidata query failed: %s", e)
            return None



6 changes: 6 additions & 0 deletions backend/project/project/settings.py
Original file line number Diff line number Diff line change
@@ -44,6 +44,12 @@
'PASSWORD': 'password', # MySQL password
'HOST': 'db', # Host where MySQL is running (in this case, Docker container)
'PORT': '3306', # Port where MySQL is running (in this case, Docker container)
}
}
"""

"""
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
5 changes: 3 additions & 2 deletions backend/project/docker-compose.yml → docker-compose.yml
Original file line number Diff line number Diff line change
@@ -17,11 +17,12 @@ services:
- backend-network

backend:
image: semanticflix_backend
build:
context: .
context: ./backend/project
dockerfile: Dockerfile
container_name: semanticflix_backend
command: sh -c "python3 manage.py migrate --noinput && python3 manage.py collectstatic --noinput && python manage.py runserver 0.0.0.0:8000"
command: sh -c "python backend/project/manage.py migrate --noinput && python backend/project/manage.py collectstatic --noinput && python backend/project/manage.py runserver 0.0.0.0:8000"
restart: always
volumes:
- .:/app
54 changes: 54 additions & 0 deletions sample_queries.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
======= Films starring Leonardo DiCaprio (wd:Q38111)
{
"query": "SELECT ?film ?filmLabel WHERE { ?film wdt:P31 wd:Q11424; wdt:P161 wd:Q38111. SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\". } }"
}
=======


===== Films whose label starts with the given string
{
"query": "SELECT DISTINCT ?film ?filmLabel ?filmId WHERE { ?film wdt:P31 wd:Q11424; rdfs:label ?filmLabel. FILTER(STRSTARTS(?filmLabel, \"OPP\")) OPTIONAL { ?film wdt:P345 ?filmId. } SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\". } } LIMIT 10"
}

=== Better version of it
SELECT DISTINCT ?film ?filmLabel ?filmId WHERE {
?film wdt:P31 wd:Q11424; # Instance of film
rdfs:label ?filmLabel. # Label of the film
FILTER(LANG(?filmLabel) = "en") # Filter out non-English labels
FILTER(STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "opportunityk")) # Matches films with labels starting with "opportunityk" (case-insensitive and ignoring spaces)
}
LIMIT 3
====


====
Final film listing sparql

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?film ?filmLabel ?filmId WHERE {
{
SELECT ?film ?filmLabel ?filmId (1 as ?order) WHERE {
?film wdt:P31 wd:Q11424; # Instance of film
rdfs:label ?filmLabel. # Label of the film
FILTER(LANG(?filmLabel) = "en") # Filter out non-English labels
FILTER(STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "oppen")) # Matches labels starting with "oppen" (case-insensitive and ignoring spaces)
}
}
UNION
{
SELECT ?film ?filmLabel ?filmId (2 as ?order) WHERE {
?film wdt:P31 wd:Q11424; # Instance of film
rdfs:label ?filmLabel. # Label of the film
FILTER(LANG(?filmLabel) = "en") # Filter out non-English labels
FILTER(REGEX(?filmLabel, "oppen", "i")) # Matches labels containing "oppen" (case-insensitive)
FILTER (!STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "oppen")) # Ensure it's not already matched by starts with
}
}
}
ORDER BY ?order
LIMIT 3


====