Skip to content

Commit

Permalink
Merge pull request #126 from bounswe/backend-wikidata-api
Browse files Browse the repository at this point in the history
feat: add wikidata query endpoint

Conflicts solved in the other branch, ready to merge.
iremnuy authored Apr 29, 2024
2 parents a01eebb + b14b162 commit 0ee9503
Showing 9 changed files with 267 additions and 8 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@

.DS_Store
.env
/keypair
.venv/**
venv
__pycache__/**
*.pyc
**/mydb/**
73 changes: 73 additions & 0 deletions backend/project/app/qlever.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This helper class is used to query Wikidata through the QLever SPARQL endpoint
import requests
import json
from typing import List

class QleverAPI:
    """Small client for the QLever SPARQL endpoint that serves Wikidata.

    Args:
        timeout: Seconds to wait for the endpoint before giving up. Without a
            timeout ``requests`` waits forever and can block a worker.
    """

    # Characters that must be escaped inside a double-quoted SPARQL string
    # literal; applied in a single pass by ``str.translate``.
    _LITERAL_ESCAPES = str.maketrans({
        "\\": "\\\\",
        '"': '\\"',
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    })

    def __init__(self, timeout=60):
        self.endpoint_url = "https://qlever.cs.uni-freiburg.de/api/wikidata"
        # Not read by any method of this class; kept so existing attribute
        # access keeps working.
        self.params = {
            "action": "wbsearchentities",
            "format": "json",
            "language": "en",
        }
        self.timeout = timeout

    @classmethod
    def _escape_literal(cls, text):
        """Return *text* escaped for use inside a "..." SPARQL string literal."""
        return text.translate(cls._LITERAL_ESCAPES)

    def execute_query(self, query):
        """Send a SPARQL query to the QLever endpoint.

        Args:
            query: SPARQL query text.

        Returns:
            The decoded JSON response, or ``None`` when the request fails,
            the server answers with an error status, or the body is not
            valid JSON.
        """
        try:
            response = requests.get(
                self.endpoint_url,
                params={'query': query, 'format': 'json'},
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException.
        except (requests.exceptions.RequestException, ValueError) as e:
            print("Error:", e)
            return None

    def film_pattern_query(self, pattern, limit):
        """Search English film labels for *pattern*.

        Labels that start with the pattern are ranked before labels that
        merely contain it. Matching ignores case and spaces.

        Args:
            pattern: Text to look for. It is lower-cased and stripped of
                spaces, then escaped so quotes or backslashes cannot break
                out of the SPARQL string literal.
            limit: Maximum number of rows; coerced with ``int``.

        Returns:
            Whatever ``execute_query`` returns (decoded JSON or ``None``).
        """
        normalized = pattern.lower().replace(" ", "")
        # NOTE: the REGEX filter below still interprets the pattern as a
        # regular expression; only string-literal escaping is applied here.
        literal = self._escape_literal(normalized)
        limit = int(limit)

        SPARQL = f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT DISTINCT ?film ?filmLabel ?filmId WHERE {{
        {{
        SELECT ?film ?filmLabel ?filmId (1 as ?order) WHERE {{
        ?film wdt:P31 wd:Q11424;
        rdfs:label ?filmLabel.
        FILTER(LANG(?filmLabel) = "en")
        FILTER(STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "{literal}"))
        }}
        }}
        UNION
        {{
        SELECT ?film ?filmLabel ?filmId (2 as ?order) WHERE {{
        ?film wdt:P31 wd:Q11424;
        rdfs:label ?filmLabel.
        FILTER(LANG(?filmLabel) = "en")
        BIND(REPLACE(LCASE(?filmLabel), " ", "") AS ?formattedLabel)
        FILTER(REGEX(?formattedLabel, "{literal}", "i"))
        FILTER (!STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "{literal}"))
        }}
        }}
        }}
        ORDER BY ?order
        LIMIT {limit}
        """

        return self.execute_query(SPARQL)





7 changes: 7 additions & 0 deletions backend/project/app/serializers.py
Original file line number Diff line number Diff line change
@@ -86,3 +86,10 @@ class Meta:
model = Actor
fields = ['name', 'surname', 'description']


class WikidataQuerySerializer(serializers.Serializer):
    """Request body carrying a single required string field, ``query``."""

    # Query text supplied by the caller.
    query = serializers.CharField()

class FilmPatternWithLimitQuerySerializer(serializers.Serializer):
    """Request body for a film label search: a text pattern plus a row limit."""

    # Text to search for in film labels.
    pattern = serializers.CharField()
    # Maximum number of results; must be positive because the value is placed
    # in a SPARQL LIMIT clause, where zero or negative values are useless or
    # invalid.
    limit = serializers.IntegerField(min_value=1)
10 changes: 7 additions & 3 deletions backend/project/app/urls.py
Original file line number Diff line number Diff line change
@@ -3,17 +3,21 @@
from app import views
from django.urls import path
from drf_spectacular.views import SpectacularAPIView, SpectacularSwaggerView
from .views import film_api, film_detail_api, RegisterView, MyObtainTokenPairView, LogoutView, VerifyEmail
from .views import film_api, film_detail_api, RegisterView, execute_query, query_film_pattern, MyObtainTokenPairView, LogoutView, VerifyEmail
from rest_framework_simplejwt.views import TokenRefreshView


# URL routes of the app: API schema/Swagger UI, film endpoints, auth
# (login, refresh, register, logout, email verification) and the two
# SPARQL-backed query endpoints.
# NOTE(review): 'register/' and 'email-verify/' each appear twice below.
# Django uses the first matching pattern, so the second entry of each pair is
# never reached — looks like leftover from a merge/diff; confirm and drop.
urlpatterns = [
    path('film/schema/', SpectacularAPIView.as_view(), name='schema'),
    path('film/schema/swagger-ui/', SpectacularSwaggerView.as_view(url_name='schema'), name='swagger-ui'),
    path('film/', film_api, name='film-list'),
    path('film/<int:id>/', film_detail_api, name='film-detail'),
    path('login/', MyObtainTokenPairView.as_view(), name='token_obtain_pair'),
    path('login/refresh/', TokenRefreshView.as_view(), name='token_refresh'),
    path('register/', RegisterView.as_view(), name='auth_register'),
    path('register/', RegisterView.as_view(), name='auth_register'),
    path('wikidata-query/', execute_query, name='wikidata-query'),
    path('query-film-pattern/', query_film_pattern, name='query-film-pattern'),
    path('logout/', LogoutView.as_view(), name='logout'),
    path('email-verify/', VerifyEmail.as_view(), name='email-verify'),
    path('email-verify/', VerifyEmail.as_view(), name='email-verify')
]

80 changes: 77 additions & 3 deletions backend/project/app/views.py
Original file line number Diff line number Diff line change
@@ -6,6 +6,9 @@
from django.conf import settings
from django.http.response import JsonResponse
from django.contrib.auth.models import User
from app.models import Film, Genre, Director, Actor
from app.serializers import *
#from app.serializers import UserSerializer, FilmSerializer, GenreSerializer, DirectorSerializer, ActorSerializer,WikidataQuerySerializer, FilmPatternWithLimitQuerySerializer, MyTokenObtainPairSerializer, LogoutSerializer
from rest_framework import permissions, status , viewsets, generics
from rest_framework.response import Response
from rest_framework_simplejwt.tokens import RefreshToken
@@ -14,9 +17,13 @@
from rest_framework.permissions import AllowAny, IsAuthenticated
import jwt
from rest_framework_simplejwt.views import TokenObtainPairView

from app.models import Film, Genre, Director, Actor
from app.serializers import UserSerializer, FilmSerializer, GenreSerializer, DirectorSerializer, ActorSerializer, RegisterSerializer, LogoutSerializer, MyTokenObtainPairSerializer
from rest_framework.permissions import IsAuthenticated
from .serializers import RegisterSerializer
from rest_framework import generics
from app.wikidata import WikidataAPI
from app.qlever import QleverAPI
from rest_framework.response import Response
from rest_framework import status
from .utils import Util


@@ -169,3 +176,70 @@ def film_detail_api(request, id):
return JsonResponse("Film Deleted Successfully", safe=False)


# A simple endpoint for sending a semantic query to the Wikidata API
@extend_schema(
    description="API endpoint for sending a semantic query to the Wikidata API.",
    methods=['POST'],
    request=WikidataQuerySerializer,
)
@api_view(['POST'])
def execute_query(request):
    """
    Allow users to send a semantic query to the Wikidata API.

    Responses:
        200 with the JSON returned by ``WikidataAPI.execute_query``.
        400 with the serializer errors when the body is invalid.
        502 when the upstream query failed (``execute_query`` returned None).
    """
    # @api_view(['POST']) already rejects every other method, so no explicit
    # method check is needed here.
    serializer = WikidataQuerySerializer(data=request.data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    query_text = serializer.validated_data.get('query')

    # Execute the query using the WikidataAPI class
    results = WikidataAPI().execute_query(query_text)
    if results is None:
        # The helper signals any upstream failure with None; report it as a
        # gateway error instead of a 200 with an empty body.
        return Response(
            {'error': 'Wikidata query failed.'},
            status=status.HTTP_502_BAD_GATEWAY,
        )

    return Response(results)


# Find films with a pattern string and a limit value
@extend_schema(
    description="API endpoint for finding films with a pattern string and a limit value.",
    methods=['POST'],
    request=FilmPatternWithLimitQuerySerializer,
)
@api_view(['POST'])
def query_film_pattern(request):
    """
    Find films with a pattern string and a limit value using Qlever.

    Responses:
        200 with a list of ``{'id': ..., 'label': ...}`` objects.
        400 with the serializer errors when the body is invalid.
        502 when the Qlever query failed or returned an unexpected payload.
    """
    # @api_view(['POST']) already rejects every other method, so no explicit
    # method check is needed here.
    serializer = FilmPatternWithLimitQuerySerializer(data=request.data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    pattern = serializer.validated_data.get('pattern')
    limit = serializer.validated_data.get('limit')

    # Execute the query using the Qlever class
    results = QleverAPI().film_pattern_query(pattern, limit)

    try:
        bindings = results['results']['bindings']
    except (KeyError, TypeError):
        # TypeError: the helper returned None (upstream failure);
        # KeyError: the payload lacks the results/bindings structure.
        return Response(
            {'error': 'Film pattern query failed.'},
            status=status.HTTP_502_BAD_GATEWAY,
        )

    # change response format: keep only film ids and labels
    films = [
        {
            'id': binding['film']['value'],
            'label': binding['filmLabel']['value'],
        }
        for binding in bindings
    ]
    return Response(films)



37 changes: 37 additions & 0 deletions backend/project/app/wikidata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# This helper class is used to interact with the Wikidata API
import requests
import json
from typing import List

# Sample SPARQL query: selects every item that is an instance of (wdt:P31)
# film (wd:Q11424) together with its label, description and aliases resolved
# by the Wikidata label service. Not referenced by the WikidataAPI class in
# this module.
QUERY = """
SELECT ?item ?itemLabel ?itemDescription ?itemAltLabel WHERE {
?item wdt:P31 wd:Q11424.
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
"""

class WikidataAPI:
    """Small client for the Wikidata SPARQL query service.

    Args:
        timeout: Seconds to wait for the endpoint before giving up. Without a
            timeout ``requests`` waits forever and can block a worker.
    """

    def __init__(self, timeout=60):
        self.endpoint_url = "https://query.wikidata.org/sparql"
        # Not read by any method of this class; kept so existing attribute
        # access keeps working.
        self.params = {
            "action": "wbsearchentities",
            "format": "json",
            "language": "en",
        }
        self.timeout = timeout

    # Send a semantic query to the Wikidata API
    def execute_query(self, query):
        """Send a SPARQL query to the endpoint.

        Args:
            query: SPARQL query text.

        Returns:
            The decoded JSON response, or ``None`` when the request fails,
            the server answers with an error status, or the body is not
            valid JSON.
        """
        try:
            response = requests.get(
                self.endpoint_url,
                params={'query': query, 'format': 'json'},
                timeout=self.timeout,
            )
            response.raise_for_status()
            return response.json()
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error is not a RequestException.
        except (requests.exceptions.RequestException, ValueError) as e:
            print("Error:", e)
            return None



6 changes: 6 additions & 0 deletions backend/project/project/settings.py
Original file line number Diff line number Diff line change
@@ -44,6 +44,12 @@
'PASSWORD': 'password', # MySQL password
'HOST': 'db', # Host where MySQL is running (in this case, Docker container)
'PORT': '3306', # Port where MySQL is running (in this case, Docker container)
}
}
"""

"""
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
5 changes: 3 additions & 2 deletions backend/project/docker-compose.yml → docker-compose.yml
Original file line number Diff line number Diff line change
@@ -17,11 +17,12 @@ services:
- backend-network

backend:
image: semanticflix_backend
build:
context: .
context: ./backend/project
dockerfile: Dockerfile
container_name: semanticflix_backend
command: sh -c "python3 manage.py migrate --noinput && python3 manage.py collectstatic --noinput && python manage.py runserver 0.0.0.0:8000"
command: sh -c "python backend/project/manage.py migrate --noinput && python backend/project/manage.py collectstatic --noinput && python backend/project/manage.py runserver 0.0.0.0:8000"
restart: always
volumes:
- .:/app
54 changes: 54 additions & 0 deletions sample_queries.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
======= Films starring Leonardo DiCaprio
{
"query": "SELECT ?film ?filmLabel WHERE { ?film wdt:P31 wd:Q11424; wdt:P161 wd:Q38111. SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\". } }"
}
=======


===== Films whose label starts with the given string
{
"query": "SELECT DISTINCT ?film ?filmLabel ?filmId WHERE { ?film wdt:P31 wd:Q11424; rdfs:label ?filmLabel. FILTER(STRSTARTS(?filmLabel, \"OPP\")) OPTIONAL { ?film wdt:P345 ?filmId. } SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\". } } LIMIT 10"
}

=== Better version of it
SELECT DISTINCT ?film ?filmLabel ?filmId WHERE {
?film wdt:P31 wd:Q11424; # Instance of film
rdfs:label ?filmLabel. # Label of the film
FILTER(LANG(?filmLabel) = "en") # Filter out non-English labels
FILTER(STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "opportunityk")) # Matches films with labels starting with "opportunityk" (case-insensitive and ignoring spaces)
}
LIMIT 3
====


====
Final film listing sparql

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?film ?filmLabel ?filmId WHERE {
{
SELECT ?film ?filmLabel ?filmId (1 as ?order) WHERE {
?film wdt:P31 wd:Q11424; # Instance of film
rdfs:label ?filmLabel. # Label of the film
FILTER(LANG(?filmLabel) = "en") # Filter out non-English labels
FILTER(STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "oppen")) # Matches labels starting with "oppen" (case-insensitive and ignoring spaces)
}
}
UNION
{
SELECT ?film ?filmLabel ?filmId (2 as ?order) WHERE {
?film wdt:P31 wd:Q11424; # Instance of film
rdfs:label ?filmLabel. # Label of the film
FILTER(LANG(?filmLabel) = "en") # Filter out non-English labels
FILTER(REGEX(?filmLabel, "oppen", "i")) # Matches labels containing "oppen" (case-insensitive)
FILTER (!STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "oppen")) # Ensure it's not already matched by starts with
}
}
}
ORDER BY ?order
LIMIT 3


====

0 comments on commit 0ee9503

Please sign in to comment.