Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add wikidata query endpoint #126

Merged
merged 8 commits into from
Apr 29, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
feat: implement film search on qlever
hikasap committed Apr 29, 2024
commit 09bddd8122e500cf0a47a658e0068e372da13f37
73 changes: 73 additions & 0 deletions backend/project/app/qlever.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# This helper class is used to query Wikidata through the QLever SPARQL endpoint
import json
import logging
from typing import List

import requests

class QleverAPI:
    """Small client for the QLever SPARQL endpoint that serves Wikidata.

    ``execute_query`` sends an arbitrary SPARQL string to the endpoint and
    ``film_pattern_query`` builds and runs a film-label search.
    """

    # Upper bound (seconds) on a single HTTP round trip. Without a timeout a
    # stalled endpoint would block the calling worker indefinitely.
    REQUEST_TIMEOUT = 60

    _logger = logging.getLogger(__name__)

    # Characters that must be backslash-escaped inside a double-quoted
    # SPARQL string literal.
    _SPARQL_ESCAPES = {
        "\\": "\\\\",
        '"': '\\"',
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
        "\b": "\\b",
        "\f": "\\f",
    }

    def __init__(self):
        self.endpoint_url = "https://qlever.cs.uni-freiburg.de/api/wikidata"
        # NOTE: not sent by any method of this class; kept so existing code
        # that reads the attribute keeps working.
        self.params = {
            "action": "wbsearchentities",
            "format": "json",
            "language": "en",
        }

    def execute_query(self, query):
        """Send a SPARQL query to the endpoint.

        Args:
            query: The SPARQL query text.

        Returns:
            The decoded JSON response, or ``None`` when the request fails,
            times out, returns an HTTP error status, or the body is not
            valid JSON.
        """
        try:
            response = requests.get(
                self.endpoint_url,
                params={"query": query, "format": "json"},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as exc:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            self._logger.error("QLever query failed: %s", exc)
            return None

    @classmethod
    def _escape_sparql_string(cls, text):
        """Escape ``text`` for use inside a double-quoted SPARQL literal."""
        return "".join(cls._SPARQL_ESCAPES.get(ch, ch) for ch in text)

    @classmethod
    def _build_film_pattern_query(cls, pattern, limit):
        """Return the SPARQL text for a film-label search.

        The pattern is lower-cased and stripped of spaces, then escaped so
        it cannot terminate the string literal it is embedded in.
        """
        needle = cls._escape_sparql_string(pattern.lower().replace(" ", ""))
        # Coerce to int so nothing but a number can reach the LIMIT clause;
        # SPARQL does not accept a negative LIMIT, so clamp at zero.
        row_limit = max(int(limit), 0)

        # Branch 1 (?order = 1): labels that start with the pattern.
        # Branch 2 (?order = 2): labels that contain it elsewhere.
        # CONTAINS is used instead of REGEX so the pattern is matched
        # literally, like the STRSTARTS filters; both operands are already
        # lower-cased, so the match stays case-insensitive.
        # NOTE: ?filmId is projected but no triple pattern binds it.
        return f"""
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX wd: <http://www.wikidata.org/entity/>
        PREFIX wdt: <http://www.wikidata.org/prop/direct/>
        SELECT DISTINCT ?film ?filmLabel ?filmId WHERE {{
          {{
            SELECT ?film ?filmLabel ?filmId (1 as ?order) WHERE {{
              ?film wdt:P31 wd:Q11424;
                    rdfs:label ?filmLabel.
              FILTER(LANG(?filmLabel) = "en")
              FILTER(STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "{needle}"))
            }}
          }}
          UNION
          {{
            SELECT ?film ?filmLabel ?filmId (2 as ?order) WHERE {{
              ?film wdt:P31 wd:Q11424;
                    rdfs:label ?filmLabel.
              FILTER(LANG(?filmLabel) = "en")
              BIND(REPLACE(LCASE(?filmLabel), " ", "") AS ?formattedLabel)
              FILTER(CONTAINS(?formattedLabel, "{needle}"))
              FILTER(!STRSTARTS(?formattedLabel, "{needle}"))
            }}
          }}
        }}
        ORDER BY ?order
        LIMIT {row_limit}
        """

    def film_pattern_query(self, pattern, limit):
        """Search English film labels for ``pattern``.

        Matching ignores case and spaces. Films whose label starts with the
        pattern are ordered before films that merely contain it.

        Args:
            pattern: Text to look for; treated literally, not as a regex.
            limit: Maximum number of rows; anything ``int()`` accepts.
                Negative values are clamped to 0.

        Returns:
            Whatever ``execute_query`` returns: the decoded JSON response,
            or ``None`` on failure.

        Raises:
            ValueError, TypeError: if ``limit`` cannot be converted to int.
        """
        sparql = self._build_film_pattern_query(pattern, limit)
        self._logger.debug("QLever film pattern query: %s", sparql)
        return self.execute_query(sparql)





6 changes: 5 additions & 1 deletion backend/project/app/serializers.py
Original file line number Diff line number Diff line change
@@ -79,4 +79,8 @@ class Meta:


class WikidataQuerySerializer(serializers.Serializer):
    """Request body carrying a single query string in ``query``."""

    # Declared once; a repeated identical declaration only rebinds the name.
    query = serializers.CharField()

class FilmPatternWithLimitQuerySerializer(serializers.Serializer):
    """Request body for a film search: a text pattern and a result limit."""

    # Text to look for in film labels.
    pattern = serializers.CharField()
    # Maximum number of films to return. The value ends up in a SPARQL
    # LIMIT clause, so zero and negative numbers are rejected here.
    limit = serializers.IntegerField(min_value=1)
9 changes: 5 additions & 4 deletions backend/project/app/urls.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
from app import views
from django.urls import path
from drf_spectacular.views import SpectacularAPIView, SpectacularSwaggerView
from .views import film_api, film_detail_api, RegisterView, execute_query
from .views import film_api, film_detail_api, RegisterView, execute_query, query_film_pattern
from .views import MyObtainTokenPairView
from rest_framework_simplejwt.views import TokenRefreshView

@@ -13,8 +13,9 @@
path('film/schema/swagger-ui/', SpectacularSwaggerView.as_view(url_name='schema'), name='swagger-ui'),
path('film/', film_api, name='film-list'),
path('film/<int:id>/', film_detail_api, name='film-detail'),
path('login/', MyObtainTokenPairView.as_view(), name='token_obtain_pair'),
path('login/', MyObtainTokenPairView.as_view(), name='token_obtain_pair'),
path('login/refresh/', TokenRefreshView.as_view(), name='token_refresh'),
path('register/', RegisterView.as_view(), name='auth_register'),
path('wikidata-query/', execute_query, name='wikidata-query'),
path('register/', RegisterView.as_view(), name='auth_register'),
path('wikidata-query/', execute_query, name='wikidata-query'),
path('query-film-pattern/', query_film_pattern, name='query-film-pattern'),
]
45 changes: 44 additions & 1 deletion backend/project/app/views.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
from django.http.response import JsonResponse
from django.contrib.auth.models import User
from app.models import Film, Genre, Director, Actor
from app.serializers import UserSerializer, FilmSerializer, GenreSerializer, DirectorSerializer, ActorSerializer, WikidataQuerySerializer
from app.serializers import UserSerializer, FilmSerializer, GenreSerializer, DirectorSerializer, ActorSerializer, WikidataQuerySerializer, FilmPatternWithLimitQuerySerializer
from rest_framework.decorators import api_view, permission_classes
from drf_spectacular.utils import extend_schema
from .serializers import MyTokenObtainPairSerializer
@@ -16,6 +16,7 @@
from .serializers import RegisterSerializer
from rest_framework import generics
from app.wikidata import WikidataAPI
from app.qlever import QleverAPI
from rest_framework.response import Response
from rest_framework import status

@@ -121,3 +122,45 @@ def execute_query(request):
return Response(results)
else:
return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)


# Find films with a pattern string and a limit value
@extend_schema(
    description="API endpoint for finding films with a pattern string and a limit value.",
    methods=['POST'],
    request=FilmPatternWithLimitQuerySerializer,
)
@api_view(['POST'])
def query_film_pattern(request):
    """
    Find films with a pattern string and a limit value using Qlever.

    Responds with a list of ``{"id": ..., "label": ...}`` objects on
    success, 400 with the serializer errors when the request body is
    invalid, and 502 when the Qlever query fails or its response does not
    have the expected ``results.bindings`` structure.
    """
    serializer = FilmPatternWithLimitQuerySerializer(data=request.data)
    if not serializer.is_valid():
        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

    pattern = serializer.validated_data.get('pattern')
    limit = serializer.validated_data.get('limit')

    # Execute the query using the Qlever class
    results = QleverAPI().film_pattern_query(pattern, limit)

    # film_pattern_query returns None when the upstream request fails, so
    # report a gateway error instead of subscripting None.
    try:
        bindings = results['results']['bindings']
    except (KeyError, TypeError):
        return Response(
            {'detail': 'Film search service returned no usable response.'},
            status=status.HTTP_502_BAD_GATEWAY,
        )

    # Reduce each result row to the film id and label only; rows missing
    # either variable are skipped.
    films = [
        {
            'id': row['film']['value'],
            'label': row['filmLabel']['value'],
        }
        for row in bindings
        if 'film' in row and 'filmLabel' in row
    ]
    return Response(films)



51 changes: 50 additions & 1 deletion sample_queries.txt
Original file line number Diff line number Diff line change
@@ -2,4 +2,53 @@
{
"query": "SELECT ?film ?filmLabel WHERE { ?film wdt:P31 wd:Q11424; wdt:P161 wd:Q38111. SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\". } }"
}
=======
=======


===== Film ids starting with the given string
{
"query": "SELECT DISTINCT ?film ?filmLabel ?filmId WHERE { ?film wdt:P31 wd:Q11424; rdfs:label ?filmLabel. FILTER(STRSTARTS(?filmLabel, \"OPP\")) OPTIONAL { ?film wdt:P345 ?filmId. } SERVICE wikibase:label { bd:serviceParam wikibase:language \"[AUTO_LANGUAGE],en\". } } LIMIT 10"
}

=== Better version of it
SELECT DISTINCT ?film ?filmLabel ?filmId WHERE {
?film wdt:P31 wd:Q11424; # Instance of film
rdfs:label ?filmLabel. # Label of the film
FILTER(LANG(?filmLabel) = "en") # Filter out non-English labels
FILTER(STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "opportunityk")) # Matches films with labels starting with "opportunityk" (case-insensitive and ignoring spaces)
}
LIMIT 3
====


====
Final film listing sparql

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT DISTINCT ?film ?filmLabel ?filmId WHERE {
{
SELECT ?film ?filmLabel ?filmId (1 as ?order) WHERE {
?film wdt:P31 wd:Q11424; # Instance of film
rdfs:label ?filmLabel. # Label of the film
FILTER(LANG(?filmLabel) = "en") # Filter out non-English labels
FILTER(STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "oppen")) # Matches labels starting with "oppen" (case-insensitive and ignoring spaces)
}
}
UNION
{
SELECT ?film ?filmLabel ?filmId (2 as ?order) WHERE {
?film wdt:P31 wd:Q11424; # Instance of film
rdfs:label ?filmLabel. # Label of the film
FILTER(LANG(?filmLabel) = "en") # Filter out non-English labels
FILTER(REGEX(?filmLabel, "oppen", "i")) # Matches labels containing "oppen" (case-insensitive)
FILTER (!STRSTARTS(REPLACE(LCASE(?filmLabel), " ", ""), "oppen")) # Ensure it's not already matched by starts with
}
}
}
ORDER BY ?order
LIMIT 3


====