Skip to content

Commit

Permalink
tries fixing wd_query parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
WolfgangFahl committed May 4, 2024
1 parent 974e423 commit 022a336
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 17 deletions.
6 changes: 3 additions & 3 deletions snapquery/snapquery_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ class NamedQuery:
# sparql query (to be hidden later)
sparql: str
# the url of the source code of the query
url: Optional[str]
url: Optional[str]=None
# one line title
title: Optional[str]
title: Optional[str]=None
# multiline description
description: Optional[str]
description: Optional[str]=None

def __post_init__(self):
"""
Expand Down
35 changes: 21 additions & 14 deletions tests/test_wd_query_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,25 @@
"""

import pprint
import unittest
import re
import requests
import wikitextparser as wtp
from wikitextparser import Section, Template
from ngwidgets.basetest import Basetest
from snapquery.snapquery_core import NamedQuery, NamedQueryManager

from snapquery.snapquery_core import NamedQuery


class TestWdQueryParsing(unittest.TestCase):
class TestWdQueryParsing(Basetest):
"""
test wikidata query parsing
"""

# Test setup hook: runs the shared Basetest setup, then records the wiki page
# that holds the SPARQL example queries.
# debug and profile are passed through unchanged to Basetest.setUp; both default to True.
def setUp(self, debug=True, profile=True):
Basetest.setUp(self, debug=debug, profile=profile)
# Page URL without a query string; other methods shown in this file append
# "?action=raw" to fetch the wikitext and pass it as the url of each NamedQuery.
self.base_url = "https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples"

def _get_examples_wikitext(self) -> str:
"""Get wiki text with SPARQL query examples"""
url = "https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples?action=raw"
res = requests.get(url)
res = requests.get(f"{self.base_url}?action=raw")
return res.text

def _extract_query_from_section(self, section: Section) -> NamedQuery:
Expand All @@ -35,6 +40,7 @@ def _extract_query_from_section(self, section: Section) -> NamedQuery:
name=title,
title=title,
description=desc,
url=self.base_url,
sparql=query.arguments[0].value,
)
return named_query
Expand Down Expand Up @@ -71,13 +77,14 @@ def test_wikidata_examples_query_extraction(self):
"""
wikitext = self._get_examples_wikitext()
parsed = wtp.parse(wikitext)
queries = []
lod = []
for section in parsed.sections:
nq = self._extract_query_from_section(section)
if nq:
queries.append(nq)
pprint.pprint(nq)


if __name__ == "__main__":
unittest.main()
lod.append(nq.as_record())
if self.debug:
pprint.pprint(nq)
if self.debug:
print(f"found {len(lod)} queries")
nqm = NamedQueryManager.from_samples()
nqm.store(lod)

0 comments on commit 022a336

Please sign in to comment.