-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 0150e42
Showing
63 changed files
with
10,639 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
pip-wheel-metadata/ | ||
share/python-wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.nox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
*.py,cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
db.sqlite3-journal | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# IPython | ||
profile_default/ | ||
ipython_config.py | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# pipenv | ||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. | ||
# However, in case of collaboration, if having platform-specific dependencies or dependencies | ||
# having no cross-platform support, pipenv may install dependencies that don't work, or not | ||
# install all needed dependencies. | ||
#Pipfile.lock | ||
|
||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow | ||
__pypackages__/ | ||
|
||
# Celery stuff | ||
celerybeat-schedule | ||
celerybeat.pid | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
.dmypy.json | ||
dmypy.json | ||
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# IDE | ||
/.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
BSD 3-Clause License | ||
|
||
Copyright (c) 2022, Sandra Mierz | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are met: | ||
|
||
1. Redistributions of source code must retain the above copyright notice, this | ||
list of conditions and the following disclaimer. | ||
|
||
2. Redistributions in binary form must reproduce the above copyright notice, | ||
this list of conditions and the following disclaimer in the documentation | ||
and/or other materials provided with the distribution. | ||
|
||
3. Neither the name of the copyright holder nor the names of its | ||
contributors may be used to endorse or promote products derived from | ||
this software without specific prior written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | ||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | ||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | ||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | ||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# diophila | ||
Python API Wrapper for [OpenAlex](http://openalex.org/). | ||
|
||
### Entities / Endpoints | ||
OpenAlex currently describes five different [scholarly entity types](https://docs.openalex.org/about-the-data) | ||
and their connections: | ||
* [Authors](https://docs.openalex.org/about-the-data/author) | ||
* [Concepts](https://docs.openalex.org/about-the-data/concept) | ||
* [Institutions](https://docs.openalex.org/about-the-data/institution) | ||
* [Venues](https://docs.openalex.org/about-the-data/venue) | ||
* [Works](https://docs.openalex.org/about-the-data/work) | ||
|
||
Each entity type comes with an endpoint of the same name that can be queried | ||
for a single (random or specific) entity or multiple (grouped or listed) entities. | ||
|
||
### Installation | ||
```commandline | ||
pip (or pip3) install diophila | ||
``` | ||
|
||
### Usage | ||
First off, you need to initialize a client. The client offers all methods to query the API. | ||
|
||
```Python | ||
from diophila import OpenAlex | ||
|
||
openalex = OpenAlex() | ||
``` | ||
|
||
You can use the client to query for a [single random entity](https://docs.openalex.org/api/get-single-entities#random-entity) | ||
with the method `get_random_<entity>`: | ||
```Python | ||
random_author = openalex.get_random_author() | ||
random_author['orcid'] | ||
``` | ||
|
||
Or if you have a [specific entity](https://docs.openalex.org/api/get-single-entities) in mind, you can retrieve it | ||
by passing one of the entity's IDs to the `get_single_<entity>` method: | ||
```Python | ||
specific_work = openalex.get_single_work("https://doi.org/10.1364/PRJ.433188", "doi") | ||
specific_work['display_name'] | ||
``` | ||
|
||
If you are interested in [entities grouped](https://docs.openalex.org/api/get-groups-of-entities) into facets, | ||
use the `get_groups_of_<entities>` method: | ||
```Python | ||
grouped_institutions = openalex.get_groups_of_institutions("type") | ||
for group in grouped_institutions['group_by']: | ||
group['key'] | ||
``` | ||
|
||
And last but not least, you can get [multiple entities](https://docs.openalex.org/api/get-lists-of-entities) of one type | ||
as a list by using the `get_list_of_<entities>` method. Note that this method uses pagination, | ||
either [basic paging](https://docs.openalex.org/api#basic-paging) or | ||
[cursor paging](https://docs.openalex.org/api#cursor-paging) | ||
depending on whether the `pages` parameter is supplied: | ||
```Python | ||
# if no `pages` parameter is supplied, we use cursor paging | ||
pages = None | ||
# if `pages` parameter is supplied, we use basic paging | ||
pages = [1, 2, 3] | ||
|
||
filters = {"is_oa": "true", | ||
"works_count": ">200"} | ||
pages_of_venues = openalex.get_list_of_venues(filters=filters, pages=pages) | ||
|
||
for page in pages_of_venues: # loop through pages | ||
for venue in page['results']: # loop through list of venues | ||
venue['id'] | ||
``` | ||
|
||
### The Polite Pool | ||
It's a good idea to use the OpenAlex [polite pool](https://docs.openalex.org/api#the-polite-pool) | ||
which offers faster response times for users providing an email address. | ||
If you would like to use it, simply initialize the client with your email address. | ||
|
||
```Python | ||
from diophila import OpenAlex | ||
|
||
openalex = OpenAlex("you@example.com") | ||
``` | ||
|
||
### Rate limits | ||
The API currently doesn't have [rate limits](https://docs.openalex.org/api#rate-limits). | ||
However, if you need more than 100,000 calls per day, | ||
please drop the OpenAlex team a line at team@ourresearch.org | ||
or alternatively look into [using a snapshot](https://docs.openalex.org/download-snapshot). | ||
|
||
### Citation | ||
If you are using OpenAlex in your research, they kindly ask you to cite https://doi.org/10.48550/arXiv.2205.01833 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from diophila.openalex import OpenAlex |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
"""This module wraps all API calls to the OpenAlex API.""" | ||
from typing import Optional, List, Iterable | ||
import requests | ||
|
||
|
||
class APICaller:
    """Thin HTTP client that wraps all calls to the OpenAlex API.

    An instance stores the API base URL and the request headers. Single
    requests go through `get`; paginated requests go through `get_all`, which
    returns a generator of result pages using either basic paging (explicit
    page numbers) or cursor paging.
    """

    # Basic paging can only read the first 10,000 results of any list.
    # see https://docs.openalex.org/api#basic-paging
    PAGING_RESULTS_MAX = 10000

    # per-page parameter can be any number between 1 and 200
    # see https://docs.openalex.org/api#basic-paging
    PER_PAGE_MAX = 200

    # page size used when the caller passes no (or a non-positive) `per_page`
    PER_PAGE_DEFAULT = 25

    def __init__(self, base_url: str, email: Optional[str] = None) -> None:
        """ Init API caller, preferably with an email to get into the polite pool.
        Args:
            base_url (str): base URL of the API; request paths are appended to it
                            after a single '/'.
            email (Optional[str]): if given, it is sent as 'mailto:<email>' in the
                                   User-Agent header of every request.
        """
        self.base_url = base_url
        self.headers = {'Accept': 'application/json'}
        if email:
            self.headers['User-Agent'] = f'mailto:{email}'

    def get(self, path: str, params: Optional[dict] = None) -> dict:
        """ Make a GET request to the API.
        Args:
            path (str): path that will be concatenated to the base URL of the OpenAlex API.
            params (Optional[dict]): dictionary containing items that will be constructed
                                     into a query string, optional.
        Returns:
            JSON object from HTTP response.
        Raises:
            requests.HTTPError: if the response carries an error status code.
        """
        response = requests.get(url=f"{self.base_url}/{path}",
                                params=params,
                                headers=self.headers)
        response.raise_for_status()
        return response.json()

    def get_all(self,
                path: str,
                params: Optional[dict] = None,
                per_page: Optional[int] = None,
                pages: Optional[List[int]] = None) -> Iterable:
        """ Make multiple GET requests to the API to paginate through results.
        Args:
            path (str): path that will be concatenated to the base URL of the OpenAlex API.
            params (Optional[dict]): dictionary containing items that will be constructed
                                     into a query string, optional. It is copied, so the
                                     caller's dictionary is never modified.
            per_page (Optional[int]): number of entities per page. Needs to be in [1;200].
                                      Defaults to 25; larger values are capped at 200.
            pages (Optional[List[int]]): list of page numbers to query from API, optional.
                                         If empty, cursor pagination will be used.
        Returns:
            Generator, each item a dict from JSON representing a (partial) list of entities.
            Requests are only sent while the generator is being consumed.
        """
        # Work on a private copy: `params` may be None, and the paging helpers
        # add 'per_page', 'page' and 'cursor' keys that must not leak back
        # into the caller's dictionary.
        query = dict(params) if params else {}
        query['per_page'] = self.__validate_per_page_param(per_page)
        if pages:
            return self.__do_basic_paging(path, query, pages)
        return self.__do_cursor_paging(path, query)

    def __do_basic_paging(self, path: str, params: dict, pages: List[int]):
        """ Use basic pagination to loop through the specified result pages.

        Page numbers outside the range reachable with basic paging are
        skipped silently. `params` is updated in place with the current page.
        """
        for page in self.__validate_pages(pages, params['per_page']):
            params['page'] = page
            yield self.get(path, params)

    def __do_cursor_paging(self, path: str, params: dict):
        """ Use cursor pagination to loop through the results.

        Yields every response, including the last one, and stops once a
        response carries no 'next_cursor' in its 'meta' section.
        `params` is updated in place with the current cursor.
        """
        params['cursor'] = "*"  # start cursor pagination
        while True:
            json_response = self.get(path, params)
            yield json_response

            next_cursor = json_response['meta']['next_cursor']
            if not next_cursor:
                break

            params['cursor'] = next_cursor

    def __validate_per_page_param(self, per_page: Optional[int]) -> int:
        """Helper method validating the 'per_page' parameter.

        Returns the default for a missing or non-positive value and caps
        anything above PER_PAGE_MAX at PER_PAGE_MAX.
        """
        if not per_page or per_page <= 0:
            return self.PER_PAGE_DEFAULT
        return min(per_page, self.PER_PAGE_MAX)

    def __validate_pages(self, pages, per_page):
        """Helper method validating the 'pages' parameter.

        Keeps only positive page numbers whose results still lie within the
        first PAGING_RESULTS_MAX results for the given page size.
        """
        max_pages = self.PAGING_RESULTS_MAX / per_page
        return [page for page in pages if 0 < page <= max_pages]
Oops, something went wrong.