Skip to content

Commit

Permalink
Removed multithreading and added JSON helper methods (#38)
Browse files Browse the repository at this point in the history
* Remove multiprocessing from README

* Add part data container that can encode itself as a JSON string

* Add description of `to_json()` method in PartData

* Remove multithreading and integrate PartData

* Check and make sure that all the parts can be parsed into JSON

* Bump version to 2.2.0
  • Loading branch information
JonathanVusich authored Oct 29, 2019
1 parent cb5ef83 commit 1536c84
Show file tree
Hide file tree
Showing 9 changed files with 57 additions and 71 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
![](https://img.shields.io/pypi/dm/pcpartpicker.svg)

This is an unofficial Python 3.7+ API for the website pcpartpicker.com.
It is written using asynchronous code and multiprocessing for efficient data retrieval.
It is written using asynchronous requests for efficient data retrieval.
This package is currently in a stable beta.

## Installation:
Expand All @@ -25,6 +25,7 @@ api = API()
cpu_data = api.retrieve("cpu")
all_data = api.retrieve_all()
```
Both `api.retrieve()` and `api.retrieve_all()` return a `PartData` instance, which contains a timestamp and provides a `to_json()` method.

A list of supported parts can be obtained in the following manner:
```python
Expand Down
2 changes: 1 addition & 1 deletion pcpartpicker/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .api import API

__name__ = ["pcpartpicker"]
__version__ = '2.1.1'
__version__ = '2.2.0'
__author__ = 'Jonathan Vusich'
__email__ = '[email protected]'
23 changes: 5 additions & 18 deletions pcpartpicker/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Set, Dict, List

from .handler import Handler
from .part_data import PartData

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARN)
Expand All @@ -14,12 +15,8 @@ class API:
the internals and the externally available functions.
"""

def __init__(self, region: str = "us", multithreading=True) -> None:
self._handler = Handler(region, multithreading=multithreading)

@property
def multithreading(self) -> bool:
return self._handler.multithreading
def __init__(self, region: str = "us") -> None:
self._handler = Handler(region)

@property
def supported_regions(self) -> Set[str]:
Expand All @@ -44,17 +41,7 @@ def set_region(self, region: str) -> None:
self._handler.set_region(region)
logger.debug(f"Region set to {self.region}")

def set_multithreading(self, multithreading: bool) -> None:
"""
Function that allows the user to determine whether the scraped HTML is parsed using multiple threads or not.
Single threading is especially useful for debugging purposes.
:param multithreading:
:return:
"""
self._handler.set_multithreading(multithreading)
logger.debug(f"Multithreading set to {self.multithreading}")

def retrieve(self, *args, force_refresh: bool = False) -> Dict[str, List]:
def retrieve(self, *args, force_refresh: bool = False) -> PartData:
"""
Public function that allows the user to make part requests.
Expand All @@ -67,7 +54,7 @@ def retrieve(self, *args, force_refresh: bool = False) -> Dict[str, List]:
logger.debug(f"Retrieving {args}...")
return self._handler.retrieve(*args, force_refresh=force_refresh)

def retrieve_all(self, force_refresh: bool = False) -> Dict[str, List]:
def retrieve_all(self, force_refresh: bool = False) -> PartData:
"""
Public function that allows the user to retrieve all supported part types.
Expand Down
26 changes: 6 additions & 20 deletions pcpartpicker/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .mappings import part_classes
from .parse_utils import parse
from .scraper import Scraper
from .part_data import PartData

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARN)
Expand All @@ -22,10 +23,9 @@ class Handler:
_supported_regions: Set[str] = {"au", "be", "ca", "de", "es", "fr", "se",
"in", "ie", "it", "nz", "uk", "us"}

def __init__(self, region: str = "us", multithreading: bool = False) -> None:
def __init__(self, region: str = "us") -> None:
if region not in self._supported_regions:
raise UnsupportedRegion(f"Region '{region}' is not supported for this API!")
self._multithreading = multithreading
self._region = region
self._last_refresh = time.time()
self.scraper = Scraper(self.region)
Expand All @@ -42,10 +42,6 @@ def supported_parts(self) -> Set[str]:
def supported_regions(self) -> Set[str]:
return self._supported_regions

@property
def multithreading(self) -> bool:
return self._multithreading

def set_region(self, region: str) -> None:
"""
Hidden method that changes the region for the parser and scraper objects contained in this instance.
Expand All @@ -58,26 +54,16 @@ def set_region(self, region: str) -> None:
self._region = region
self.scraper = Scraper(region)

def set_multithreading(self, multithreading: bool) -> None:
"""
Function that allows the user to specify whether or not the API should run multithreaded or not.
Multithreading allows for easier debugging of the internals but also greatly amplifies the amount
of time necessary to process all of the retrieved data.
:param multithreading:
:return:
"""
self._multithreading = multithreading

def retrieve(self, *args, force_refresh=False):
def retrieve(self, *args, force_refresh=False) -> PartData:
"""
Hidden function that is designed to retrieve and parse part data from PCPartPicker.
:param args: str: Variable number of arguments that must map to valid parts.
:param force_refresh: bool: This value determines whether or not to completely refresh the
entire API database, or to simply retrieve cached values.
:return: dict: A dictionary of the input part types with their mapped data object values.
:return: PartData: A part data object that contains the part names and their mapped data object values.
"""
results: Dict[str, List] = {}
results: PartData = PartData()

# Verify the validity of the parts
for part in args:
Expand Down Expand Up @@ -107,7 +93,7 @@ def retrieve(self, *args, force_refresh=False):
logger.debug(f"Completed downloading! Time elapsed is {total_time} seconds.")

start = time.perf_counter()
parsed_data = parse(raw_data, self._multithreading)
parsed_data = parse(raw_data)
total_time = time.perf_counter() - start

logger.debug(f"Completed parsing! Time elapsed is {total_time} seconds.")
Expand Down
12 changes: 4 additions & 8 deletions pcpartpicker/parse_utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import json
import re
from decimal import Decimal
from multiprocessing import Pool
from typing import Tuple, Dict
from typing import Tuple, Dict, List

from dacite import from_dict, Config
from moneyed import Money

from .mappings import part_classes
from .part_data import PartData


def dataclass_from_dict(datatype, dictionary: dict):
Expand All @@ -30,10 +30,6 @@ def deserialize_part_data(part_data: Tuple[str, str]) -> list:
return [dataclass_from_dict(part_classes[part_data[0]], item) for item in deserialized_parts]


def parse(part_dict: Dict[str, str], multithreading: bool = True) -> Dict[str, list]:
if multithreading:
with Pool() as pool:
results = pool.map(deserialize_part_data, (item for item in part_dict.items()))
else:
results = [deserialize_part_data(item) for item in part_dict.items()]
def parse(part_dict: Dict[str, str]) -> Dict[str, List]:
results = [deserialize_part_data(item) for item in part_dict.items()]
return dict(zip(part_dict.keys(), results))
23 changes: 23 additions & 0 deletions pcpartpicker/part_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from datetime import datetime
import json
from dataclasses import is_dataclass
from moneyed import Money


class _PartDataEncoder(json.JSONEncoder):
    """JSON encoder for the non-primitive values stored in a ``PartData``."""

    def default(self, o):
        """Return a JSON-encodable stand-in for ``o``.

        :param o: The object that the stock encoder could not serialize.
        :return: The instance ``__dict__`` for dataclass instances, ``str(o)`` for
            datetimes, or a ``(currency code, amount string)`` pair for ``Money``.
        :raises TypeError: If ``o`` is none of the supported types.
        """
        # is_dataclass() is also true for dataclass *classes*; only instances
        # carry per-object field values that can be serialized.
        if is_dataclass(o) and not isinstance(o, type):
            return o.__dict__
        if isinstance(o, datetime):
            return str(o)
        if isinstance(o, Money):
            # The tuple is emitted as a two-element JSON array.
            return o.currency.code, str(o.amount)
        # Defer to the base class so that the TypeError names the offending type.
        return super().default(o)


class PartData(dict):
    """
    Dictionary subclass that records its creation time and can encode itself as JSON.

    ``timestamp`` is set when the instance is constructed. It is stored as an
    attribute, not as a dictionary entry, so it is not part of the ``to_json()`` output.
    """

    def __init__(self, *args, **kwargs):
        """
        Create the container, optionally pre-populated like a regular ``dict``.

        :param args: Positional arguments forwarded to ``dict``.
        :param kwargs: Keyword arguments forwarded to ``dict``.
        """
        super().__init__(*args, **kwargs)
        self.timestamp: datetime = datetime.now()

    def to_json(self) -> str:
        """
        Encode the dictionary contents as a JSON string indented by four spaces.

        :return: str: The JSON document.
        :raises TypeError: If a contained value is of an unsupported type.
        """
        return json.dumps(self, indent=4, cls=_PartDataEncoder)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ def read(file_name: str):

setup(
name="pcpartpicker",
version="2.1.1",
version="2.2.0",
author="Jonathan Vusich",
author_email="[email protected]",
description="A fast, simple API for PCPartPicker.com.",
Expand Down
17 changes: 0 additions & 17 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ def test_api_default_init(self):
"in", "ie", "it", "nz", "uk", "us"})

self.assertEqual(api.region, 'us')
self.assertTrue(api.multithreading)

# Ensure that API can be initialized with a different region
def test_api_region_init(self):
Expand All @@ -46,19 +45,3 @@ def test_api_set_region_incorrect_region(self):
api = API()
api.set_region('oc')
assert 'Region \'oc\' is not supported for this API!' in str(excinfo.exception)

def test_api_multithreading_kwd(self):
api = API(multithreading=False)
self.assertFalse(api.multithreading)
self.assertFalse(api._handler.multithreading)

def test_api_modify_multithreading(self):
api = API()
self.assertTrue(api.multithreading)
self.assertTrue(api._handler._multithreading)
api.set_multithreading(False)
self.assertFalse(api._handler._multithreading)
self.assertFalse(api.multithreading)
api.set_multithreading(True)
self.assertTrue(api.multithreading)
self.assertTrue(api._handler._multithreading)
20 changes: 15 additions & 5 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import unittest

from pcpartpicker import API
from pcpartpicker.scraper import Scraper
from pcpartpicker.parse_utils import parse
from pcpartpicker.mappings import part_classes

import asyncio
import unittest


class ParserTest(unittest.TestCase):

Expand All @@ -23,87 +20,100 @@ def test_us_tokens(self):
for p in part_data:
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "uk" region and validate the parsed results.
def test_uk_tokens(self):
results = API("uk").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "nz" region and validate the parsed results.
def test_nz_tokens(self):
results = API("nz").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "it" region and validate the parsed results.
def test_it_tokens(self):
results = API("it").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "ie" region and validate the parsed results.
def test_ie_tokens(self):
results = API("ie").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "in" region and validate the parsed results.
def test_in_tokens(self):
results = API("in").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "se" region and validate the parsed results.
def test_se_tokens(self):
results = API("se").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "fr" region and validate the parsed results.
def test_fr_tokens(self):
results = API("fr").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "es" region and validate the parsed results.
def test_es_tokens(self):
results = API("es").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "de" region and validate the parsed results.
def test_de_tokens(self):
results = API("de").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "ca" region and validate the parsed results.
def test_ca_tokens(self):
results = API("ca").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "be" region and validate the parsed results.
def test_be_tokens(self):
results = API("be").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

# Retrieve all supported part types for the "au" region and validate the parsed results.
def test_au_tokens(self):
results = API("au").retrieve_all()
for part, part_data in results.items():
for p in part_data:
# Every parsed item must be an instance of the class that part_classes maps to its part name.
self.assertIsInstance(p, part_classes[part])
self.assertIsNotNone(p.brand)
# to_json() is annotated to return a str, so the effective check is that encoding does not raise.
self.assertIsNotNone(results.to_json())

0 comments on commit 1536c84

Please sign in to comment.