diff --git a/.github/workflows/dryrun.yml b/.github/workflows/dryrun.yml index e28d686..ff392d5 100644 --- a/.github/workflows/dryrun.yml +++ b/.github/workflows/dryrun.yml @@ -13,8 +13,7 @@ jobs: strategy: max-parallel: 2 matrix: - # windows-latest is excluded as tempfile doesn't work perfectly for unit testing - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest, windows-latest] python-version: [3.7, 3.8, 3.9] steps: diff --git a/prelims/handler.py b/prelims/handler.py index d4d416d..014bc7d 100644 --- a/prelims/handler.py +++ b/prelims/handler.py @@ -1,16 +1,18 @@ from .post import Post import os - +from pathlib import Path class StaticSitePostsHandler(object): - def __init__(self, path_dir): + def __init__(self, path_dir, encoding='utf-8'): assert os.path.isdir(os.path.expanduser(path_dir)), \ f'path {path_dir} is not a directory or does not exist' - self.paths = [os.path.join(path_dir, f) for f in os.listdir(path_dir)] + exts = ['.md', '.html'] + self.paths = [p for p in Path(path_dir).rglob('*') if p.suffix in exts] self.processors = [] + self.encoding = encoding def register_processor(self, processor): """Add a front matter processor to the queue. @@ -32,7 +34,7 @@ def load_posts(self): """ posts = [] for path in self.paths: - post = Post.load(path) + post = Post.load(path, self.encoding) if post.is_valid(): posts.append(post) return posts diff --git a/prelims/post.py b/prelims/post.py index 07ea660..39fe128 100644 --- a/prelims/post.py +++ b/prelims/post.py @@ -16,11 +16,12 @@ class Post(object): - def __init__(self, path, front_matter, raw_content, content): + def __init__(self, path, front_matter, raw_content, content, encoding='utf-8'): self.path = path self.front_matter = front_matter self.raw_content = raw_content self.content = content + self.encoding = encoding def update(self, key, value, allow_overwrite=False): if key in self.front_matter and not allow_overwrite: @@ -47,7 +48,7 @@ def save(self): # https://github.com/yaml/pyyaml/pull/256 value_types = {type(value) for value in self.front_matter.values()} flow_style = None if list in value_types else False - with open(self.path, 'w') as f: + with open(self.path, 'w', encoding=self.encoding) as f: content = self.raw_content.replace( m.group(1), yaml.dump(self.front_matter, allow_unicode=True, @@ -56,8 +57,8 @@ def save(self): f.write(content) @staticmethod - def load(path): - with open(path) as f: + def load(path, encoding="utf-8"): + with open(path, encoding=encoding) as f: raw_content = f.read() front_matter = None @@ -74,4 +75,4 @@ def load(path): for re_filter in RE_FILTERS: content = re_filter.sub('', content).strip() - return Post(path, front_matter, raw_content, content) + return Post(path, front_matter, raw_content, content, encoding) diff --git a/prelims/processor/recommender.py b/prelims/processor/recommender.py index 8c600f9..bfac9ca 100644 --- a/prelims/processor/recommender.py +++ b/prelims/processor/recommender.py @@ -2,6 +2,7 @@ import re import numpy as np +from pathlib import PurePosixPath from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity @@ -48,4 +49,4 @@ def __path_to_permalink(self, path): """Extract a permalink portion of a file path, excluding a file extension. """ pattern = rf'({re.escape(self.permalink_base)}/.+?)(\.md|\.html)' - return re.search(pattern, path).group(1) + '/' + return re.search(pattern, str(PurePosixPath(path))).group(1) + '/' diff --git a/prelims/processor/tests/test_og_file_path_extractor.py b/prelims/processor/tests/test_og_file_path_extractor.py index 59a4994..f774360 100644 --- a/prelims/processor/tests/test_og_file_path_extractor.py +++ b/prelims/processor/tests/test_og_file_path_extractor.py @@ -27,7 +27,8 @@ def test_process(self): Your browser does not support the video tag. - """) + """, + "utf-8") extractor.process([p]) self.assertTrue('images' in p.front_matter) diff --git a/prelims/processor/tests/test_recommender.py b/prelims/processor/tests/test_recommender.py index 986fdb9..a51c445 100644 --- a/prelims/processor/tests/test_recommender.py +++ b/prelims/processor/tests/test_recommender.py @@ -8,9 +8,9 @@ class RecommenderTestCase(TestCase): def test_process(self): post_a = Post('/path/to/posts/a.md', {'title': 'foo'}, - '', 'Hello world.') + '', 'Hello world.', 'utf-8') post_b = Post('/path/to/posts/b.md', {'title': 'bar'}, - '', 'This is a pen.') + '', 'This is a pen.', 'utf-8') posts = [post_a, post_b] recommender = Recommender(permalink_base='/posts') diff --git a/prelims/tests/test_handler.py b/prelims/tests/test_handler.py index 2b61b9a..79e4ee7 100644 --- a/prelims/tests/test_handler.py +++ b/prelims/tests/test_handler.py @@ -2,6 +2,7 @@ from prelims.processor import BaseFrontMatterProcessor from unittest import TestCase +import os import tempfile @@ -39,17 +40,21 @@ class StaticSitePostsHandlerTestCase(TestCase): def setUp(self): self.dir = tempfile.TemporaryDirectory() self.mdfile = tempfile.NamedTemporaryFile(suffix='.md', - dir=self.dir.name) + dir=self.dir.name, + delete=False) self.mdfile.write(content.encode('utf-8')) self.mdfile.seek(0) self.mdfile_draft = tempfile.NamedTemporaryFile(suffix='.md', - dir=self.dir.name) + dir=self.dir.name, + delete=False) self.mdfile_draft.write(content_draft.encode('utf-8')) self.mdfile_draft.seek(0) def tearDown(self): self.mdfile.close() + os.unlink(self.mdfile.name) self.mdfile_draft.close() + os.unlink(self.mdfile_draft.name) self.dir.cleanup() def test_register_processor(self): @@ -79,4 +84,6 @@ def test_execute(self): Hello world. """ - self.assertEqual(self.mdfile.read().decode(), expected_content) + self.assertEqual( + '\n'.join(self.mdfile.read().decode().splitlines()) + '\n', + expected_content) diff --git a/prelims/tests/test_post.py b/prelims/tests/test_post.py index 26abc8c..5c0036d 100644 --- a/prelims/tests/test_post.py +++ b/prelims/tests/test_post.py @@ -1,6 +1,7 @@ from prelims import Post from unittest import TestCase +import os import tempfile @@ -29,40 +30,44 @@ class PostTestCase(TestCase): def setUp(self): self.dir = tempfile.TemporaryDirectory() self.mdfile = tempfile.NamedTemporaryFile(suffix='.md', - dir=self.dir.name) + dir=self.dir.name, + delete=False) self.mdfile.write(content.encode('utf-8')) self.mdfile.seek(0) self.mdfile_draft = tempfile.NamedTemporaryFile(suffix='.md', - dir=self.dir.name) + dir=self.dir.name, + delete=False) self.mdfile_draft.write(content_draft.encode('utf-8')) self.mdfile_draft.seek(0) def tearDown(self): self.mdfile.close() + os.unlink(self.mdfile.name) self.mdfile_draft.close() + os.unlink(self.mdfile_draft.name) self.dir.cleanup() def test_load(self): - post = Post.load(self.mdfile.name) + post = Post.load(self.mdfile.name, "utf-8") self.assertEqual(post.path, self.mdfile.name) self.assertEqual(post.front_matter, {'aaa': 'xxx', 'bbb': ['xxx']}) self.assertEqual(post.raw_content, content) self.assertEqual(post.content, 'Hello world.') def test_is_draft(self): - post = Post.load(self.mdfile.name) + post = Post.load(self.mdfile.name, "utf-8") self.assertFalse(post.is_draft()) post_draft = Post.load(self.mdfile_draft.name) self.assertTrue(post_draft.is_draft()) def test_is_valid(self): - post = Post.load(self.mdfile.name) + post = Post.load(self.mdfile.name, "utf-8") self.assertTrue(post.is_valid()) post_draft = Post.load(self.mdfile_draft.name) self.assertFalse(post_draft.is_valid()) def test_update(self): - post = Post.load(self.mdfile.name) + post = Post.load(self.mdfile.name, "utf-8") post.update('aaa', 'zzz', allow_overwrite=False) post.update('bbb', ['zzz'], allow_overwrite=True) @@ -73,7 +78,7 @@ def test_update(self): {'aaa': 'xxx', 'bbb': ['zzz'], 'foo': 'bar'}) def test_save(self): - post = Post.load(self.mdfile.name) + post = Post.load(self.mdfile.name, "utf-8") post.update('foo', 'bar') post.save() @@ -87,4 +92,6 @@ def test_save(self): Hello world. """ - self.assertEqual(self.mdfile.read().decode(), expected_content) + self.assertEqual( + '\n'.join(self.mdfile.read().decode().splitlines()) + '\n', + expected_content)