Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Windows Path with traversing recursively #7

Merged
merged 4 commits into from
Jan 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/dryrun.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ jobs:
strategy:
max-parallel: 2
matrix:
# windows-latest is excluded as tempfile doesn't work perfectly for unit testing
os: [ubuntu-latest, macos-latest]
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.7, 3.8, 3.9]

steps:
Expand Down
10 changes: 6 additions & 4 deletions prelims/handler.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
from .post import Post

import os

from pathlib import Path

class StaticSitePostsHandler(object):

def __init__(self, path_dir):
def __init__(self, path_dir, encoding='utf-8'):
assert os.path.isdir(os.path.expanduser(path_dir)), \
f'path {path_dir} is not a directory or does not exist'

self.paths = [os.path.join(path_dir, f) for f in os.listdir(path_dir)]
exts = ['.md', '.html']
self.paths = [p for p in Path(path_dir).rglob('*') if p.suffix in exts]
self.processors = []
self.encoding = encoding

def register_processor(self, processor):
"""Add a front matter processor to the queue.
Expand All @@ -32,7 +34,7 @@ def load_posts(self):
"""
posts = []
for path in self.paths:
post = Post.load(path)
post = Post.load(path, self.encoding)
if post.is_valid():
posts.append(post)
return posts
Expand Down
11 changes: 6 additions & 5 deletions prelims/post.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@

class Post(object):

def __init__(self, path, front_matter, raw_content, content):
def __init__(self, path, front_matter, raw_content, content, encoding='utf-8'):
self.path = path
self.front_matter = front_matter
self.raw_content = raw_content
self.content = content
self.encoding = encoding

def update(self, key, value, allow_overwrite=False):
if key in self.front_matter and not allow_overwrite:
Expand All @@ -47,7 +48,7 @@ def save(self):
# https://github.com/yaml/pyyaml/pull/256
value_types = {type(value) for value in self.front_matter.values()}
flow_style = None if list in value_types else False
with open(self.path, 'w') as f:
with open(self.path, 'w', encoding=self.encoding) as f:
content = self.raw_content.replace(
m.group(1),
yaml.dump(self.front_matter, allow_unicode=True,
Expand All @@ -56,8 +57,8 @@ def save(self):
f.write(content)

@staticmethod
def load(path):
with open(path) as f:
def load(path, encoding="utf-8"):
with open(path, encoding=encoding) as f:
raw_content = f.read()

front_matter = None
Expand All @@ -74,4 +75,4 @@ def load(path):
for re_filter in RE_FILTERS:
content = re_filter.sub('', content).strip()

return Post(path, front_matter, raw_content, content)
return Post(path, front_matter, raw_content, content, encoding)
3 changes: 2 additions & 1 deletion prelims/processor/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import re
import numpy as np
from pathlib import PurePosixPath
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Expand Down Expand Up @@ -48,4 +49,4 @@ def __path_to_permalink(self, path):
"""Extract a permalink portion of a file path, excluding a file extension.
"""
pattern = rf'({re.escape(self.permalink_base)}/.+?)(\.md|\.html)'
return re.search(pattern, path).group(1) + '/'
return re.search(pattern, str(PurePosixPath(path))).group(1) + '/'
3 changes: 2 additions & 1 deletion prelims/processor/tests/test_og_file_path_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ def test_process(self):
<source src="/videos/foo/movie.mp4" type="video/mp4">
Your browser does not support the video tag.
</video>
""")
""",
"utf-8")
extractor.process([p])

self.assertTrue('images' in p.front_matter)
Expand Down
4 changes: 2 additions & 2 deletions prelims/processor/tests/test_recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ class RecommenderTestCase(TestCase):

def test_process(self):
post_a = Post('/path/to/posts/a.md', {'title': 'foo'},
'', 'Hello world.')
'', 'Hello world.', 'utf-8')
post_b = Post('/path/to/posts/b.md', {'title': 'bar'},
'', 'This is a pen.')
'', 'This is a pen.', 'utf-8')
posts = [post_a, post_b]

recommender = Recommender(permalink_base='/posts')
Expand Down
13 changes: 10 additions & 3 deletions prelims/tests/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from prelims.processor import BaseFrontMatterProcessor

from unittest import TestCase
import os
import tempfile


Expand Down Expand Up @@ -39,17 +40,21 @@ class StaticSitePostsHandlerTestCase(TestCase):
def setUp(self):
self.dir = tempfile.TemporaryDirectory()
self.mdfile = tempfile.NamedTemporaryFile(suffix='.md',
dir=self.dir.name)
dir=self.dir.name,
delete=False)
self.mdfile.write(content.encode('utf-8'))
self.mdfile.seek(0)
self.mdfile_draft = tempfile.NamedTemporaryFile(suffix='.md',
dir=self.dir.name)
dir=self.dir.name,
delete=False)
self.mdfile_draft.write(content_draft.encode('utf-8'))
self.mdfile_draft.seek(0)

def tearDown(self):
self.mdfile.close()
os.unlink(self.mdfile.name)
self.mdfile_draft.close()
os.unlink(self.mdfile_draft.name)
self.dir.cleanup()

def test_register_processor(self):
Expand Down Expand Up @@ -79,4 +84,6 @@ def test_execute(self):

Hello world.
"""
self.assertEqual(self.mdfile.read().decode(), expected_content)
self.assertEqual(
'\n'.join(self.mdfile.read().decode().splitlines()) + '\n',
expected_content)
23 changes: 15 additions & 8 deletions prelims/tests/test_post.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from prelims import Post

from unittest import TestCase
import os
import tempfile


Expand Down Expand Up @@ -29,40 +30,44 @@ class PostTestCase(TestCase):
def setUp(self):
self.dir = tempfile.TemporaryDirectory()
self.mdfile = tempfile.NamedTemporaryFile(suffix='.md',
dir=self.dir.name)
dir=self.dir.name,
delete=False)
self.mdfile.write(content.encode('utf-8'))
self.mdfile.seek(0)
self.mdfile_draft = tempfile.NamedTemporaryFile(suffix='.md',
dir=self.dir.name)
dir=self.dir.name,
delete=False)
self.mdfile_draft.write(content_draft.encode('utf-8'))
self.mdfile_draft.seek(0)

def tearDown(self):
self.mdfile.close()
os.unlink(self.mdfile.name)
self.mdfile_draft.close()
os.unlink(self.mdfile_draft.name)
self.dir.cleanup()

def test_load(self):
post = Post.load(self.mdfile.name)
post = Post.load(self.mdfile.name, "utf-8")
self.assertEqual(post.path, self.mdfile.name)
self.assertEqual(post.front_matter, {'aaa': 'xxx', 'bbb': ['xxx']})
self.assertEqual(post.raw_content, content)
self.assertEqual(post.content, 'Hello world.')

def test_is_draft(self):
post = Post.load(self.mdfile.name)
post = Post.load(self.mdfile.name, "utf-8")
self.assertFalse(post.is_draft())
post_draft = Post.load(self.mdfile_draft.name)
self.assertTrue(post_draft.is_draft())

def test_is_valid(self):
post = Post.load(self.mdfile.name)
post = Post.load(self.mdfile.name, "utf-8")
self.assertTrue(post.is_valid())
post_draft = Post.load(self.mdfile_draft.name)
self.assertFalse(post_draft.is_valid())

def test_update(self):
post = Post.load(self.mdfile.name)
post = Post.load(self.mdfile.name, "utf-8")

post.update('aaa', 'zzz', allow_overwrite=False)
post.update('bbb', ['zzz'], allow_overwrite=True)
Expand All @@ -73,7 +78,7 @@ def test_update(self):
{'aaa': 'xxx', 'bbb': ['zzz'], 'foo': 'bar'})

def test_save(self):
post = Post.load(self.mdfile.name)
post = Post.load(self.mdfile.name, "utf-8")
post.update('foo', 'bar')
post.save()

Expand All @@ -87,4 +92,6 @@ def test_save(self):
Hello world.
"""

self.assertEqual(self.mdfile.read().decode(), expected_content)
self.assertEqual(
'\n'.join(self.mdfile.read().decode().splitlines()) + '\n',
expected_content)