Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add timestamps to downloaded files #514

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add support for Syracuse University (@363843342)
- Add support for University of Illinois Chicago (@hoangngo-sudo)
- Add support for Università Bocconi (@giuliofrey)
- Timestamps are now added to the files downloaded (@Ovler-Young)

## [0.18.0] - 2024-10-22

Expand Down
16 changes: 13 additions & 3 deletions blackboard_sync/content/attachment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import uuid
import mimetypes
from datetime import datetime
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This import would be better placed with the other imports of the form 'from x import y'; I usually order those by length.


from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
Expand All @@ -14,8 +15,13 @@
class Attachment(BStream):
"""File attached to a content."""

def __init__(self, attachment: BBAttachment, api_path: BBContentPath,
job: DownloadJob):
def __init__(
self,
attachment: BBAttachment,
api_path: BBContentPath,
job: DownloadJob,
modified_time: datetime | None = None,
) -> None:
filename = attachment.fileName or str(uuid.uuid1())
name_ext = '.' + filename.split('.')[-1]

Expand All @@ -29,8 +35,12 @@ def __init__(self, attachment: BBAttachment, api_path: BBContentPath,
real_ext = possible_ext[0] if possible_ext else '.txt'
self.filename = filename + real_ext

self.modified_time = modified_time

self.stream = job.session.download(attachment_id=attachment.id,
**api_path)

def write(self, path: Path, executor: ThreadPoolExecutor) -> None:
super().write_base(path / self.filename, executor, self.stream)
super().write_base(
path / self.filename, executor, self.stream, self.modified_time
)
21 changes: 18 additions & 3 deletions blackboard_sync/content/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
from datetime import datetime
from pathlib import Path
from requests import Response

Expand All @@ -8,27 +10,40 @@ class BStream:
"""Base class for content that can be downloaded as a byte stream."""
CHUNK_SIZE = 1024

def write_base(self, path: Path, executor: ThreadPoolExecutor,
stream: Response) -> None:
def write_base(
self,
path: Path,
executor: ThreadPoolExecutor,
stream: Response,
modified_time: datetime | None = None
) -> None:
"""Schedule the write operation."""

def _write() -> None:
with path.open("wb") as f:
for chunk in stream.iter_content(chunk_size=self.CHUNK_SIZE):
f.write(chunk)

if modified_time is not None:
timestamp = modified_time.timestamp()
os.utime(path, (timestamp, timestamp))

executor.submit(_write)


class FStream:
"""Base class for content that can be written as text."""

def write_base(self, path: Path, executor: ThreadPoolExecutor,
body: str) -> None:
body: str, modified_time: datetime | None = None) -> None:
"""Schedule the write operation."""

def _write() -> None:
with path.open('w', encoding='utf-8') as f:
f.write(body)

if modified_time is not None:
timestamp = modified_time.timestamp()
os.utime(path, (timestamp, timestamp))

executor.submit(_write)
7 changes: 5 additions & 2 deletions blackboard_sync/content/body.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def __init__(self, content: BBCourseContent, _: None,
return

title = content.title or "Untitled"
self.modified_time = content.modified_time if content else None
parser = ContentParser(content.body, job.session.instance_url)

self.body = create_body(title, parser.body, parser.text)
Expand All @@ -30,7 +31,9 @@ def write(self, path: Path, executor: ThreadPoolExecutor) -> None:
if self.ignore:
return

self.write_base(path / f"{path.stem}.html", executor, self.body)
self.write_base(
path / f"{path.stem}.html", executor, self.body, self.modified_time
)

for child in self.children:
child.write(path, executor)
child.write(path, executor, self.modified_time)
8 changes: 8 additions & 0 deletions blackboard_sync/content/content.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os
from pathlib import Path
from json import JSONDecodeError
from pydantic import ValidationError
Expand Down Expand Up @@ -38,6 +39,7 @@ def __init__(self, content: BBCourseContent, api_path: BBContentPath,

Handler = Content.get_handler(content.contentHandler)
self.title = content.title_path_safe.replace('.', '_')
self.modified_time = content.modified if content else None

try:
self.handler = Handler(content, api_path, job)
Expand Down Expand Up @@ -69,6 +71,12 @@ def write(self, path: Path, executor: ThreadPoolExecutor) -> None:
path.mkdir(exist_ok=True, parents=True)
self.body.write(path, executor)

if (self.modified_time and
(self.handler is not None or self.body is not None)):
timestamp = self.modified_time.timestamp()
path.touch(exist_ok=True)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think path is guaranteed to exist by this point (barring destructive user action), so there is no need to touch it.

os.utime(path, (timestamp, timestamp))

@staticmethod
def should_download(content: BBCourseContent, job: DownloadJob) -> bool:
or_guards = [
Expand Down
7 changes: 7 additions & 0 deletions blackboard_sync/content/course.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from pathlib import Path
from datetime import datetime
import os
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This import seems out of order here, better at the top

from concurrent.futures import ThreadPoolExecutor

from blackboard.blackboard import BBCourse
Expand All @@ -23,6 +24,7 @@ def __init__(self, course: BBCourse, job: DownloadJob):

self.year = self.get_year(course.created)
self.title = course.title or 'Untitled Course'
self.modified_time = course.modified

contents = job.session.fetch_contents(course_id=course.id)
self.children = []
Expand All @@ -41,6 +43,11 @@ def write(self, path: Path, executor: ThreadPoolExecutor) -> None:
for child in self.children:
child.write(path, executor)

if self.modified_time:
timestamp = self.modified_time.timestamp()
path.touch(exist_ok=True)
Copy link
Owner

@sanjacob sanjacob Feb 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Surely the folder exists by this point. So there is no use in touching it

os.utime(path, (timestamp, timestamp))

@staticmethod
def get_year(created: datetime | None) -> str:
return str(created.year) if created is not None else 'No Date'
11 changes: 10 additions & 1 deletion blackboard_sync/content/document.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor

Expand All @@ -21,10 +22,13 @@ def __init__(self, content: BBCourseContent, api_path: BBContentPath,
filtered_attachments = list(att_filter.filter(attachments))

self.attachments = []
self.modified_time = content.modified if content else None

for i, attachment in enumerate(filtered_attachments):
self.attachments.append(
Attachment(attachment, api_path, job)
Attachment(
attachment, api_path, job, modified_time=self.modified_time
)
)

def write(self, path: Path, executor: ThreadPoolExecutor) -> None:
Expand All @@ -37,6 +41,11 @@ def write(self, path: Path, executor: ThreadPoolExecutor) -> None:
for attachment in self.attachments:
attachment.write(path, executor)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to do this in the write method, as it can just be handled by write_base

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So just pass it to attachment.write


if self.modified_time and self.attachments:
timestamp = self.modified_time.timestamp()
path.touch(exist_ok=True)
os.utime(path, (timestamp, timestamp))

@property
def create_dir(self) -> bool:
return False
3 changes: 2 additions & 1 deletion blackboard_sync/content/externallink.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class ExternalLink(FStream):
def __init__(self, content: BBCourseContent, _: None,
job: DownloadJob) -> None:
self.url = None
self.modified_time = content.modified if content else None

if content.contentHandler is not None:
self.url = content.contentHandler.url
Expand All @@ -31,7 +32,7 @@ def write(self, path: Path, executor: ThreadPoolExecutor) -> None:
body = self.create_unix_body(self.url)
path = path.with_suffix(".desktop")

super().write_base(path, executor, body)
super().write_base(path, executor, body, self.modified_time)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is exactly how it should be


def create_unix_body(self, url: str) -> str:
return f"[Desktop Entry]\nIcon=text-html\nType=Link\nURL[$e]={url}"
Expand Down
8 changes: 7 additions & 1 deletion blackboard_sync/content/folder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from pathlib import Path
import os
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, this import seems better at the top

from concurrent.futures import ThreadPoolExecutor

from blackboard.blackboard import BBCourseContent
Expand All @@ -11,9 +12,10 @@
class Folder:
"""Content of type `x-bb-folder`."""

def __init__(self, _: BBCourseContent, api_path: BBContentPath,
def __init__(self, coursecontent: BBCourseContent, api_path: BBContentPath,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rename to content, as in other instances

job: DownloadJob) -> None:
self.children = []
self.modified_time = coursecontent.modified if coursecontent else None
course_id = api_path['course_id']

for child in job.session.fetch_content_children(**api_path):
Expand All @@ -28,6 +30,10 @@ def write(self, path: Path, executor: ThreadPoolExecutor) -> None:
for child in self.children:
child.write(path, executor)

if self.modified_time:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is fine - since folder doesn't inherit from BStream or FStream

timestamp = self.modified_time.timestamp()
os.utime(path, (timestamp, timestamp))

@property
def create_dir(self) -> bool:
return False
5 changes: 3 additions & 2 deletions blackboard_sync/content/webdav.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,12 @@ def __init__(self, link: Link, job: DownloadJob) -> None:
self.valid = validate_webdav_response(self.stream, link.href,
job.session.instance_url)

def write(self, path: Path, executor: ThreadPoolExecutor) -> None:
def write(self, path: Path,
executor: ThreadPoolExecutor, modified_time=None) -> None:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The modified_time parameter is lacking its type annotation: datetime | None

if self.valid:
path = Path(path, self.title)

if self.extension:
path = path.with_suffix(self.extension)

super().write_base(path, executor, self.stream)
super().write_base(path, executor, self.stream, modified_time)
2 changes: 1 addition & 1 deletion tests/test_content.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def test_children_file(api_path: BBContentPath,
assume(not att.mimeType.startswith("video/"))
assume(not att.mimeType == '*')

calls.append(mock.call(att, api_path, job))
calls.append(mock.call(att, api_path, job, modified_time=None))

with mock.patch('blackboard_sync.content.document.Attachment') as p:
Document(None, api_path, job)
Expand Down