Skip to content

Commit

Permalink
Option to ignore SSL certificate
Browse files (browse the repository at this point in the history)
  • Loading branch information
theophilegervet committed Jan 28, 2024
1 parent 9974b7b commit 7e9994a
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
8 changes: 7 additions & 1 deletion img2dataset/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from threading import Semaphore
import urllib.request
import io
import ssl
import math
import exifread
import json
Expand Down Expand Up @@ -43,7 +44,10 @@ def download_image(row, timeout, user_agent_token, disallowed_header_directives)
user_agent_string += f" (compatible; {user_agent_token}; +https://github.com/rom1504/img2dataset)"
try:
request = urllib.request.Request(url, data=None, headers={"User-Agent": user_agent_string})
with urllib.request.urlopen(request, timeout=timeout) as r:
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
with urllib.request.urlopen(request, context=ctx, timeout=timeout) as r:
if disallowed_header_directives and is_disallowed(
r.headers,
user_agent_token,
Expand Down Expand Up @@ -97,6 +101,7 @@ def __init__(
user_agent_token,
disallowed_header_directives,
blurring_bbox_col=None,
ignore_ssl_certificate=False,
) -> None:
self.sample_writer_class = sample_writer_class
self.resizer = resizer
Expand All @@ -119,6 +124,7 @@ def __init__(
else {directive.strip().lower() for directive in disallowed_header_directives}
)
self.blurring_bbox_col = blurring_bbox_col
self.ignore_ssl_certificate = ignore_ssl_certificate

def __call__(
self,
Expand Down
2 changes: 2 additions & 0 deletions img2dataset/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def download(
max_shard_retry: int = 1,
user_agent_token: Optional[str] = None,
disallowed_header_directives: Optional[List[str]] = None,
ignore_ssl_certificate: bool = False,
):
"""Download is the main entry point of img2dataset, it uses multiple processes and download multiple files"""
if disallowed_header_directives is None:
Expand Down Expand Up @@ -247,6 +248,7 @@ def signal_handler(signal_arg, frame): # pylint: disable=unused-argument
user_agent_token=user_agent_token,
disallowed_header_directives=disallowed_header_directives,
blurring_bbox_col=bbox_col,
ignore_ssl_certificate=ignore_ssl_certificate,
)

print("Starting the downloading of this file")
Expand Down

0 comments on commit 7e9994a

Please sign in to comment.