Skip to content

Commit

Permalink
🔒 Replace md5 with sha-256 (#1680)
Browse files Browse the repository at this point in the history
* Fix metadata path

* Ignore hidden directories in folder dataset

* Add check for mask_dir for segmentation tasks in Folder dataset

* Limit the gradio version to <4

* Replace md5 with sha256, Signed off: Samet Akcay [email protected]

* Rename checksum to hashsum since we use cryptographic algorithms

* Update btech hashsum

* update kolektor hashsum

* Update hashsum for mvtec download info

* Update hashsum in Visa image download info

* Update hashsums for Avenue Dataset and Annotations

* Update ucsd hashsum

* Update shanghai tech hashsum

* Update dsr weight hashsum

* Update efficient ad weight hashsum

* Fix albumentations tests

* Fix albumentations tests
  • Loading branch information
samet-akcay authored Feb 27, 2024
1 parent 7bd3cfc commit 7ac7a73
Show file tree
Hide file tree
Showing 11 changed files with 56 additions and 22 deletions.
2 changes: 1 addition & 1 deletion src/anomalib/data/depth/mvtec_3d.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
name="mvtec_3d",
url="https://www.mydrive.ch/shares/45920/dd1eb345346df066c63b5c95676b961b/download/428824485-1643285832"
"/mvtec_3d_anomaly_detection.tar.xz",
checksum="d8bb2800fbf3ac88e798da6ae10dc819",
hashsum="d8bb2800fbf3ac88e798da6ae10dc819",
)

CATEGORIES = ("bagel", "cable_gland", "carrot", "cookie", "dowel", "foam", "peach", "potato", "rope", "tire")
Expand Down
2 changes: 1 addition & 1 deletion src/anomalib/data/image/btech.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
DOWNLOAD_INFO = DownloadInfo(
name="btech",
url="https://avires.dimi.uniud.it/papers/btad/btad.zip",
checksum="c1fa4d56ac50dd50908ce04e81037a8e",
hashsum="461c9387e515bfed41ecaae07c50cf6b10def647b36c9e31d239ab2736b10d2a",
)

CATEGORIES = ("01", "02", "03")
Expand Down
2 changes: 1 addition & 1 deletion src/anomalib/data/image/kolektor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
DOWNLOAD_INFO = DownloadInfo(
name="kolektor",
url="https://go.vicos.si/kolektorsdd",
checksum="2b094030343c1cd59df02203ac6c57a0",
hashsum="65dc621693418585de9c4467d1340ea7958a6181816f0dc2883a1e8b61f9d4dc",
filename="KolektorSDD.zip",
)

Expand Down
2 changes: 1 addition & 1 deletion src/anomalib/data/image/mvtec.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
name="mvtec",
url="https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/download/420938113-1629952094"
"/mvtec_anomaly_detection.tar.xz",
checksum="eefca59f2cede9c3fc5b6befbfec275e",
hashsum="cf4313b13603bec67abb49ca959488f7eedce2a9f7795ec54446c649ac98cd3d",
)

CATEGORIES = (
Expand Down
2 changes: 1 addition & 1 deletion src/anomalib/data/image/visa.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
DOWNLOAD_INFO = DownloadInfo(
name="VisA",
url="https://amazon-visual-anomaly.s3.us-west-2.amazonaws.com/VisA_20220922.tar",
checksum="ef908989b6dc701fc218f643c127a4de",
hashsum="2eb8690c803ab37de0324772964100169ec8ba1fa3f7e94291c9ca673f40f362",
)

CATEGORIES = (
Expand Down
50 changes: 42 additions & 8 deletions src/anomalib/data/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class DownloadInfo:

name: str
url: str
checksum: str
hashsum: str
filename: str | None = None


Expand Down Expand Up @@ -233,17 +233,51 @@ def safe_extract(tar_file: TarFile, root: Path, members: list[TarInfo]) -> None:
tar_file.extract(member, root)


def hash_check(file_path: Path, expected_hash: str) -> None:
"""Raise assert error if hash does not match the calculated hash of the file.
def generate_hash(file_path: str | Path, algorithm: str = "sha256") -> str:
    """Generate a hash of a file using the specified algorithm.

    Args:
        file_path (str | Path): Path to the file to hash.
        algorithm (str): Name of the hashing algorithm to use
            (e.g., 'sha256', 'sha3_512'). Defaults to ``"sha256"``.

    Returns:
        str: The hexadecimal hash string of the file.

    Raises:
        ValueError: If the specified hashing algorithm is not supported,
            including variable-length digests that need an explicit output size.
    """
    # Resolve the name through ``hashlib.new`` so that only real digest names are
    # accepted. A plain ``getattr(hashlib, algorithm)`` would also match
    # non-digest attributes (e.g. ``new`` or ``algorithms_available``) and fail
    # later with a ``TypeError`` instead of the documented ``ValueError``.
    try:
        hasher = hashlib.new(algorithm)
    except (ValueError, TypeError) as err:
        msg = f"Unsupported hashing algorithm: {algorithm}"
        raise ValueError(msg) from err

    # Variable-length digests report a digest size of 0 and require a length
    # argument for ``hexdigest``, which this function has no way to supply.
    if hasher.digest_size == 0:
        msg = f"Unsupported hashing algorithm: {algorithm}"
        raise ValueError(msg)

    # Read the file in chunks to avoid loading it all into memory.
    chunk_size = 64 * 1024
    with Path(file_path).open("rb") as file:
        for chunk in iter(lambda: file.read(chunk_size), b""):
            hasher.update(chunk)

    # Return the computed hash value in hexadecimal format.
    return hasher.hexdigest()


def check_hash(file_path: Path, expected_hash: str, algorithm: str = "sha256") -> None:
    """Raise value error if hash does not match the calculated hash of the file.

    Args:
        file_path (Path): Path to file.
        expected_hash (str): Expected hash of the file, as a hexadecimal string.
            Letter case and surrounding whitespace are ignored.
        algorithm (str): Hashing algorithm to use ('sha256', 'sha3_512', etc.).

    Raises:
        ValueError: If the calculated hash does not match ``expected_hash``.
    """
    # An explicit ``ValueError`` is raised instead of using ``assert`` because
    # assertions are stripped when Python runs with ``-O``, which would silently
    # disable the integrity check.
    calculated_hash = generate_hash(file_path, algorithm)

    # ``hexdigest`` yields lowercase hex digits; normalise the expected value so
    # that an upper-case or padded hash string is not rejected spuriously.
    if calculated_hash != expected_hash.strip().lower():
        msg = (
            f"Calculated hash {calculated_hash} of downloaded file {file_path} does not match the required hash "
            f"{expected_hash}."
        )
        raise ValueError(msg)


def extract(file_name: Path, root: Path) -> None:
Expand Down Expand Up @@ -303,7 +337,7 @@ def download_and_extract(root: Path, info: DownloadInfo) -> None:
reporthook=progress_bar.update_to,
)
logger.info("Checking the hash of the downloaded file.")
hash_check(downloaded_file_path, info.checksum)
check_hash(downloaded_file_path, info.hashsum)
else:
msg = f"Invalid URL to download dataset. Supported 'http://' or 'https://' but '{info.url}' is requested"
raise RuntimeError(msg)
Expand Down
4 changes: 2 additions & 2 deletions src/anomalib/data/video/avenue.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@
DATASET_DOWNLOAD_INFO = DownloadInfo(
name="Avenue Dataset",
url="http://www.cse.cuhk.edu.hk/leojia/projects/detectabnormal/Avenue_Dataset.zip",
checksum="b7a34b212ecdd30efbd989a6dcb1aceb",
hashsum="fc9cb8432a11ca79c18aa180c72524011411b69d3b0ff27c8816e41c0de61531",
)
ANNOTATIONS_DOWNLOAD_INFO = DownloadInfo(
name="Avenue Annotations",
url="http://www.cse.cuhk.edu.hk/leojia/projects/detectabnormal/ground_truth_demo.zip",
checksum="e8e3bff99195b6b511534083b9dbe1f5",
hashsum="60fec1728ec8f73a58aad3aeb5729d70a805a47e0b8eb4bf91ab67ef06386d77",
)


Expand Down
2 changes: 1 addition & 1 deletion src/anomalib/data/video/shanghaitech.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
DATASET_DOWNLOAD_INFO = DownloadInfo(
name="ShanghaiTech Dataset",
url="http://101.32.75.151:8181/dataset/shanghaitech.tar.gz",
checksum="08494decd30fb0fa213b519a9c555040",
hashsum="c13a827043b259ccf8493c9d9130486872992153a9d714fe229e523cd4c94116",
)


Expand Down
2 changes: 1 addition & 1 deletion src/anomalib/data/video/ucsd_ped.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
DOWNLOAD_INFO = DownloadInfo(
name="UCSD Pedestrian",
url="http://www.svcl.ucsd.edu/projects/anomaly/UCSD_Anomaly_Dataset.tar.gz",
checksum="5006421b89885f45a6f93b041145f2eb",
hashsum="2329af326951f5097fdd114c50e853957d3e569493a49d22fc082a9fd791915b",
)

CATEGORIES = ("UCSDped1", "UCSDped2")
Expand Down
2 changes: 1 addition & 1 deletion src/anomalib/models/image/dsr/lightning_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
WEIGHTS_DOWNLOAD_INFO = DownloadInfo(
name="vq_model_pretrained_128_4096.pckl",
url="https://github.com/openvinotoolkit/anomalib/releases/download/dsr_pretrained_weights/dsr_vq_model_pretrained.zip",
checksum="927f6b40841a7c885d12217c922b2bba",
hashsum="52fe7504ec8e9df70b4382f287ab26269dcfe000cd7a7e146a52c6f146f34afb",
)


Expand Down
8 changes: 4 additions & 4 deletions src/anomalib/models/image/efficient_ad/lightning_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@
IMAGENETTE_DOWNLOAD_INFO = DownloadInfo(
name="imagenette2.tgz",
url="https://s3.amazonaws.com/fast-ai-imageclas/imagenette2.tgz",
checksum="fe2fc210e6bb7c5664d602c3cd71e612",
hashsum="6cbfac238434d89fe99e651496f0812ebc7a10fa62bd42d6874042bf01de4efd",
)

WEIGHTS_DOWNLOAD_INFO = DownloadInfo(
name="efficientad_pretrained_weights.zip",
url="https://github.com/openvinotoolkit/anomalib/releases/download/efficientad_pretrained_weights/efficientad_pretrained_weights.zip",
checksum="ec6113d728969cd233271eeed7d692f2",
hashsum="c09aeaa2b33f244b3261a5efdaeae8f8284a949470a4c5a526c61275fe62684a",
)


Expand Down Expand Up @@ -171,8 +171,8 @@ def teacher_channel_mean_std(self, dataloader: DataLoader) -> dict[str, torch.Te
if not arrays_defined:
_, num_channels, _, _ = y.shape
n = torch.zeros((num_channels,), dtype=torch.int64, device=y.device)
chanel_sum = torch.zeros((num_channels,), dtype=torch.float64, device=y.device)
chanel_sum_sqr = torch.zeros((num_channels,), dtype=torch.float64, device=y.device)
chanel_sum = torch.zeros((num_channels,), dtype=torch.float32, device=y.device)
chanel_sum_sqr = torch.zeros((num_channels,), dtype=torch.float32, device=y.device)
arrays_defined = True

n += y[:, 0].numel()
Expand Down

0 comments on commit 7ac7a73

Please sign in to comment.