Skip to content

Commit

Permalink
Accented characters inside Zip files now OK on Mac and Windows
Browse files Browse the repository at this point in the history
  • Loading branch information
aborel committed Mar 16, 2023
1 parent 46e42d3 commit 1ec9dfb
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import hashlib
import os
import sys
import glob
import pathlib
import zipfile
Expand All @@ -10,7 +11,7 @@
from tkinter import Tk, Button, Label, font, IntVar, Checkbutton

from functools import partial
from unicodedata import normalize
from unicodedata import normalize, is_normalized

version = "0.7.0"

Expand All @@ -23,6 +24,15 @@ def log_message(message):
f_err.close()


def is_cp850(s):
    """Return True if *s* looks like UTF-8 text that was mis-decoded as cp850.

    Filenames stored inside a zip archive are either in a legacy DOS code page
    (cp850/cp437) or in UTF-8. This heuristic re-encodes *s* as cp850 and tries
    to decode the resulting bytes as UTF-8; the name is treated as "cp850"
    only when that round trip succeeds.

    Note that pure-ASCII names always return True, because ASCII bytes are
    identical in cp850 and UTF-8.

    :param s: filename as a ``str``.
    :return: True when ``s.encode('cp850').decode('utf-8')`` succeeds,
        False when either step raises a Unicode error.
    """
    try:
        # The decoded value itself is not needed, only whether the round trip works.
        s.encode('cp850').decode('utf-8')
    except UnicodeError:
        # Covers both UnicodeEncodeError (character not representable in cp850)
        # and UnicodeDecodeError (bytes are not valid UTF-8). Anything else,
        # e.g. a non-str argument or KeyboardInterrupt, is no longer swallowed.
        return False
    return True


def md5Checksum(filePath, ziparchive=None):
# blocksize = 8192
# switch to 1MB blocks to improve performance
Expand Down Expand Up @@ -169,13 +179,14 @@ def runchecksum(tkroot, width_chars, check_zips):
archive_path = os.path.sep.join(myzipfile.split(os.sep)[0:-1]).replace(choosedir, '.')
# print(archive_path)
for archived_file in zipcontent[myzipfile]:
print(myzipfile, archived_file)
# Filenames of objects inside a zip are either cp850/cp437 (old style) or utf-8. Let's check
assumed_encoding = 'cp850' if is_cp850(archived_file) else 'utf-8'
progress += 1
try:
md5 = md5Checksum(archived_file, ziparchive=archive)
# filenames must be encoded as UTF-8, or they might not match what Libsafe sees on the filesystem
# also: NFC normalization for proper (composed) representation of accented characters
f.write(normalize('NFC',f'{md5} {archive_path + os.path.sep + archived_file}\n').encode("UTF-8"))
f.write(f'{md5} {archive_path + os.path.sep + archived_file.encode(assumed_encoding).decode("utf-8")}\n'.encode("UTF-8"))
except Exception as e:
trace = str(e)
log_message(trace)
Expand Down

0 comments on commit 1ec9dfb

Please sign in to comment.