Skip to content

Commit

Permalink
MailAttachment.filename parse non-ascii filename
Browse files Browse the repository at this point in the history
  • Loading branch information
ikvk committed Nov 13, 2023
1 parent 49312c1 commit d3cc0a6
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 7 deletions.
6 changes: 3 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ MailBox, MailBoxTls, MailBoxUnencrypted - for create mailbox client. `TLS exampl

BaseMailBox.<auth> - login, login_utf8, xoauth2, logout - authentication functions, they support context manager.

BaseMailBox.fetch - first searches email nums by criteria in current folder, then fetch and yields `MailMessage <#email-attributes>`_:
BaseMailBox.fetch - first searches email uids by criteria in the current folder, then fetches and yields `MailMessage <#email-attributes>`_, args:

* *criteria* = 'ALL', message search criteria, `query builder <#search-criteria>`_
* *charset* = 'US-ASCII', indicates charset of the strings that appear in the search criteria. See rfc2978
Expand All @@ -69,7 +69,7 @@ BaseMailBox.fetch - first searches email nums by criteria in current folder, the
* *headers_only* = False, get only email headers (without text, html, attachments)
* *bulk* = False, False - fetch each message with a separate command (N commands) - low memory consumption, slow; True - fetch all messages with a single command - high memory consumption, fast

BaseMailBox.uids - search mailbox for matching message uids in current folder, returns [str]
BaseMailBox.uids - search mailbox for matching message uids in current folder, returns [str | None], None when MailMessage.from_bytes is used, args:

* *criteria* = 'ALL', message search criteria, `query builder <#search-criteria>`_
* *charset* = 'US-ASCII', indicates charset of the strings that appear in the search criteria. See rfc2978
Expand All @@ -92,7 +92,7 @@ MailMessage and MailAttachment public attributes are cached by functools.lru_cac
.. code-block:: python
for msg in mailbox.fetch(): # generator: imap_tools.MailMessage
msg.uid # str: '123'
msg.uid # str | None: '123'
msg.subject # str: 'some subject 你 привет'
msg.from_ # str: 'Bartölke@ya.ru'
msg.to # tuple: ('iam@goo.ru', 'friend@ya.ru', )
Expand Down
4 changes: 4 additions & 0 deletions docs/release_notes.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
1.5.0
=====
* Fixed: MailAttachment.filename now correctly parses non-ASCII filenames

1.4.0
=====
* [Breaking] MailMessage.html replacing charset to utf-8 in html meta for consistency
Expand Down
2 changes: 1 addition & 1 deletion imap_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
from .utils import EmailAddress
from .errors import *

__version__ = '1.4.0'
__version__ = '1.5.0'
1 change: 1 addition & 0 deletions imap_tools/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

UID_PATTERN = re.compile(r'(^|\s+|\W)UID\s+(?P<uid>\d+)')

CODECS_OFFICIAL_REPLACEMENT_CHAR = '�'

class MailMessageFlags:
"""
Expand Down
34 changes: 32 additions & 2 deletions imap_tools/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
from itertools import chain
from functools import lru_cache
from email.header import decode_header
from email.message import _parseparam, _unquotevalue # noqa
from typing import Tuple, Dict, Optional, List

from .utils import decode_value, parse_email_addresses, parse_email_date, EmailAddress, replace_html_ct_charset
from .consts import UID_PATTERN
from .consts import UID_PATTERN, CODECS_OFFICIAL_REPLACEMENT_CHAR


class MailMessage:
Expand Down Expand Up @@ -238,8 +239,37 @@ def filename(self) -> str:
forwarded message (Content-Type = message/rfc822)
:return: filename
"""
# attempt 1
raw = self.part.get_filename() or ''
return ''.join(decode_value(*head_part) for head_part in decode_header(raw))
attempt_1_filename = ''.join(decode_value(*head_part) for head_part in decode_header(raw))
if CODECS_OFFICIAL_REPLACEMENT_CHAR not in attempt_1_filename:
return attempt_1_filename

# attempt 2 - non-ascii filename
for header_name, target_param_name in (('content-disposition', 'filename'), ('content-type', 'name')):
header_obj = self.part.get(header_name, None)
if not header_obj:
continue
for header_item in decode_header(header_obj):
if header_item[1] == 'unknown-8bit':
try:
# suppose encoded utf8
parsed_params = _parseparam(header_item[0].decode(errors='replace')) # ['',]
except Exception: # noqa
continue
for parsed_param_item in parsed_params:
try:
name, val = parsed_param_item.split('=', 1)
name = name.strip()
val = val.strip()
except ValueError:
# Must have been a bare attribute
name = parsed_param_item.strip()
val = ''
if name == target_param_name and val and CODECS_OFFICIAL_REPLACEMENT_CHAR not in val:
return val.strip('"')

return attempt_1_filename

@property
@lru_cache()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
headers={'mime-version': ('1.0 (Apple Message framework v730)',), 'content-type': ('multipart/mixed; boundary=Apple-Mail-13-196941151',), 'message-id': ('<[email protected]>',), 'from': ('[email protected]',), 'subject': ('testing',), 'date': ('Mon, 6 Jun 2005 22:21:22 +0200',), 'to': ('[email protected]',)},
attachments=[
dict(
filename='ci��le.txt',
filename='ciële.txt',
content_id='',
content_disposition='attachment',
content_type='text/plain',
Expand Down

0 comments on commit d3cc0a6

Please sign in to comment.