Skip to content

Commit

Permalink
⚡️ Simpler, faster ENVELOPE and address parser
Browse files Browse the repository at this point in the history
* Envelope can *NOT* be `NIL`.
* Use aliased rules for envelope and address parts.
* Use the QUOTED_rev2 regexp for IMAP4rev2 and UTF8=ACCEPT address strings.
* Remove the extraneous `envelope_data` method.

Benchmarks:
```
Calculating -------------------------------------
                          v0.4.5-6-gbc32ddb2-dirty        0.4.4
      bodystructure_mixed_boundary       3.379k           3.109k i/s
                  10.152k times in       3.004058s        3.265234s
rfc3501_8_example_3_FETCH_ENVELOPE       5.355k           4.646k i/s
                  15.924k times in       2.973921s        3.427366s

Comparison:
                   bodystructure_mixed_boundary
          v0.4.5-6-gbc32ddb2-dirty:      3379.4 i/s
                             0.4.4:      3109.1 i/s - 1.09x  slower

             rfc3501_8_example_3_FETCH_ENVELOPE
          v0.4.5-6-gbc32ddb2-dirty:      5354.5 i/s
                             0.4.4:      4646.1 i/s - 1.15x  slower
```
  • Loading branch information
nevans committed Nov 19, 2023
1 parent d9b7164 commit d3facc2
Showing 1 changed file with 76 additions and 81 deletions.
157 changes: 76 additions & 81 deletions lib/net/imap/response_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -941,41 +941,57 @@ def msg_att__label
# this represents the partial size for BODY or BINARY
# Defined as another name for the +atom+ rule, so it is parsed exactly the
# way an atom is.
alias gt__number__lt atom

# RFC3501 & RFC9051:
# envelope = "(" env-date SP env-subject SP env-from SP
# env-sender SP env-reply-to SP env-to SP env-cc SP
# env-bcc SP env-in-reply-to SP env-message-id ")"
# Parses an envelope and builds an Envelope from its ten fields, in order:
# date, subject, from, sender, reply_to, to, cc, bcc, in_reply_to,
# message_id. @lex_state is set to EXPR_DATA while parsing and reset to
# EXPR_BEG afterwards.
#
# NOTE(review): this text appears to come from a diff view whose +/- markers
# were lost, so two versions of the body are interleaved. The lines using
# lookahead/match(T_...) look like the removed version (which also accepted
# NIL and returned nil); the lines using lpar/SP!/rpar and the env_* rules
# look like the added version. Confirm against the repository before reuse.
def envelope
@lex_state = EXPR_DATA
# -- presumably removed: NIL-tolerant, token-by-token version --
token = lookahead
if token.symbol == T_NIL
shift_token
result = nil
else
match(T_LPAR)
date = nstring
match(T_SPACE)
subject = nstring
match(T_SPACE)
from = address_list
match(T_SPACE)
sender = address_list
match(T_SPACE)
reply_to = address_list
match(T_SPACE)
to = address_list
match(T_SPACE)
cc = address_list
match(T_SPACE)
bcc = address_list
match(T_SPACE)
in_reply_to = nstring
match(T_SPACE)
message_id = nstring
match(T_RPAR)
result = Envelope.new(date, subject, from, sender, reply_to,
to, cc, bcc, in_reply_to, message_id)
end
# -- presumably added: one env_* rule per field, each separated by SP --
lpar; date = env_date
SP!; subject = env_subject
SP!; from = env_from
SP!; sender = env_sender
SP!; reply_to = env_reply_to
SP!; to = env_to
SP!; cc = env_cc
SP!; bcc = env_bcc
SP!; in_reply_to = env_in_reply_to
SP!; message_id = env_message_id
rpar
Envelope.new(date, subject, from, sender, reply_to,
to, cc, bcc, in_reply_to, message_id)
ensure
# Runs whether or not parsing raised, so the lexer state is always reset.
@lex_state = EXPR_BEG
# NOTE(review): presumably a removed line. If it were kept inside +ensure+,
# this explicit return would discard any exception raised above.
return result
end

# env-date = nstring
# env-subject = nstring
# env-in-reply-to = nstring
# env-message-id = nstring
#
# Each of these envelope fields is a plain nstring, so the grammar names are
# simply aliases of the +nstring+ rule.
alias env_date nstring
alias env_subject nstring
alias env_in_reply_to nstring
alias env_message_id nstring

# env-from = "(" 1*address ")" / nil
# env-sender = "(" 1*address ")" / nil
# env-reply-to = "(" 1*address ")" / nil
# env-to = "(" 1*address ")" / nil
# env-cc = "(" 1*address ")" / nil
# env-bcc = "(" 1*address ")" / nil
# Parses an optional address list: NIL yields nil; otherwise a parenthesized
# run of one or more addresses yields an Array of the parsed addresses.
# NOTE(review): nothing here consumes SP between addresses -- confirm that
# space-separated address lists do not need to be accepted.
def nlist__address
  return nil if NIL?

  lpar
  addresses = [address]
  addresses.push(address) until rpar?
  addresses
end

# All six envelope address-list fields are parsed by the same rule; these
# aliases only give +nlist__address+ the grammar's field names.
alias env_from nlist__address
alias env_sender nlist__address
alias env_reply_to nlist__address
alias env_to nlist__address
alias env_cc nlist__address
alias env_bcc nlist__address

# date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
# SP time SP zone DQUOTE
#
# date_time is just another name for the +quoted+ rule; the alias itself
# performs no date-specific checks.
alias date_time quoted
Expand Down Expand Up @@ -1877,61 +1893,40 @@ def resp_code_copy__data
UIDPlusData.new(validity, src_uids, dst_uids)
end

# Parses either NIL (returning nil) or a parenthesized list of addresses
# (returning an Array, possibly empty). A single T_SPACE token in front of
# an address is skipped before that address is parsed.
def address_list
  if lookahead.symbol == T_NIL
    shift_token
    return nil
  end

  match(T_LPAR)
  addresses = []
  # One lookahead per iteration: stop at ")" or optionally skip a space,
  # then parse the next address.
  until (kind = lookahead.symbol) == T_RPAR
    shift_token if kind == T_SPACE
    addresses << address
  end
  shift_token # consume the closing ")"
  addresses
end

# Matches, starting at the search position (\G), the four fields of an
# address -- each either NIL or a double-quoted string -- separated by single
# spaces and followed by ")". Groups 1-4 capture the contents of the quoted
# name, route, mailbox and host; a NIL field leaves its group unset.
# The opening "(" is not part of this pattern.
# NOTE(review): this looks like the removed definition from the diff; the
# extended-mode ADDRESS_REGEXP that follows appears to replace it.
ADDRESS_REGEXP = /\G\
(?# 1: NAME )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
(?# 2: ROUTE )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
(?# 3: MAILBOX )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
(?# 4: HOST )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)")\
\)/ni

# Matches a complete address at the search position (\G): "(", then four
# fields each separated by one whitespace character (\s), then ")". Every
# field is either NIL or Patterns::QUOTED_rev2. Written in extended (/x)
# mode, so the layout whitespace and the "# n: FIELD" notes are not part of
# the pattern.
# NOTE(review): presumably QUOTED_rev2 contributes exactly one capture group
# per field, giving four captures in total -- confirm against Patterns.
ADDRESS_REGEXP = /\G
\( (?: NIL | #{Patterns::QUOTED_rev2} ) # 1: NAME
\s (?: NIL | #{Patterns::QUOTED_rev2} ) # 2: ROUTE
\s (?: NIL | #{Patterns::QUOTED_rev2} ) # 3: MAILBOX
\s (?: NIL | #{Patterns::QUOTED_rev2} ) # 4: HOST
\)
/nix

# address = "(" addr-name SP addr-adl SP addr-mailbox SP
# addr-host ")"
# addr-adl = nstring
# addr-host = nstring
# addr-mailbox = nstring
# addr-name = nstring
# Parses a single address and returns Address.new(name, route, mailbox, host).
# ADDRESS_REGEXP is tried first; when it does not match, the four fields are
# parsed one by one instead.
#
# NOTE(review): this text appears to come from a diff view whose +/- markers
# were lost, so two versions of the body are interleaved and the if/end
# nesting does not balance as shown. The lines using match(T_...),
# @str.index and $1..$4 look like the removed version; the lines using
# accept_re, lpar/SP!/rpar and the addr_* rules look like the added version.
# Confirm against the repository before reuse.
def address
# -- presumably removed: "(" consumed as a token, then the regexp tried at @pos --
match(T_LPAR)
if @str.index(ADDRESS_REGEXP, @pos)
# address does not include literal.
@pos = $~.end(0)
name = $1
route = $2
mailbox = $3
host = $4
for s in [name, route, mailbox, host]
Patterns.unescape_quoted! s
end
else
name = nstring
match(T_SPACE)
route = nstring
match(T_SPACE)
mailbox = nstring
match(T_SPACE)
host = nstring
match(T_RPAR)
# -- presumably added: the regexp covers the whole "(...)", with a rule-based fallback --
if (match = accept_re(ADDRESS_REGEXP))
# note that "NIL" isn't captured by the regexp
name, route, mailbox, host = match.captures
.map { Patterns.unescape_quoted _1 }
else # address may include literals
lpar; name = addr_name
SP!; route = addr_adl
SP!; mailbox = addr_mailbox
SP!; host = addr_host
rpar
end
return Address.new(name, route, mailbox, host)
Address.new(name, route, mailbox, host)
end

# The four address fields are all nstring, so each grammar name is an alias
# of the +nstring+ rule.
alias addr_adl nstring
alias addr_host nstring
alias addr_mailbox nstring
alias addr_name nstring

# flag-list = "(" [flag *(SP flag)] ")"
def flag_list
match_re(Patterns::FLAG_LIST, "flag-list")[1]
Expand Down

0 comments on commit d3facc2

Please sign in to comment.