-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Python 3: Convert some unicode/bytes uses #3569
Changes from 11 commits
cb6b689
8801cb8
e729bdf
a14df28
ede1ace
f8172ef
df8f3e3
e0bf614
f0a00f0
35a41ab
4831ead
f04033e
e1bdb58
521a920
58df4a0
f152316
9fc33fd
6040be4
6745999
ed08bcb
da3502a
616864e
e876bcd
d5b735e
3cc58ea
b3a8de6
bfe288c
df8c45a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Unicode passwords are now normalised before hashing, preventing the instance where two different devices or browsers might send a different UTF-8 sequence for the password. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
# limitations under the License. | ||
|
||
import logging | ||
import unicodedata | ||
|
||
import attr | ||
import bcrypt | ||
|
@@ -855,8 +856,16 @@ def hash(self, password): | |
Deferred(str): Hashed password. | ||
""" | ||
def _do_hash(): | ||
return bcrypt.hashpw(password.encode('utf8') + self.hs.config.password_pepper, | ||
bcrypt.gensalt(self.bcrypt_rounds)) | ||
# Ensure that we normalise the password | ||
if isinstance(password, bytes): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't we decide if it's meant to be a str or a bytes? or document it as such if it's really meant to be either? (also, shouldn't this be written:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That would be a nicer way of writing it, but password isn't in a writable scope (because it's inside another function). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wrt the type -- it's being passed in as str, but since it's not clear that it would only be decoded on py2, I've changed it to be a Py3 check. |
||
pw = unicodedata.normalize("NFKC", password.decode('utf8')) | ||
else: | ||
pw = unicodedata.normalize("NFKC", password) | ||
|
||
return bcrypt.hashpw( | ||
pw.encode('utf8') + self.hs.config.password_pepper.encode("utf8"), | ||
bcrypt.gensalt(self.bcrypt_rounds), | ||
) | ||
|
||
return make_deferred_yieldable( | ||
threads.deferToThreadPool( | ||
|
@@ -876,8 +885,12 @@ def validate_hash(self, password, stored_hash): | |
""" | ||
|
||
def _do_validate_hash(): | ||
if isinstance(password, bytes): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as above |
||
pw = unicodedata.normalize("NFKC", password.decode('utf8')) | ||
else: | ||
pw = unicodedata.normalize("NFKC", password) | ||
return bcrypt.checkpw( | ||
password.encode('utf8') + self.hs.config.password_pepper, | ||
pw.encode('utf8') + self.hs.config.password_pepper.encode("utf8"), | ||
stored_hash.encode('utf8') | ||
) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,12 +13,13 @@ | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import cgi | ||
import collections | ||
import logging | ||
import urllib | ||
|
||
from six.moves import http_client | ||
from six import PY3 | ||
from six.moves import http_client, urllib | ||
|
||
from canonicaljson import encode_canonical_json, encode_pretty_printed_json, json | ||
|
||
|
@@ -264,6 +265,7 @@ def __init__(self, hs, canonical_json=True): | |
self.hs = hs | ||
|
||
def register_paths(self, method, path_patterns, callback): | ||
method = method.encode("utf-8") # method is bytes on py3 | ||
for path_pattern in path_patterns: | ||
logger.debug("Registering for %s %s", method, path_pattern.pattern) | ||
self.path_regexs.setdefault(method, []).append( | ||
|
@@ -296,8 +298,14 @@ def _async_render(self, request): | |
# here. If it throws an exception, that is handled by the wrapper | ||
# installed by @request_handler. | ||
|
||
def _parse(s): | ||
if PY3: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why don't we decode when we have PY3? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Given the URL encoded sequence string "%E2%98%83", encoded as a unicode string: Python2:
This is wrong, it returns Unicode but the ASCII escaped character codes. Python 2:
Correct, returns the Unicode literal, but escaped for display as Py2 will usually not print real Unicode characters by itself. Python 3:
Correct, returns the Unicode literal (not escaped, as Python 3 has the correct terminal encoding set up). |
||
return urllib.parse.unquote(s) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we not utf-8 decode here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's already decoded by |
||
else: | ||
return urllib.parse.unquote(s.encode('utf8')).decode('utf8') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are we now encoding this when we weren't before? And why doesn't it need to happen on py3? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We aren't encoding this because we now parse the incoming arguments to Unicode in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (as to why we encode it on py2, see https://github.com/matrix-org/synapse/pull/3569/files/4831ead29a575a299d8e377f26bc8fc35dc49cc2#r205017485 ) |
||
|
||
kwargs = intern_dict({ | ||
name: urllib.unquote(value).decode("UTF-8") if value else value | ||
name: _parse(value) if value else value | ||
for name, value in group_dict.items() | ||
}) | ||
|
||
|
@@ -327,7 +335,7 @@ def _get_handler_for_request(self, request): | |
# Loop through all the registered callbacks to check if the method | ||
# and path regex match | ||
for path_entry in self.path_regexs.get(request.method, []): | ||
m = path_entry.pattern.match(request.path) | ||
m = path_entry.pattern.match(request.path.decode()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
if m: | ||
# We found a match! | ||
return path_entry.callback, m.groupdict() | ||
|
@@ -383,7 +391,7 @@ def __init__(self, path): | |
self.url = path | ||
|
||
def render_GET(self, request): | ||
return redirectTo(self.url, request) | ||
return redirectTo(self.url.encode(), request) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. likewise There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed |
||
|
||
def getChild(self, name, request): | ||
if len(name) == 0: | ||
|
@@ -404,12 +412,14 @@ def respond_with_json(request, code, json_object, send_cors=False, | |
return | ||
|
||
if pretty_print: | ||
json_bytes = encode_pretty_printed_json(json_object) + "\n" | ||
json_bytes = (encode_pretty_printed_json(json_object) + "\n" | ||
).encode("utf-8") | ||
else: | ||
if canonical_json or synapse.events.USE_FROZEN_DICTS: | ||
# canonicaljson already encodes to bytes | ||
json_bytes = encode_canonical_json(json_object) | ||
else: | ||
json_bytes = json.dumps(json_object) | ||
json_bytes = json.dumps(json_object).encode("utf-8") | ||
|
||
return respond_with_json_bytes( | ||
request, code, json_bytes, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ | |
from collections import OrderedDict, deque, namedtuple | ||
from functools import wraps | ||
|
||
from six import iteritems, itervalues | ||
from six import PY3, iteritems, itervalues | ||
from six.moves import range | ||
|
||
from canonicaljson import json | ||
|
@@ -65,7 +65,10 @@ | |
|
||
|
||
def encode_json(json_object): | ||
return frozendict_json_encoder.encode(json_object) | ||
if PY3: | ||
return frozendict_json_encoder.encode(json_object) | ||
else: | ||
return frozendict_json_encoder.encode(json_object).decode("utf-8") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we only do this for py2? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. json/simplejson returns There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (also the decodes in the functions below have been moved to this one spot) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok comments please to explain this. |
||
|
||
|
||
class _EventPeristenceQueue(object): | ||
|
@@ -981,7 +984,7 @@ def _update_outliers_txn(self, txn, events_and_contexts): | |
|
||
metadata_json = encode_json( | ||
event.internal_metadata.get_dict() | ||
).decode("UTF-8") | ||
) | ||
|
||
sql = ( | ||
"UPDATE event_json SET internal_metadata = ?" | ||
|
@@ -1095,8 +1098,8 @@ def event_dict(event): | |
"room_id": event.room_id, | ||
"internal_metadata": encode_json( | ||
event.internal_metadata.get_dict() | ||
).decode("UTF-8"), | ||
"json": encode_json(event_dict(event)).decode("UTF-8"), | ||
), | ||
"json": encode_json(event_dict(event)), | ||
} | ||
for event, _ in events_and_contexts | ||
], | ||
|
@@ -1115,7 +1118,7 @@ def event_dict(event): | |
"type": event.type, | ||
"processed": True, | ||
"outlier": event.internal_metadata.is_outlier(), | ||
"content": encode_json(event.content).decode("UTF-8"), | ||
"content": encode_json(event.content), | ||
"origin_server_ts": int(event.origin_server_ts), | ||
"received_ts": self._clock.time_msec(), | ||
"sender": event.sender, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,7 +82,16 @@ def _get_event_reference_hashes_txn(self, txn, event_id): | |
" WHERE event_id = ?" | ||
) | ||
txn.execute(query, (event_id, )) | ||
return {k: v for k, v in txn} | ||
if six.PY2: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we not use the PY3 path under py2 as well? |
||
return {k: v for k, v in txn} | ||
else: | ||
done = {} | ||
for k, v in txn: | ||
if not isinstance(v, bytes): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why does it vary? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It shouldn't, so I removed the if. |
||
done[k] = v.encode('ascii') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we update the docstring to note that this returns a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
else: | ||
done[k] = v | ||
return done | ||
|
||
|
||
class SignatureStore(SignatureWorkerStore): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
s/str/bytes/ ?