Skip to content

Commit

Permalink
fix(api): keep proto contents in bytes for longer (#14446)
Browse files Browse the repository at this point in the history
When we parse python protocols, we were doing it by (1) making it into a
string with decode('utf-8') and then (2) passing the string ast.parse().
The problem with this is that decode('utf-8') does not apply "universal
newlines", which means that the code object created by compiling the ast
will have line numbers that are around twice what they should be under
certain circumstances (windows machine, crlf file, mercury in the
seventh house, etc). Then, when we go and display a nice error message
about a syntax error or whatever, the user says "why is this error
message pointing to a place past the end of my protocol".

This should fix that by keeping the protocol contents in bytes form all
the way through to passing ast.parse() a bytes that _has never been
through str.decode('utf-8')_ which should preserve everything.
  • Loading branch information
sfoster1 authored Feb 9, 2024
1 parent 277074a commit 8dc6f79
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 22 deletions.
38 changes: 25 additions & 13 deletions api/src/opentrons/protocols/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,9 @@ def version_from_string(vstr: str) -> APIVersion:
return APIVersion(major=int(matches.group(1)), minor=int(matches.group(2)))


def _parse_json(protocol_contents: str, filename: Optional[str] = None) -> JsonProtocol:
def _parse_json(
protocol_contents: Union[str, bytes], filename: Optional[str] = None
) -> JsonProtocol:
"""Parse a protocol known or at least suspected to be json"""
protocol_json = json.loads(protocol_contents)
version, validated = validate_json(protocol_json)
Expand All @@ -208,7 +210,7 @@ def _parse_json(protocol_contents: str, filename: Optional[str] = None) -> JsonP


def _parse_python(
protocol_contents: str,
protocol_contents: Union[str, bytes],
python_parse_mode: PythonParseMode,
filename: Optional[str] = None,
bundled_labware: Optional[Dict[str, "LabwareDefinition"]] = None,
Expand Down Expand Up @@ -338,28 +340,37 @@ def parse(
)
return result
else:
if isinstance(protocol_file, bytes):
protocol_str = protocol_file.decode("utf-8")
else:
protocol_str = protocol_file

if filename and filename.endswith(".json"):
return _parse_json(protocol_str, filename)
return _parse_json(protocol_file, filename)
elif filename and filename.endswith(".py"):
return _parse_python(
protocol_contents=protocol_str,
protocol_contents=protocol_file,
python_parse_mode=python_parse_mode,
filename=filename,
extra_labware=extra_labware,
bundled_data=extra_data,
)

# our jsonschema says the top level json kind is object
if protocol_str and protocol_str[0] in ("{", b"{"):
return _parse_json(protocol_str, filename)
# our jsonschema says the top level json kind is object so we can
# rely on it starting with a { if it's valid. that could either be
# a string or bytes.
#
# if it's a string, then if the protocol file starts with a { and
# we do protocol_file[0] then we get the string "{".
#
# if it's a bytes, then if the protocol file starts with the ascii or
# utf-8 representation of { and we do protocol_file[0] we get 123,
# because while single elements of strings are strings, single elements
# of bytes are the byte value as a number.
#
# to get that number we could either use ord() or do what we do here
# which I think is a little nicer, if any of the above can be called
# "nice".
if protocol_file and protocol_file[0] in ("{", b"{"[0]):
return _parse_json(protocol_file, filename)
else:
return _parse_python(
protocol_contents=protocol_str,
protocol_contents=protocol_file,
python_parse_mode=python_parse_mode,
filename=filename,
extra_labware=extra_labware,
Expand Down Expand Up @@ -499,6 +510,7 @@ def _version_from_static_python_info(
"""
from_requirements = (static_python_info.requirements or {}).get("apiLevel", None)
from_metadata = (static_python_info.metadata or {}).get("apiLevel", None)

requested_level = from_requirements or from_metadata
if requested_level is None:
return None
Expand Down
10 changes: 8 additions & 2 deletions api/src/opentrons/protocols/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,13 @@ class StaticPythonInfo:

@dataclass(frozen=True)
class _ProtocolCommon:
text: str
text: Union[str, bytes]
"""The original text of the protocol file in the format it was specified with.
This leads to a wide type but it is actually quite important that we do not ever
str.decode('utf-8') this because it will break the interpreter's understanding of
line numbers for if we have to format an exception.
"""

filename: Optional[str]
"""The original name of the main protocol file, if it had a name.
Expand Down Expand Up @@ -74,7 +80,7 @@ class PythonProtocol(_ProtocolCommon):


class BundleContents(NamedTuple):
protocol: str
protocol: Union[str, bytes]
bundled_labware: Dict[str, "LabwareDefinition"]
bundled_data: Dict[str, bytes]
bundled_python: Dict[str, str]
Expand Down
5 changes: 4 additions & 1 deletion api/src/opentrons/util/entrypoint_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,10 @@ def adapt_protocol_source(protocol: Protocol) -> Generator[ProtocolSource, None,
# through the filesystem. https://opentrons.atlassian.net/browse/RSS-281

main_file = pathlib.Path(temporary_directory) / main_file_name
main_file.write_text(protocol.text, encoding="utf-8")
if isinstance(protocol.text, str):
main_file.write_text(protocol.text, encoding="utf-8")
else:
main_file.write_bytes(protocol.text)

labware_files: List[pathlib.Path] = []
if isinstance(protocol, PythonProtocol) and protocol.extra_labware is not None:
Expand Down
15 changes: 9 additions & 6 deletions api/tests/opentrons/protocols/test_parse.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from textwrap import dedent
from typing import Any, Callable, Optional, Union
from typing import Any, Callable, Optional, Union, Literal

import pytest
from opentrons_shared_data.robot.dev_types import RobotType
Expand Down Expand Up @@ -407,7 +407,7 @@ def run(ctx): pass
@pytest.mark.parametrize("filename", ["protocol.py", None])
def test_parse_python_details(
protocol_source: str,
protocol_text_kind: str,
protocol_text_kind: Literal["str", "bytes"],
filename: Optional[str],
expected_api_level: APIVersion,
expected_robot_type: RobotType,
Expand All @@ -423,8 +423,11 @@ def test_parse_python_details(
parsed = parse(text, filename)

assert isinstance(parsed, PythonProtocol)
assert parsed.text == protocol_source
assert isinstance(parsed.text, str)
assert parsed.text == text
if protocol_text_kind == "str":
assert isinstance(parsed.text, str)
else:
assert isinstance(parsed.text, bytes)

assert parsed.filename == filename
assert parsed.contents.co_filename == (
Expand Down Expand Up @@ -454,13 +457,13 @@ def test_parse_json_details(
get_json_protocol_fixture: Callable[..., Any],
fixture_version: str,
fixture_name: str,
protocol_text_kind: str,
protocol_text_kind: Literal["str", "bytes"],
filename: str,
) -> None:
protocol = get_json_protocol_fixture(
fixture_version=fixture_version, fixture_name=fixture_name, decode=False
)
if protocol_text_kind == "text":
if protocol_text_kind == "str":
protocol_text: Union[bytes, str] = protocol
else:
protocol_text = protocol.encode("utf-8")
Expand Down

0 comments on commit 8dc6f79

Please sign in to comment.