Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Image2latex #751

Merged
merged 12 commits into from
Jan 17, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 89 additions & 8 deletions mathics/builtin/box/image.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,25 @@
# -*- coding: utf-8 -*-

import base64
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without a docstring at the top, this doesn't appear in any printed documentation or in Django docs. If that is intended, then the homegrown tagging such as ImageBox (on line 26) is not used.

We should decide which way we want to go, and either add a docstring at the top or remove the homegrown tagging.

import tempfile
from copy import deepcopy
from io import BytesIO
from typing import Tuple

from mathics.builtin.box.expression import BoxExpression
from mathics.eval.image import pixels_as_ubyte

try:
Copy link
Member

@rocky rocky Jan 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No `try`, please. PIL is now required.

(And in the far future, when image processing is moved out of Mathics-core, PIL will still be required in that package. Let's not complicate things.)

import warnings

import PIL
import PIL.Image
import PIL.ImageEnhance
import PIL.ImageFilter
import PIL.ImageOps

except ImportError:
pass


class ImageBox(BoxExpression):
Expand All @@ -13,18 +32,80 @@ class ImageBox(BoxExpression):
an Image object.
"""

def boxes_to_b64text(self, elements=None, **options):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type annotation and a docstring would be nice here.

contents, size = self.boxes_to_png(elements, **options)
encoded = base64.b64encode(contents)
encoded = b"data:image/png;base64," + encoded
return encoded, size

def boxes_to_png(self, elements=None, **options) -> Tuple:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return type is more than just a generic tuple; it is something like Tuple[<type of contents>, Tuple[int, int]].

Please add and fill in "type of contents".

"""
returns a tuple with the set of bytes with a png representation of the image
and the scaled size.
"""
image = self.elements[0] if elements is None else elements[0]

pixels = pixels_as_ubyte(image.color_convert("RGB", True).pixels)
shape = pixels.shape

width = shape[1]
height = shape[0]
scaled_width = width
scaled_height = height

# If the image was created from PIL, use that rather than
# reconstruct it from pixels which we can get wrong.
# In particular getting color-mapping info right can be
# tricky.
if hasattr(image, "pillow"):
pillow = deepcopy(image.pillow)
else:
pixels_format = "RGBA" if len(shape) >= 3 and shape[2] == 4 else "RGB"
pillow = PIL.Image.fromarray(pixels, pixels_format)

# if the image is very small, scale it up using nearest neighbour.
min_size = 128
if width < min_size and height < min_size:
scale = min_size / max(width, height)
scaled_width = int(scale * width)
scaled_height = int(scale * height)
pillow = pillow.resize(
(scaled_height, scaled_width), resample=PIL.Image.NEAREST
)

with warnings.catch_warnings():
warnings.simplefilter("ignore")

stream = BytesIO()
pillow.save(stream, format="png")
stream.seek(0)
contents = stream.read()
stream.close()

return (contents, (scaled_width, scaled_height))

# Plain-text form of the image box: always returns the fixed placeholder
# string "-Image-". Both `elements` and `options` are accepted but ignored.
def boxes_to_text(self, elements=None, **options):
return "-Image-"

def boxes_to_mathml(self, elements=None, **options):
    """
    Render this image box as a MathML ``<mglyph>`` element.

    The image data and display size are obtained from
    ``self.boxes_to_b64text``; the data URI goes into ``src`` and the
    size into ``width``/``height`` (in pixels).

    elements: elements to render; when ``None``, ``self._elements`` is used.
    options: forwarded unchanged to ``boxes_to_b64text``.

    Returns the ``<mglyph>`` markup as a ``str``.
    """
    if elements is None:
        elements = self._elements
    encoded, size = self.boxes_to_b64text(elements, **options)
    # boxes_to_b64text builds the data URI as bytes. Formatting bytes with
    # "%s" would embed the literal b'...' repr in the src attribute, so
    # decode first (base64 output is plain ASCII).
    if isinstance(encoded, bytes):
        encoded = encoded.decode("ascii")
    # see https://tools.ietf.org/html/rfc2397
    return '<mglyph src="%s" width="%dpx" height="%dpx" />' % (encoded, *size)
rocky marked this conversation as resolved.
Show resolved Hide resolved

def boxes_to_tex(self, elements=None, **options):
return "-Image-"
data, size = self.boxes_to_png(elements, **options)
res = 100 # pixels/cm
width_str, height_str = (str(n / res).strip() for n in size)
head = rf"\includegraphics[width={width_str}cm,height={height_str}cm]"

# This produces a random name, where the png file is going to be stored.
# LaTeX does not have a native way to store a figure embedded in
# the source.
fp = tempfile.NamedTemporaryFile(delete=True, suffix=".png")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider using pillow routines for writing PNG.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The conversion is done when the Image is converted into an ImageBox. ImageBox stores a Base64-encoded version of the PNG file; here we just decode it and store it in a file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If ImageBox can keep the pillow structure, that may be a win. A more general problem we have is that in digesting things for M-Expressions we lose the efficient and sometimes more flexible properties of whatever the object was before. And we spend a lot of time in conversion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is done now.

path = fp.name
fp.close()

with open(path, "wb") as imgfile:
imgfile.write(data)

return head + "{" + format(path) + "}"
53 changes: 2 additions & 51 deletions mathics/builtin/image/base.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import base64
from copy import deepcopy
from io import BytesIO
from typing import Tuple

from mathics.builtin.base import AtomBuiltin, String
from mathics.builtin.box.image import ImageBox
from mathics.builtin.colors.color_internals import convert_color
from mathics.core.atoms import Atom, Integer
from mathics.core.atoms import Atom
from mathics.core.evaluation import Evaluation
from mathics.core.expression import Expression
from mathics.core.list import ListExpression
Expand All @@ -17,8 +14,6 @@


try:
import warnings

import numpy
import PIL
import PIL.Image
Expand Down Expand Up @@ -91,51 +86,7 @@ def atom_to_boxes(self, form, evaluation: Evaluation) -> ImageBox:
"""
Converts our internal Image object into a PNG base64-encoded.
"""
pixels = pixels_as_ubyte(self.color_convert("RGB", True).pixels)
shape = pixels.shape

width = shape[1]
height = shape[0]
scaled_width = width
scaled_height = height

# If the image was created from PIL, use that rather than
# reconstruct it from pixels which we can get wrong.
# In particular getting color-mapping info right can be
# tricky.
if hasattr(self, "pillow"):
pillow = deepcopy(self.pillow)
else:
pixels_format = "RGBA" if len(shape) >= 3 and shape[2] == 4 else "RGB"
pillow = PIL.Image.fromarray(pixels, pixels_format)

# if the image is very small, scale it up using nearest neighbour.
min_size = 128
if width < min_size and height < min_size:
scale = min_size / max(width, height)
scaled_width = int(scale * width)
scaled_height = int(scale * height)
pillow = pillow.resize(
(scaled_height, scaled_width), resample=PIL.Image.NEAREST
)

with warnings.catch_warnings():
warnings.simplefilter("ignore")

stream = BytesIO()
pillow.save(stream, format="png")
stream.seek(0)
contents = stream.read()
stream.close()

encoded = base64.b64encode(contents)
encoded = b"data:image/png;base64," + encoded

return ImageBox(
String(encoded.decode("utf-8")),
Integer(scaled_width),
Integer(scaled_height),
)
return ImageBox(self)

# __hash__ is defined so that we can store Number-derived objects
# in a set or dictionary.
Expand Down
8 changes: 4 additions & 4 deletions mathics/doc/latex_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@
r"(?P<content>.*?)\\end\{(?P=tag)\}"
)

LATEX_TESTOUT_DELIM_RE = re.compile(r",")
NUMBER_RE = re.compile(r"(\d*(?<!\.)\.\d+|\d+\.(?!\.)\d*|\d+)")
LATEX_TESTOUT_DELIM_RE = re.compile(r", ")
NUMBER_RE = re.compile(r"([ -])(\d*(?<!\.)\.\d+|\d+\.(?!\.)\d*|\d+)")
rocky marked this conversation as resolved.
Show resolved Hide resolved
OUTSIDE_ASY_RE = re.compile(r"(?s)((?:^|\\end\{asy\}).*?(?:$|\\begin\{asy\}))")


Expand Down Expand Up @@ -366,13 +366,13 @@ def repl_text(match):
return text

def repl_out_delim(match):
    # Replacement callback: the `match` argument is ignored and a fixed
    # string is returned. Presumably used with LATEX_TESTOUT_DELIM_RE,
    # which matches ", " -- verify against the caller. The trailing space
    # is kept so that the blank after the comma is not lost, while
    # \allowbreak{} lets LaTeX break the line after the comma.
    return ",\\allowbreak{} "

def repl_number(match):
guard = r"\allowbreak{}"
inter_groups_pre = r"\,\discretionary{\~{}}{\~{}}{}"
inter_groups_post = r"\discretionary{\~{}}{\~{}}{}"
number = match.group(1)
number = match.group(1) + match.group(2)
parts = number.split(".")
if len(number) <= 3:
return number
Expand Down