Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Image2latex #751

Merged
merged 12 commits into from
Jan 17, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 99 additions & 11 deletions mathics/builtin/box/image.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
# -*- coding: utf-8 -*-
"""
Boxing Routines for raster images.
"""

import base64
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without a docstring at the top, this doesn't appear in any printed documentation or in Django docs. If that is intended, then the homegrown tagging such as ImageBox (on line 26) is not used.

We should decide which way we want to go, and either add a docstring at the top or remove the homegrown tagging.

import tempfile
import warnings
from copy import deepcopy
from io import BytesIO
from typing import Tuple

import PIL
import PIL.Image

from mathics.builtin.box.expression import BoxExpression
from mathics.core.element import BaseElement
from mathics.eval.image import pixels_as_ubyte


class ImageBox(BoxExpression):
Expand All @@ -13,18 +28,91 @@ class ImageBox(BoxExpression):
an Image object.
"""

def boxes_to_text(self, elements=None, **options):
def boxes_to_b64text(
    self, elements: Tuple[BaseElement] = None, **options
) -> Tuple[bytes, Tuple[int, int]]:
    """
    Build a base64 ``data:`` URI for the PNG rendering of the image.

    The PNG bytes and the image size both come from ``boxes_to_png``;
    this method only base64-encodes the bytes and prepends the
    ``data:image/png;base64,`` header.

    Returns a tuple ``(uri, size)`` where ``uri`` is a bytes object and
    ``size`` is passed through unchanged from ``boxes_to_png``.
    """
    png_bytes, size = self.boxes_to_png(elements, **options)
    data_uri = b"data:image/png;base64," + base64.b64encode(png_bytes)
    return (data_uri, size)

def boxes_to_png(self, elements=None, **options) -> Tuple[bytes, Tuple[int, int]]:
    """
    Render the image as PNG.

    The image is taken from ``elements[0]`` when ``elements`` is given,
    otherwise from ``self.elements[0]``.

    Returns a tuple ``(contents, (width, height))`` where ``contents`` is
    the PNG file as bytes and ``(width, height)`` is the size in pixels
    after any upscaling applied to very small images.
    """
    image = self.elements[0] if elements is None else elements[0]

    pixels = pixels_as_ubyte(image.color_convert("RGB", True).pixels)
    shape = pixels.shape

    # The pixel array is indexed [row, column, ...], so shape[0] is the
    # height and shape[1] is the width.
    width = shape[1]
    height = shape[0]
    scaled_width = width
    scaled_height = height

    # If the image was created from PIL, use that rather than
    # reconstruct it from pixels which we can get wrong.
    # In particular getting color-mapping info right can be
    # tricky.
    if hasattr(image, "pillow"):
        pillow = deepcopy(image.pillow)
    else:
        pixels_format = "RGBA" if len(shape) >= 3 and shape[2] == 4 else "RGB"
        pillow = PIL.Image.fromarray(pixels, pixels_format)

    # If the image is very small, scale it up using nearest neighbour.
    min_size = 128
    if width < min_size and height < min_size:
        scale = min_size / max(width, height)
        scaled_width = int(scale * width)
        scaled_height = int(scale * height)
        # Pillow's resize() takes the target size as (width, height).
        # Passing (height, width) transposes the dimensions of non-square
        # images and contradicts the size returned below.
        pillow = pillow.resize(
            (scaled_width, scaled_height), resample=PIL.Image.NEAREST
        )

    with warnings.catch_warnings():
        # Silence any warnings emitted while encoding the PNG.
        warnings.simplefilter("ignore")

        with BytesIO() as stream:
            pillow.save(stream, format="png")
            contents = stream.getvalue()

    return (contents, (scaled_width, scaled_height))

def boxes_to_text(self, elements=None, **options) -> str:
    """
    Return the plain-text stand-in for an image.

    Both ``elements`` and ``options`` are ignored: the result is always
    the fixed placeholder string.
    """
    placeholder = "-Image-"
    return placeholder

def boxes_to_mathml(self, elements=None, **options) -> str:
    """
    Return a MathML ``<mglyph>`` element that embeds the image.

    The image is embedded as a base64 PNG ``data:`` URI obtained from
    ``boxes_to_b64text``; the ``width`` and ``height`` attributes are the
    pixel size reported by that method.
    """
    encoded, size = self.boxes_to_b64text(elements, **options)
    # boxes_to_b64text() returns bytes. Interpolating bytes directly into
    # an f-string would emit the repr (b'...'), so decode first. Base64
    # output and the URI header are pure ASCII.
    src = encoded.decode("ascii")
    # see https://tools.ietf.org/html/rfc2397
    return f'<mglyph src="{src}" width="{size[0]}px" height="{size[1]}px" />'

def boxes_to_tex(self, elements=None, **options):
return "-Image-"
def boxes_to_tex(self, elements=None, **options) -> str:
"""
Store the associated image as a png file and return
a LaTeX command for including it.
"""

data, size = self.boxes_to_png(elements, **options)
res = 100 # pixels/cm
width_str, height_str = (str(n / res).strip() for n in size)
head = rf"\includegraphics[width={width_str}cm,height={height_str}cm]"

# This produces a random name, where the png file is going to be stored.
# LaTeX does not have a native way to store a figure embedded in
# the source.
fp = tempfile.NamedTemporaryFile(delete=True, suffix=".png")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider using pillow routines for writing PNG.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The conversion is done when an Image is converted into an ImageBox. ImageBox stores a base64-encoded version of the PNG file. Here we just decode it and store it in a file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If ImageBox can keep the pillow structure, that may be a win. A more general problem we have is that in digesting things for M-Expressions we lose the efficient and sometimes more flexible properties of whatever the object was before. And we spend a lot of time in conversion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is done now.

path = fp.name
fp.close()

with open(path, "wb") as imgfile:
imgfile.write(data)

return head + "{" + format(path) + "}"
69 changes: 8 additions & 61 deletions mathics/builtin/image/base.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import base64
from copy import deepcopy
from io import BytesIO
from typing import Tuple

from mathics.builtin.base import AtomBuiltin, String
from mathics.builtin.box.image import ImageBox
from mathics.builtin.colors.color_internals import convert_color
from mathics.core.atoms import Atom, Integer
from mathics.core.atoms import Atom
from mathics.core.evaluation import Evaluation
from mathics.core.expression import Expression
from mathics.core.list import ListExpression
Expand All @@ -16,18 +13,12 @@
_skimage_requires = ("skimage", "scipy", "matplotlib", "networkx")


try:
import warnings

import numpy
import PIL
import PIL.Image
import PIL.ImageEnhance
import PIL.ImageFilter
import PIL.ImageOps

except ImportError:
pass
import numpy
import PIL
import PIL.Image
import PIL.ImageEnhance
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not seeing PIL.ImageFilter or PIL.ImageOps used anywhere (nor a plain PIL for that matter).

Probably left over cut and paste from older code that had this.

import PIL.ImageFilter
import PIL.ImageOps


def _image_pixels(matrix):
Expand Down Expand Up @@ -91,51 +82,7 @@ def atom_to_boxes(self, form, evaluation: Evaluation) -> ImageBox:
"""
Converts our internal Image object into a PNG base64-encoded.
"""
pixels = pixels_as_ubyte(self.color_convert("RGB", True).pixels)
shape = pixels.shape

width = shape[1]
height = shape[0]
scaled_width = width
scaled_height = height

# If the image was created from PIL, use that rather than
# reconstruct it from pixels which we can get wrong.
# In particular getting color-mapping info right can be
# tricky.
if hasattr(self, "pillow"):
pillow = deepcopy(self.pillow)
else:
pixels_format = "RGBA" if len(shape) >= 3 and shape[2] == 4 else "RGB"
pillow = PIL.Image.fromarray(pixels, pixels_format)

# if the image is very small, scale it up using nearest neighbour.
min_size = 128
if width < min_size and height < min_size:
scale = min_size / max(width, height)
scaled_width = int(scale * width)
scaled_height = int(scale * height)
pillow = pillow.resize(
(scaled_height, scaled_width), resample=PIL.Image.NEAREST
)

with warnings.catch_warnings():
warnings.simplefilter("ignore")

stream = BytesIO()
pillow.save(stream, format="png")
stream.seek(0)
contents = stream.read()
stream.close()

encoded = base64.b64encode(contents)
encoded = b"data:image/png;base64," + encoded

return ImageBox(
String(encoded.decode("utf-8")),
Integer(scaled_width),
Integer(scaled_height),
)
return ImageBox(self)

# __hash__ is defined so that we can store Number-derived objects
# in a set or dictionary.
Expand Down
18 changes: 14 additions & 4 deletions mathics/doc/latex_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,18 @@
r"(?P<content>.*?)\\end\{(?P=tag)\}"
)

LATEX_TESTOUT_DELIM_RE = re.compile(r",")
NUMBER_RE = re.compile(r"(\d*(?<!\.)\.\d+|\d+\.(?!\.)\d*|\d+)")
LATEX_TESTOUT_DELIM_RE = re.compile(r", ")

# The goal of the following pattern is to enclose the numbers included in
# expressions produced by tests between ```\allowbreak{}```. The pattern matches
# negative numbers, or positive numbers preceded by a space character.
# The replacement must not be applied when the number is part of a
# LaTeX parameter (for instance ```\includegraphics[width=5cm]{...}```),
# so in that case there must be no space before the number. For example,
# ```\includegraphics[width= 5cm]{...}``` would be rewritten as
# \includegraphics[width=\allowbreak{}5\allowbreak{}cm]{...} which is not a valid
# LaTeX command.
NUMBER_RE = re.compile(r"([ -])(\d*(?<!\.)\.\d+|\d+\.(?!\.)\d*|\d+)")
rocky marked this conversation as resolved.
Show resolved Hide resolved
OUTSIDE_ASY_RE = re.compile(r"(?s)((?:^|\\end\{asy\}).*?(?:$|\\begin\{asy\}))")


Expand Down Expand Up @@ -366,13 +376,13 @@ def repl_text(match):
return text

def repl_out_delim(match):
return ",\\allowbreak{}"
return ",\\allowbreak{} "

def repl_number(match):
guard = r"\allowbreak{}"
inter_groups_pre = r"\,\discretionary{\~{}}{\~{}}{}"
inter_groups_post = r"\discretionary{\~{}}{\~{}}{}"
number = match.group(1)
number = match.group(1) + match.group(2)
parts = number.split(".")
if len(number) <= 3:
return number
Expand Down