Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

To detect the missing figures in the tex compilation, comb through the tex log and find the missing files. #18

Merged
merged 4 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions tex2pdf_service/tex2pdf/atomic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
Atomic: sugar for threading
"""
import threading
import typing


class AtomicInteger:
    """Integer counter whose reads and updates are serialized by a lock."""

    def __init__(self, value: int = 0):
        self._lock = threading.Lock()
        self._value = int(value)

    def increment(self, step: int = 1) -> int:
        """Add ``step`` (coerced with ``int``) and return the updated total."""
        with self._lock:
            updated = self._value + int(step)
            self._value = updated
        return updated

    def decrement(self, step: int = 1) -> int:
        """Subtract ``step``; implemented as an increment by ``-step``."""
        negated = -step
        return self.increment(negated)

    @property
    def value(self) -> int:
        """Current total, read while holding the lock."""
        with self._lock:
            current = self._value
        return current

    @value.setter
    def value(self, value: int) -> None:
        # Coerce outside the lock; only the assignment itself needs guarding.
        replacement = int(value)
        with self._lock:
            self._value = replacement


class AtomicStrings:
    """List of strings whose append, read and replace go through a lock.

    Note that the accessors hand back the internal list object itself, not a
    copy.
    """
    _value: list[str]

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._value = []

    def append(self, value: str) -> list[str]:
        """Append ``value`` under the lock and return the internal list."""
        with self._lock:
            items = self._value
            items.append(value)
        return items

    @property
    def value(self) -> list[str]:
        """The internal list, fetched while holding the lock."""
        with self._lock:
            items = self._value
        return items

    @value.setter
    def value(self, value: list[str]) -> None:
        # Swaps in the caller's list object as-is (no copy is made).
        with self._lock:
            self._value = value

    @property
    def unguarded_value(self) -> list[str]:
        """The internal list, fetched without taking the lock."""
        return self._value

class AtomicStringSet:
    """Set of strings whose add, read and replace go through a lock.

    Note that the accessors hand back the internal set object itself, not a
    copy.
    """
    _value: typing.Set[str]

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._value = set()

    def add(self, value: str) -> typing.Set[str]:
        """Add ``value`` under the lock and return the internal set."""
        with self._lock:
            members = self._value
            members.add(value)
        return members

    @property
    def value(self) -> typing.Set[str]:
        """The internal set, fetched while holding the lock."""
        with self._lock:
            members = self._value
        return members

    @value.setter
    def value(self, value: typing.Set[str]) -> None:
        # Swaps in the caller's set object as-is (no copy is made).
        with self._lock:
            self._value = value

    @property
    def unguarded_value(self) -> typing.Set[str]:
        """The internal set, fetched without taking the lock."""
        return self._value
55 changes: 55 additions & 0 deletions tex2pdf_service/tex2pdf/log_inspection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import typing
from multiprocessing.pool import ThreadPool
import re
from typing import Pattern

from .atomic import AtomicStringSet

# Deliberately left out of the list below: this pattern also triggers for
# .bbl files.
# r'^No file\s+(.*)\.$',

# make sure there is exactly one group catching the file name
# Compiled patterns for "file not found" messages in a TeX log. Each pattern
# has exactly one capture group, which holds the name of the missing file.
TEX_LOG_ERRORS: typing.List[Pattern] = [
    re.compile(exp) for exp in [
        # TeX quotes file names as `name' (backtick ... apostrophe). The
        # closing apostrophe is matched as a plain character: the previous
        # spelling r'\\\'' meant "a literal backslash followed by an
        # apostrophe" and therefore never matched real log output.
        r"^! LaTeX Error: File `([^']*)' not found\.",
        r"^! I can't find file `([^']*)'\.",
        r".*?:\d*: LaTeX Error: File `([^']*)' not found\.",
        r"^LaTeX Warning: File `([^']*)' not found",
        r"^Package .* [fF]ile `([^']*)' not found",
        r"^Package .* No file `([^']*)'",
        r"Error: pdflatex \(file ([^)]*)\): cannot find image file",
        r": File (.*) not found:\s*$",
        # This message uses plain apostrophes on both sides of the name.
        r"! Unable to load picture or PDF file '([^\\']+)'.",
        r"Package pdftex.def Error: File (.*) not found: using draft setting\.",
    ]
]


def inspect_log(log: str,
patterns: typing.List[Pattern] | None = None,
break_on_found: bool = True) -> list[str]:
"""Run the list of regex against a blob string and count the matches.
log: The log blob
patterns: a list of regex patterns. default is TEX_LOG_ERRORS if not given.
break_on_found: stop the search at first found
"""
if patterns is None:
patterns = TEX_LOG_ERRORS
matched_results = AtomicStringSet()
log_lines = log.splitlines()

def _inspect(needle: re.Pattern) -> None:
for line in log_lines:
if (matched := needle.search(line)) is not None:
matched_results.add(matched.group(1))
if break_on_found:
break

with ThreadPool(processes=len(patterns)) as pool:
pool.map(_inspect, patterns)

return list(matched_results.unguarded_value)


if __name__ == '__main__':
    # Manual smoke run: push a tiny sample log through the default patterns
    # and print whatever file names were picked up.
    sample_log = "\nNo file foo.\n"
    print(inspect_log(sample_log))
38 changes: 26 additions & 12 deletions tex2pdf_service/tex2pdf/tex_to_pdf_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from tex2pdf.service_logger import get_logger
from tex_inspection import (pick_package_names, ZeroZeroReadMe, is_pdftex_line,
is_pdflatex_line, find_pdfoutput_1, TEX_FILE_EXTS)
from .log_inspection import inspect_log

WITH_SHELL_ESCAPE = False

Expand Down Expand Up @@ -224,6 +225,17 @@ def _to_pdf_run(self, args: list[str], stem: str,
run["log"] = self.log
return run

def check_missing(self, in_dir: str, run: dict, artifact: str) -> dict:
    """Scoop up the missing files from the tex command log.

    in_dir: directory that holds the produced artifact.
    run: the run record; reads run[artifact]["name"] and, when present,
        run["log"].
    artifact: key of the output entry in ``run`` (callers pass "dvi" or
        "pdf").

    When the artifact file exists and the log names missing files, the names
    are stored in run["missings"], the artifact file is deleted, and
    run[artifact] is rebuilt with file_props() for the now-removed path.
    Returns the same ``run`` dict.
    """
    name = run[artifact]["name"]
    artifact_file = os.path.join(in_dir, name)
    # Callers only set run["log"] when a log was actually fetched, so a
    # missing key means "nothing to inspect" rather than an error.
    log = run.get("log")
    if log and os.path.exists(artifact_file) and (missings := inspect_log(log)):
        run["missings"] = missings
        get_logger().debug(f"Output {name} deleted due to incomplete run.")
        os.unlink(artifact_file)
        # NOTE(review): presumably file_props reports the file as absent
        # after the unlink - confirm against its implementation.
        run[artifact] = file_props(artifact_file)
    return run

#
Expand Down Expand Up @@ -380,11 +392,13 @@ def _base_to_dvi_run(self, step: str, stem: str, args: typing.List[str],
run, out, err = self._exec_cmd(args, in_dir, work_dir, extra={"step": step})
dvi_filename = os.path.join(in_dir, f"{stem}.dvi")
self._check_cmd_run(run, dvi_filename)
self._report_run(run, out, err, step, in_dir, work_dir, "dvi", dvi_filename)
latex_log_file = os.path.join(in_dir, f"{stem}.log")
self.fetch_log(latex_log_file)
if self.log:
run["log"] = self.log
artifact = "dvi"
self._report_run(run, out, err, step, in_dir, work_dir, artifact, dvi_filename)
run = self.check_missing(in_dir, run, artifact)
return run

def _base_ps_to_pdf_run(self, stem: str, work_dir: str, in_dir: str, out_dir: str) -> dict:
Expand Down Expand Up @@ -597,15 +611,13 @@ def produce_pdf(self, tex_file: str, work_dir: str, in_dir: str, out_dir: str) -
status = "success" if run["return_code"] == 0 else "fail"
run["iteration"] = iteration
if return_code in [0, 1]:
with open(os.path.join(in_dir, f"{stem}.log"), encoding='iso-8859-1') as src:
for line in src.readlines():
if line.find(rerun_needle) >= 0:
# Need retry
status = "fail"
break
else:
status = "success"
pass
for line in run["log"].splitlines():
if line.find(rerun_needle) >= 0:
# Need retry
status = "fail"
break
else:
status = "success"
pass
pass
outcome.update({"runs": self.runs, "status": status, "step": step})
Expand All @@ -619,8 +631,10 @@ def produce_pdf(self, tex_file: str, work_dir: str, in_dir: str, out_dir: str) -

def _pdflatex_run(self, step: str, work_dir: str, in_dir: str, out_dir: str) -> dict:
cmd_log = os.path.join(in_dir, f"{self.stem}.log")
return self._to_pdf_run(self.to_pdf_args, self.stem,
step, work_dir, in_dir, out_dir, cmd_log)
run = self._to_pdf_run(self.to_pdf_args, self.stem,
step, work_dir, in_dir, out_dir, cmd_log)
run = self.check_missing(in_dir, run, "pdf")
return run

def converter_name(self) -> str:
return "pdflatex: %s" % (shlex.join(self.to_pdf_args))
Expand Down
Loading