Skip to content

Commit

Permalink
Improvements follow @gmischler code review
Browse files Browse the repository at this point in the history
  • Loading branch information
Lucas-C committed Jul 2, 2024
1 parent 03fc88f commit a3792ea
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 14 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ This can also be enabled programmatically with `warnings.simplefilter('default',
* feature to identify the Unicode script of the input text and break it into fragments when different scripts are used, improving [text shaping](https://py-pdf.github.io/fpdf2/TextShaping.html) results
* [`FPDF.image()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.image): now handles `keep_aspect_ratio` in combination with an enum value provided to `x`
* [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html): now supports CSS page-break properties: [documentation](https://py-pdf.github.io/fpdf2/HTML.html#page-breaks)
* [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html): new optional `font_family` parameter to set the default font family
* [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html): spacing before lists can now be adjusted via the `tag_styles` attribute - thanks to @lcgeneralprojects
* file names are mentioned in errors when `fpdf2` fails to parse a SVG image
### Fixed
Expand Down
22 changes: 22 additions & 0 deletions docs/HTML.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,28 @@ pdf.output("html_dd_indented.pdf")
and that some [`FontFace`](https://py-pdf.github.io/fpdf2/fpdf/fonts.html#fpdf.fonts.FontFace) or [`TextStyle`](https://py-pdf.github.io/fpdf2/fpdf/fonts.html#fpdf.fonts.TextStyle) properties may not be honored.
However, **Pull Requests are welcome** to implement missing features!

### Default font

_New in [:octicons-tag-24: 2.7.10](https://github.com/py-pdf/fpdf2/blob/master/CHANGELOG.md)_

The default font used by [`FPDF.write_html()`](https://py-pdf.github.io/fpdf2/fpdf/fpdf.html#fpdf.fpdf.FPDF.write_html) is **Times**.

You can change this default font by passing `font_family` to this method:
```python
from fpdf import FPDF

pdf = FPDF()
pdf.add_page()
pdf.write_html("""
<h1>Big title</h1>
<section>
<h2>Section title</h2>
<p>Hello world!</p>
</section>
""", font_family="Helvetica")
pdf.output("html_helvetica.pdf")
```


## Supported HTML features

Expand Down
42 changes: 28 additions & 14 deletions fpdf/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
DEGREE_WIN1252 = "\xb0"
HEADING_TAGS = ("h1", "h2", "h3", "h4", "h5", "h6")
DEFAULT_TAG_STYLES = {
# inline tags:
# Inline tags are FontFace instances :
"a": FontFace(color="#00f", emphasis="UNDERLINE"),
"b": FontFace(emphasis="BOLD"),
"code": FontFace(family="Courier"),
Expand All @@ -32,7 +32,7 @@
"i": FontFace(emphasis="ITALICS"),
"strong": FontFace(emphasis="BOLD"),
"u": FontFace(emphasis="UNDERLINE"),
# block tags:
# Block tags are TextStyle instances :
"blockquote": TextStyle(color="#64002d", t_margin=3, b_margin=3),
"center": TextStyle(t_margin=4 + 7 / 30),
"dd": TextStyle(l_margin=10),
Expand Down Expand Up @@ -61,9 +61,20 @@
"ol": TextStyle(t_margin=2),
"ul": TextStyle(t_margin=2),
}
INLINE_TAGS = HEADING_TAGS + ("a", "b", "code", "em", "font", "i", "strong", "u")
BLOCK_TAGS = ("blockquote", "center", "dd", "dt", "li", "p", "pre", "ol", "ul")
assert (set(BLOCK_TAGS) | set(INLINE_TAGS)) == set(DEFAULT_TAG_STYLES.keys())
# Tags whose styles are FontFace instances:
INLINE_TAGS = ("a", "b", "code", "em", "font", "i", "strong", "u")
# Tags whose styles are TextStyle instances (all headings included):
BLOCK_TAGS = (
    *HEADING_TAGS,
    "blockquote",
    "center",
    "dd",
    "dt",
    "li",
    "p",
    "pre",
    "ol",
    "ul",
)
# Import-time sanity check: every tag of DEFAULT_TAG_STYLES must be listed
# in exactly one of the two *_TAGS constants above, so that none is forgotten.
assert set(DEFAULT_TAG_STYLES) == set(BLOCK_TAGS).symmetric_difference(INLINE_TAGS)

# Pattern to substitute whitespace sequences with a single space character each.
# The following are all Unicode characters with White_Space classification plus the newline.
Expand Down Expand Up @@ -300,6 +311,7 @@ def __init__(
warn_on_tags_not_matching=True,
tag_indents=None,
tag_styles=None,
font_family="times",
):
"""
Args:
Expand All @@ -322,6 +334,7 @@ def __init__(
tag_indents (dict): [**DEPRECATED since v2.7.10**]
mapping of HTML tag names to numeric values representing their horizontal left indentation. - Set `tag_styles` instead
tag_styles (dict[str, fpdf.fonts.TextStyle]): mapping of HTML tag names to `fpdf.TextStyle` or `fpdf.FontFace` instances
font_family (str): optional font family, used when no font family is already set on `pdf`. Defaults to Times.
"""
super().__init__()
self.pdf = pdf
Expand All @@ -342,7 +355,7 @@ def __init__(
# If a font was defined previously, we reinstate that separately after we're finished here.
# In this case the TOC will be rendered with that font and not ours. But adding a TOC tag only
# makes sense if the whole document gets converted from HTML, so this should be acceptable.
self.font_family = pdf.font_family or "times"
self.font_family = pdf.font_family or font_family
self.font_size_pt = pdf.font_size_pt
self.set_font(
family=self.font_family, emphasis=TextEmphasis.NONE, set_default=True
Expand Down Expand Up @@ -379,11 +392,12 @@ def __init__(
raise NotImplementedError(
f"Cannot set style for HTML tag <{tag}> (contributions are welcome to add support for this)"
)
default_tag_style = self.tag_styles[tag]
is_base_fontFace = isinstance(tag_style, FontFace) and not isinstance(
tag_style, TextStyle
)
if is_base_fontFace and isinstance(default_tag_style, TextStyle):
if not isinstance(tag_style, FontFace):
raise ValueError(
f"tag_styles values must be instances of FontFace or TextStyle - received: {tag_style}"
)
# We convert FontFace values provided for block tags into TextStyle values:
if tag in BLOCK_TAGS and not isinstance(tag_style, TextStyle):
# pylint: disable=redefined-loop-name
tag_style = TextStyle(
font_family=tag_style.family,
Expand All @@ -394,9 +408,9 @@ def __init__(
color=tag_style.color,
fill_color=tag_style.fill_color,
# Using default tag margins:
t_margin=default_tag_style.t_margin,
l_margin=default_tag_style.l_margin,
b_margin=default_tag_style.b_margin,
t_margin=self.tag_styles[tag].t_margin,
l_margin=self.tag_styles[tag].l_margin,
b_margin=self.tag_styles[tag].b_margin,
)
self.tag_styles[tag] = tag_style
if heading_sizes is not None:
Expand Down
Binary file added test/html/html_dd_tag_indent_deprecated.pdf
Binary file not shown.
Binary file added test/html/html_font_family.pdf
Binary file not shown.
28 changes: 28 additions & 0 deletions test/html/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -1083,3 +1083,31 @@ def test_html_heading_above_below(tmp_path):
},
)
assert_pdf_equal(pdf, HERE / "html_heading_above_below.pdf", tmp_path)


def test_html_dd_tag_indent_deprecated(tmp_path):
    """Check that the deprecated `dd_tag_indent` argument matches the same reference PDF as `tag_styles`."""
    html = "<dl><dt>description title</dt><dd>description details</dd></dl>"
    expected_pdf = HERE / "html_dd_tag_indent_deprecated.pdf"

    # Rendering through the `tag_styles` argument:
    styled_doc = FPDF()
    styled_doc.add_page()
    styled_doc.write_html(html, tag_styles={"dd": TextStyle(l_margin=5)})
    assert_pdf_equal(styled_doc, expected_pdf, tmp_path)

    # Same markup through the legacy keyword, which must emit a DeprecationWarning:
    legacy_doc = FPDF()
    legacy_doc.add_page()
    with pytest.warns(DeprecationWarning):
        legacy_doc.write_html(html, dd_tag_indent=5)
    assert_pdf_equal(legacy_doc, expected_pdf, tmp_path)


def test_html_font_family(tmp_path):
    """Render a paragraph with `font_family="Helvetica"` and compare it to the reference PDF."""
    html = "<p><b>hello</b> world. i am <i>sleepy</i>.</p>"
    doc = FPDF()
    doc.add_page()
    doc.write_html(html, font_family="Helvetica")
    assert_pdf_equal(doc, HERE / "html_font_family.pdf", tmp_path)

0 comments on commit a3792ea

Please sign in to comment.