From 7ba8431c86c77bc03bbb5f3b5ffbc581e1e9e46e Mon Sep 17 00:00:00 2001 From: Gregory Anders <8965202+gpanders@users.noreply.github.com> Date: Tue, 16 Jan 2024 11:13:43 -0600 Subject: [PATCH] Ignore <p>
tags in table rows (#354) Closes #198 Co-authored-by: Alireza Savand <591113+Alir3z4@users.noreply.github.com> --- ChangeLog.rst | 2 ++ html2text/__init__.py | 2 ++ test/no_p_in_table.html | 12 ++++++++++++ test/no_p_in_table.md | 15 +++++++++++++++ 4 files changed, 31 insertions(+) create mode 100644 test/no_p_in_table.html create mode 100644 test/no_p_in_table.md diff --git a/ChangeLog.rst b/ChangeLog.rst index caf2f78..b648fe4 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -8,6 +8,8 @@ UNRELEASED * Fix extra line breaks inside html link text (between '[' and ']') * Fix #344: indent ``
`` tags inside table rows. * Don't wrap tables by default and add a ``--wrap-tables`` config option * Remove support for Python ≤ 3.5. Now requires Python 3.6+. * Support for Python 3.10. diff --git a/html2text/__init__.py b/html2text/__init__.py index 89b98a1..ae4e154 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -367,6 +367,8 @@ def handle_tag( self.soft_br() elif self.astack: pass + elif self.split_next_td: + pass else: self.p() diff --git a/test/no_p_in_table.html b/test/no_p_in_table.html new file mode 100644 index 0000000..73c1bb8 --- /dev/null +++ b/test/no_p_in_table.html @@ -0,0 +1,12 @@ + +
code
, bolds and italics. Header 1 | Header 2 | Header 3 |
---|---|---|
Content 1 | 2 | Image! |
Content 1 longer | Content 2 | blah |
Content | Content 2 | blah |
t | Content 2 | blah blah blah |