Skip to content

Commit

Permalink
Return line breaks where available
Browse files Browse the repository at this point in the history
  • Loading branch information
RogerSelwyn committed Jun 9, 2022
1 parent 27c122c commit 8f445ed
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion custom_components/o365/utils.py
Original file line number Diff line number Diff line change
def clean_html(html):
    """Convert an HTML document to plain text, keeping its line breaks.

    The markup is parsed with BeautifulSoup's ``html.parser``. When a
    ``<body>`` element is present, its text is extracted, each line is
    stripped, runs separated by a double space are placed on their own
    line, empty entries are dropped, and non-breaking spaces are replaced
    with ordinary spaces.

    Args:
        html: The HTML markup to clean.

    Returns:
        The cleaned plain text joined with ``"\\n"``, or ``html`` unchanged
        when no ``<body>`` element is found.
    """
    soup = BeautifulSoup(html, features="html.parser")
    if body := soup.find("body"):
        # Extract the text without a separator or stripping so the line
        # breaks present in the document survive.
        text = body.get_text()

        # Break into lines and remove leading/trailing space on each.
        lines = (line.strip() for line in text.splitlines())
        # A double space marks a boundary between phrases that belong on
        # separate lines; splitting on a single space would put every word
        # on its own line.
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        # Drop blank entries and re-join one phrase per line.
        text = "\n".join(chunk for chunk in chunks if chunk)
        # Non-breaking spaces are replaced last, so they never take part in
        # the double-space split above.
        return text.replace("\xa0", " ")

    return html

Expand Down

0 comments on commit 8f445ed

Please sign in to comment.