Skip to content

Commit

Permalink
Use overflow-wrap: anywhere; instead of overflow-wrap: break-word; (#144)
Browse files Browse the repository at this point in the history

* Use overflow-wrap: anywhere;

* Fix python style errors
  • Loading branch information
tamanyan authored May 29, 2023
1 parent b92479a commit 296f53d
Show file tree
Hide file tree
Showing 14 changed files with 37 additions and 39 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ You can also translate an HTML string by wrapping phrases with non-breaking mark

```python
print(parser.translate_html_string('今日は<b>とても天気</b>です。'))
# <span style="word-break: keep-all; overflow-wrap: break-word;">今日は<b ><wbr>とても<wbr>天気</b>です。</span>
# <span style="word-break: keep-all; overflow-wrap: anywhere;">今日は<b ><wbr>とても<wbr>天気</b>です。</span>
```

If you have a custom model, you can use it as follows.
Expand Down Expand Up @@ -130,7 +130,7 @@ $ echo $'本日は晴天です。\n明日は曇りでしょう。' | budoux

```shellsession
$ budoux 本日は晴天です。 -H
<span style="word-break: keep-all; overflow-wrap: break-word;">本日は<wbr>晴天です。</span>
<span style="word-break: keep-all; overflow-wrap: anywhere;">本日は<wbr>晴天です。</span>
```

If you want to see help, run `budoux -h`.
Expand Down
2 changes: 1 addition & 1 deletion budoux/html_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from .utils import SEP

HTMLAttr = typing.List[typing.Tuple[str, typing.Union[str, None]]]
PARENT_CSS_STYLE = 'word-break: keep-all; overflow-wrap: break-word;'
PARENT_CSS_STYLE = 'word-break: keep-all; overflow-wrap: anywhere;'
with open(
os.path.join(os.path.dirname(__file__), 'skip_nodes.json'),
encoding='utf-8') as f:
Expand Down
2 changes: 1 addition & 1 deletion java/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ method to get an HTML string with non-breaking markup to wrap phrases.

```java
System.out.println(parser.translateHTMLString("今日は<strong>良い天気</strong>ですね。"));
//<span style="word-break: keep-all; overflow-wrap: break-word;">今日は<strong><wbr>良い<wbr>天気</strong>ですね。</span>
//<span style="word-break: keep-all; overflow-wrap: anywhere;">今日は<strong><wbr>良い<wbr>天気</strong>ですね。</span>
```

## Caveat
Expand Down
2 changes: 1 addition & 1 deletion java/src/main/java/com/google/budoux/HTMLProcessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
/** Processes phrases into an HTML string wrapping them in no-breaking markup. */
final class HTMLProcessor {
private static final Set<String> skipNodes;
private static final String STYLE = "word-break: keep-all; overflow-wrap: break-word;";
private static final String STYLE = "word-break: keep-all; overflow-wrap: anywhere;";

private HTMLProcessor() {}

Expand Down
8 changes: 4 additions & 4 deletions java/src/test/java/com/google/budoux/HTMLProcessorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public void testResolveWithSimpleTextInput() {
String html = "abcdef";
String result = HTMLProcessor.resolve(phrases, html);
assertEquals(
"<span style=\"word-break: keep-all; overflow-wrap: break-word;\">abc<wbr>def</span>",
"<span style=\"word-break: keep-all; overflow-wrap: anywhere;\">abc<wbr>def</span>",
result);
}

Expand All @@ -44,7 +44,7 @@ public void testResolveWithStandardHTMLInput() {
String html = "ab<a href=\"http://example.com\">cd</a>ef";
String result = HTMLProcessor.resolve(phrases, html);
assertEquals(
"<span style=\"word-break: keep-all; overflow-wrap: break-word;\">ab<a"
"<span style=\"word-break: keep-all; overflow-wrap: anywhere;\">ab<a"
+ " href=\"http://example.com\">c<wbr>d</a>ef</span>",
result);
}
Expand All @@ -56,7 +56,7 @@ public void testResolveWithNodesToSkip() {
String result = HTMLProcessor.resolve(phrases, html);
assertEquals(
"<span style=\"word-break: keep-all; overflow-wrap:"
+ " break-word;\">a<button>bcde</button>f</span>",
+ " anywhere;\">a<button>bcde</button>f</span>",
result);
}

Expand All @@ -66,7 +66,7 @@ public void testResolveWithNothingToSplit() {
String html = "abcdef";
String result = HTMLProcessor.resolve(phrases, html);
assertEquals(
"<span style=\"word-break: keep-all; overflow-wrap: break-word;\">abcdef</span>", result);
"<span style=\"word-break: keep-all; overflow-wrap: anywhere;\">abcdef</span>", result);
}

@Test
Expand Down
2 changes: 1 addition & 1 deletion java/src/test/java/com/google/budoux/ParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public void testTranslateHTMLString() {
String html = "<a href=\"http://example.com\">xyza</a>bc";
String result = parser.translateHTMLString(html);
assertEquals(
"<span style=\"word-break: keep-all; overflow-wrap: break-word;\"><a"
"<span style=\"word-break: keep-all; overflow-wrap: anywhere;\"><a"
+ " href=\"http://example.com\">xyz<wbr>a</a>bc</span>",
result);
}
Expand Down
6 changes: 3 additions & 3 deletions javascript/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ You can also translate an HTML string to wrap phrases with non-breaking markup.

```javascript
console.log(parser.translateHTMLString('今日は<b>とても天気</b>です。'));
// <span style="word-break: keep-all; overflow-wrap: break-word;">今日は<b><wbr>とても<wbr>天気</b>です。</span>
// <span style="word-break: keep-all; overflow-wrap: anywhere;">今日は<b><wbr>とても<wbr>天気</b>です。</span>
```

### Applying to an HTML element
Expand All @@ -69,7 +69,7 @@ console.log(ele.outerHTML);
// <p class="budou-this">今日は<b>とても天気</b>です。</p>
parser.applyElement(ele);
console.log(ele.outerHTML);
// <p class="budou-this" style="word-break: keep-all; overflow-wrap: break-word;">今日は<b><wbr>とても<wbr>天気</b>です。</p>
// <p class="budou-this" style="word-break: keep-all; overflow-wrap: anywhere;">今日は<b><wbr>とても<wbr>天気</b>です。</p>
```

Internally, the `applyElement` calls the [`HTMLProcessor`] class
Expand Down Expand Up @@ -162,7 +162,7 @@ $ echo $'本日は晴天です。\n明日は曇りでしょう。' | budoux

```shellsession
$ budoux 本日は晴天です。 -H
<span style="word-break: keep-all; overflow-wrap: break-word;">本日は<wbr>晴天です。</span>
<span style="word-break: keep-all; overflow-wrap: anywhere;">本日は<wbr>晴天です。</span>
```

If you want to see help, run `budoux -h`.
Expand Down
2 changes: 1 addition & 1 deletion javascript/src/dom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,5 @@ export const setInnerHtml = (element: Element | ShadowRoot, html: string) => {
*/
export const applyWrapStyle = (element: HTMLElement) => {
element.style.wordBreak = 'keep-all';
element.style.overflowWrap = 'break-word';
element.style.overflowWrap = 'anywhere';
};
2 changes: 1 addition & 1 deletion javascript/src/html_processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ export interface HTMLProcessorOptions {
/**
* This class name is added to the containing block when the BudouX is applied.
* The containing block should have following CSS properties to make it work.
* `{ word-break: keep-all; overflow-wrap: break-word; }`
* `{ word-break: keep-all; overflow-wrap: anywhere; }`
*
* When falsy, an inline style is set instead.
*/
Expand Down
4 changes: 2 additions & 2 deletions javascript/src/tests/test_cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ describe('cli', () => {
const inputText = '今日は天気です。';
const argv = ['node', 'budoux', '--html', inputText];
const expectedStdOut =
'<span style="word-break: keep-all; overflow-wrap: break-word;">今日は<wbr>天気です。</span>';
'<span style="word-break: keep-all; overflow-wrap: anywhere;">今日は<wbr>天気です。</span>';
cli(argv);
expect(console.log).toHaveBeenCalledWith(expectedStdOut);
});
Expand All @@ -57,7 +57,7 @@ describe('cli', () => {
const inputText = '今日は天気です。';
const argv = ['node', 'budoux', '-H', inputText];
const expectedStdOut =
'<span style="word-break: keep-all; overflow-wrap: break-word;">今日は<wbr>天気です。</span>';
'<span style="word-break: keep-all; overflow-wrap: anywhere;">今日は<wbr>天気です。</span>';
cli(argv);
expect(console.log).toHaveBeenCalledWith(expectedStdOut);
});
Expand Down
18 changes: 9 additions & 9 deletions javascript/src/tests/test_parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ describe('Parser.applyElement', () => {
it('should insert WBR tags where the sentence should break.', () => {
const inputHTML = '<p>xyzabcabc</p>';
const expectedHTML = `
<p style="word-break: keep-all; overflow-wrap: break-word;"
<p style="word-break: keep-all; overflow-wrap: anywhere;"
>xyz<wbr>abc<wbr>abc</p>`;
const model = {
UW4: {a: 1001}, // means "should separate right before 'a'".
Expand All @@ -73,7 +73,7 @@ describe('Parser.applyElement', () => {

it('should insert WBR tags even it overlaps with other HTML tags.', () => {
const inputHTML = '<p>xy<a href="#">zabca</a>bc</p>';
const expectedHTML = `<p style="word-break: keep-all; overflow-wrap: break-word;"
const expectedHTML = `<p style="word-break: keep-all; overflow-wrap: anywhere;"
>xy<a href="#">z<wbr>abc<wbr>a</a>bc</p>`;
const model = {
UW4: {a: 1001}, // means "should separate right before 'a'".
Expand Down Expand Up @@ -101,15 +101,15 @@ describe('Parser.translateHTMLString', () => {
it('should output a html string with a SPAN parent with proper style attributes.', () => {
const inputHTML = 'xyzabcd';
const expectedHTML = `
<span style="word-break: keep-all; overflow-wrap: break-word;">xyz<wbr>abcd</span>`;
<span style="word-break: keep-all; overflow-wrap: anywhere;">xyz<wbr>abcd</span>`;
checkEqual(defaultModel, inputHTML, expectedHTML);
});

it('should not add a SPAN parent if the input already has one single parent.', () => {
const inputHTML = '<p class="foo" style="color: red">xyzabcd</p>';
const expectedHTML = `
<p class="foo"
style="color: red; word-break: keep-all; overflow-wrap: break-word;"
style="color: red; word-break: keep-all; overflow-wrap: anywhere;"
>xyz<wbr>abcd</p>`;
checkEqual(defaultModel, inputHTML, expectedHTML);
});
Expand All @@ -123,39 +123,39 @@ describe('Parser.translateHTMLString', () => {
it('should pass script tags as-is.', () => {
const inputHTML = 'xyz<script>alert(1);</script>xyzabc';
const expectedHTML = `<span
style="word-break: keep-all; overflow-wrap: break-word;"
style="word-break: keep-all; overflow-wrap: anywhere;"
>xyz<script>alert(1);</script>xyz<wbr>abc</span>`;
checkEqual(defaultModel, inputHTML, expectedHTML);
});

it('script tags on top should be discarded by the DOMParser.', () => {
const inputHTML = '<script>alert(1);</script>xyzabc';
const expectedHTML = `<span
style="word-break: keep-all; overflow-wrap: break-word;"
style="word-break: keep-all; overflow-wrap: anywhere;"
>xyz<wbr>abc</span>`;
checkEqual(defaultModel, inputHTML, expectedHTML);
});

it('should skip some specific tags.', () => {
const inputHTML = 'xyz<code>abc</code>abc';
const expectedHTML = `<span
style="word-break: keep-all; overflow-wrap: break-word;"
style="word-break: keep-all; overflow-wrap: anywhere;"
>xyz<code>abc</code><wbr>abc</span>`;
checkEqual(defaultModel, inputHTML, expectedHTML);
});

it('should not ruin attributes of child elements.', () => {
const inputHTML = 'xyza<a href="#" hidden>bc</a>abc';
const expectedHTML = `<span
style="word-break: keep-all; overflow-wrap: break-word;"
style="word-break: keep-all; overflow-wrap: anywhere;"
>xyz<wbr>a<a href="#" hidden>bc</a><wbr>abc</span>`;
checkEqual(defaultModel, inputHTML, expectedHTML);
});

it('should work with emojis.', () => {
const inputHTML = 'xyza🇯🇵🇵🇹abc';
const expectedHTML = `<span
style="word-break: keep-all; overflow-wrap: break-word;"
style="word-break: keep-all; overflow-wrap: anywhere;"
>xyz<wbr>a🇯🇵🇵🇹<wbr>abc</span>`;
checkEqual(defaultModel, inputHTML, expectedHTML);
});
Expand Down
8 changes: 4 additions & 4 deletions tests/test_html_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,26 @@ def test_with_simple_text_input(self) -> None:
chunks = ['abc', 'def']
html = 'abcdef'
result = html_processor.resolve(chunks, html)
expected = '<span style="word-break: keep-all; overflow-wrap: break-word;">abc<wbr>def</span>'
expected = '<span style="word-break: keep-all; overflow-wrap: anywhere;">abc<wbr>def</span>'
self.assertEqual(result, expected)

def test_with_standard_html_input(self) -> None:
chunks = ['abc', 'def']
html = 'ab<a href="http://example.com">cd</a>ef'
result = html_processor.resolve(chunks, html)
expected = '<span style="word-break: keep-all; overflow-wrap: break-word;">ab<a href="http://example.com">c<wbr>d</a>ef</span>'
expected = '<span style="word-break: keep-all; overflow-wrap: anywhere;">ab<a href="http://example.com">c<wbr>d</a>ef</span>'
self.assertEqual(result, expected)

def test_with_nodes_to_skip(self) -> None:
chunks = ['abc', 'def']
html = "a<button>bcde</button>f"
result = html_processor.resolve(chunks, html)
expected = '<span style="word-break: keep-all; overflow-wrap: break-word;">a<button>bcde</button>f</span>'
expected = '<span style="word-break: keep-all; overflow-wrap: anywhere;">a<button>bcde</button>f</span>'
self.assertEqual(result, expected)

def test_with_nothing_to_split(self) -> None:
chunks = ['abcdef']
html = 'abcdef'
result = html_processor.resolve(chunks, html)
expected = '<span style="word-break: keep-all; overflow-wrap: break-word;">abcdef</span>'
expected = '<span style="word-break: keep-all; overflow-wrap: anywhere;">abcdef</span>'
self.assertEqual(result, expected)
6 changes: 2 additions & 4 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ def test_cmdargs_single_html(self) -> None:
output = main._main(cmdargs)

self.assertEqual(
output,
'<span style="word-break: keep-all; overflow-wrap: break-word;">'
output, '<span style="word-break: keep-all; overflow-wrap: anywhere;">'
'今日は<b><wbr>とても<wbr>天気</b>です。</span>')

def test_cmdargs_multi_html(self) -> None:
Expand Down Expand Up @@ -172,8 +171,7 @@ def test_cmdargs_html_stdin(self) -> None:
output = main._main(["-H"])

self.assertEqual(
output,
'<span style="word-break: keep-all; overflow-wrap: break-word;">'
output, '<span style="word-break: keep-all; overflow-wrap: anywhere;">'
'これは<b><wbr>テスト</b>です。<wbr>\n'
'</span>')

Expand Down
10 changes: 5 additions & 5 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_translate_html_string(self) -> None:

input_html = 'xyzabcd'
expected_html = (
'<span style="word-break: keep-all; overflow-wrap: break-word;">'
'<span style="word-break: keep-all; overflow-wrap: anywhere;">'
'xyz<wbr>abcd</span>')
output_html = p.translate_html_string(input_html)
self.assertEqual(
Expand All @@ -71,31 +71,31 @@ def test_translate_html_string(self) -> None:

input_html = 'xyz<script>alert(1);</script>xyzabc'
expected_html = (
'<span style="word-break: keep-all; overflow-wrap: break-word;">'
'<span style="word-break: keep-all; overflow-wrap: anywhere;">'
'xyz<script>alert(1);</script>xyz<wbr>abc</span>')
output_html = p.translate_html_string(input_html)
self.assertEqual(output_html, expected_html,
'Should pass script tags as is.')

input_html = 'xyz<code>abc</code>abc'
expected_html = (
'<span style="word-break: keep-all; overflow-wrap: break-word;">'
'<span style="word-break: keep-all; overflow-wrap: anywhere;">'
'xyz<code>abc</code><wbr>abc</span>')
output_html = p.translate_html_string(input_html)
self.assertEqual(output_html, expected_html,
'Should skip some specific tags.')

input_html = 'xyza<a href="#" hidden>bc</a>abc'
expected_html = (
'<span style="word-break: keep-all; overflow-wrap: break-word;">'
'<span style="word-break: keep-all; overflow-wrap: anywhere;">'
'xyz<wbr>a<a href="#" hidden>bc</a><wbr>abc</span>')
output_html = p.translate_html_string(input_html)
self.assertEqual(output_html, expected_html,
'Should not ruin attributes of child elements.')

input_html = 'xyza🇯🇵🇵🇹abc'
expected_html = (
'<span style="word-break: keep-all; overflow-wrap: break-word;">'
'<span style="word-break: keep-all; overflow-wrap: anywhere;">'
'xyz<wbr>a🇯🇵🇵🇹<wbr>abc</span>')
output_html = p.translate_html_string(input_html)
self.assertEqual(output_html, expected_html, 'Should work with emojis.')
Expand Down

0 comments on commit 296f53d

Please sign in to comment.