diff --git a/Cargo.lock b/Cargo.lock index 4bb1d3e..2eccc0a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "aho-corasick" version = "1.1.1" @@ -86,6 +92,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "base64" +version = "0.21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -94,9 +115,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "block-buffer" @@ -209,6 +230,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -219,6 +249,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "deranged" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +dependencies = [ + "powerfmt", +] + [[package]] name = "diff" version = "0.1.13" @@ -275,6 +314,22 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "generic-array" version = "0.14.7" @@ -299,6 +354,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hermit-abi" version = "0.3.3" @@ -334,6 +395,16 @@ dependencies = [ "cc", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + [[package]] name = "is-terminal" version = "0.4.9" @@ -366,6 +437,15 @@ version = "0.2.148" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" +[[package]] +name = "line-wrap" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30344350a2a51da54c1d53be93fade8a237e545dbcc4bdbe635413f2117cab9" +dependencies = [ + "safemem", +] + [[package]] name = "linereader" version = "0.4.0" @@ -375,6 +455,12 @@ dependencies = [ 
"memchr", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.10" @@ -427,6 +513,7 @@ dependencies = [ "regex", "semver", "serde_json", + "syntect", "tempfile", "textwrap", ] @@ -437,6 +524,15 @@ version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + [[package]] name = "normpath" version = "1.1.1" @@ -461,6 +557,28 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +[[package]] +name = "onig" +version = "6.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" +dependencies = [ + "bitflags 1.3.2", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "opener" version = "0.6.1" @@ -517,6 +635,26 @@ dependencies = [ "sha2", ] +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "plist" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9a4a0cfc5fb21a09dc6af4bf834cf10d4a32fccd9e2ea468c4b1751a097487aa" +dependencies = [ + "base64", + "indexmap", + "line-wrap", + "quick-xml", + "serde", + "time", +] + [[package]] name = "polib" version = "0.2.0" @@ -526,6 +664,12 @@ dependencies = [ "linereader", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "pretty_assertions" version = "1.4.0" @@ -565,6 +709,15 @@ dependencies = [ "pulldown-cmark", ] +[[package]] +name = "quick-xml" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.33" @@ -618,7 +771,7 @@ version = "0.38.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", @@ -631,6 +784,21 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "semver" version = "1.0.19" @@ -702,6 +870,27 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syntect" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e02b4b303bf8d08bfeb0445cba5068a3d306b6baece1d5582171a9bf49188f91" +dependencies = [ + "bincode", + "bitflags 1.3.2", + "flate2", + "fnv", + "once_cell", + "onig", + "plist", + "regex-syntax", + "serde", + "serde_json", + "thiserror", + "walkdir", + "yaml-rust", +] + [[package]] name = "tempfile" version = "3.8.0" @@ -760,6 +949,35 @@ dependencies = [ "syn", ] +[[package]] +name = "time" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +dependencies = [ + "deranged", + "itoa", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +dependencies = [ + "time-core", +] + [[package]] name = "toml" version = "0.5.11" @@ -814,6 +1032,16 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasm-bindgen" version = "0.2.87" @@ -974,6 +1202,15 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "yansi" version = "0.5.1" diff --git a/i18n-helpers/Cargo.toml b/i18n-helpers/Cargo.toml index 610e6ac..2226b38 100644 --- a/i18n-helpers/Cargo.toml +++ b/i18n-helpers/Cargo.toml @@ -19,6 +19,7 @@ pulldown-cmark-to-cmark = "11.0.0" regex = "1.9.4" semver = "1.0.16" serde_json = "1.0.91" +syntect = "5.1.0" textwrap = { version = "0.16.0", default-features = false } [dev-dependencies] diff --git a/i18n-helpers/USAGE.md b/i18n-helpers/USAGE.md index a7b3599..30ee945 100644 --- a/i18n-helpers/USAGE.md +++ b/i18n-helpers/USAGE.md @@ -210,6 +210,9 @@ Itemized list: - C should be translated. ```` +Note that we don't extract the full text of code blocks. Only text that is +recognized as comments and literal strings is extracted. + ## Normalizing Existing PO Files When mdbook-i18n-helpers change, the generated PO files change as well. This can diff --git a/i18n-helpers/fuzz/Cargo.lock b/i18n-helpers/fuzz/Cargo.lock index e190c31..dc4b55a 100644 --- a/i18n-helpers/fuzz/Cargo.lock +++ b/i18n-helpers/fuzz/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + [[package]] name = "aho-corasick" version = "1.1.1" @@ -95,6 +101,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "base64" +version = "0.21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -103,9 +124,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" [[package]] name = "block-buffer" @@ -219,6 +240,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -229,6 +259,15 @@ dependencies = [ "typenum", ] +[[package]] +name = "deranged" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f32d04922c60427da6f9fef14d042d9edddef64cb9d4ce0d64d0685fbeb1fd3" +dependencies = [ + "powerfmt", +] + [[package]] name = "derive_arbitrary" version 
= "1.3.1" @@ -296,6 +335,22 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "generic-array" version = "0.14.7" @@ -320,6 +375,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + [[package]] name = "hermit-abi" version = "0.3.3" @@ -355,6 +416,16 @@ dependencies = [ "cc", ] +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown", +] + [[package]] name = "is-terminal" version = "0.4.9" @@ -407,6 +478,15 @@ dependencies = [ "once_cell", ] +[[package]] +name = "line-wrap" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30344350a2a51da54c1d53be93fade8a237e545dbcc4bdbe635413f2117cab9" +dependencies = [ + "safemem", +] + [[package]] name = "linereader" version = "0.4.0" @@ -416,6 +496,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.10" @@ -467,6 +553,7 
@@ dependencies = [ "regex", "semver", "serde_json", + "syntect", "textwrap", ] @@ -488,6 +575,15 @@ version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + [[package]] name = "normpath" version = "1.1.1" @@ -512,6 +608,28 @@ version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" +[[package]] +name = "onig" +version = "6.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" +dependencies = [ + "bitflags 1.3.2", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "opener" version = "0.6.1" @@ -568,6 +686,26 @@ dependencies = [ "sha2", ] +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + +[[package]] +name = "plist" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a4a0cfc5fb21a09dc6af4bf834cf10d4a32fccd9e2ea468c4b1751a097487aa" +dependencies = [ + "base64", + "indexmap", + "line-wrap", + "quick-xml", + "serde", + "time", +] + [[package]] name = "polib" version = "0.2.0" @@ -577,6 +715,12 @@ dependencies = [ "linereader", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "pretty_assertions" version = "1.4.0" @@ -616,6 +760,15 @@ dependencies = [ "pulldown-cmark", ] +[[package]] +name = "quick-xml" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.33" @@ -669,7 +822,7 @@ version = "0.38.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "745ecfa778e66b2b63c88a61cb36e0eea109e803b0b86bf9879fbc77c70e86ed" dependencies = [ - "bitflags 2.4.0", + "bitflags 2.4.1", "errno", "libc", "linux-raw-sys", @@ -682,6 +835,21 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "semver" version = "1.0.19" @@ -753,6 +921,27 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syntect" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e02b4b303bf8d08bfeb0445cba5068a3d306b6baece1d5582171a9bf49188f91" +dependencies = [ + "bincode", + "bitflags 1.3.2", + "flate2", + "fnv", + "once_cell", + "onig", + "plist", + "regex-syntax", + "serde", + "serde_json", + "thiserror", + "walkdir", + "yaml-rust", +] + [[package]] name = "tempfile" version = "3.8.0" @@ -811,6 +1000,35 @@ 
dependencies = [ "syn", ] +[[package]] +name = "time" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a34ab300f2dee6e562c10a046fc05e358b29f9bf92277f30c3c8d82275f6f5" +dependencies = [ + "deranged", + "itoa", + "powerfmt", + "serde", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" + +[[package]] +name = "time-macros" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ad70d68dba9e1f8aceda7aa6711965dfec1cac869f311a51bd08b3a2ccbce20" +dependencies = [ + "time-core", +] + [[package]] name = "toml" version = "0.5.11" @@ -865,6 +1083,16 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasm-bindgen" version = "0.2.87" @@ -1025,6 +1253,15 @@ version = "0.48.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "yansi" version = "0.5.1" diff --git a/i18n-helpers/fuzz/fuzz_targets/group_events.rs b/i18n-helpers/fuzz/fuzz_targets/group_events.rs index af4faef..bcb6499 100644 --- a/i18n-helpers/fuzz/fuzz_targets/group_events.rs +++ 
b/i18n-helpers/fuzz/fuzz_targets/group_events.rs @@ -1,7 +1,7 @@ #![no_main] use libfuzzer_sys::fuzz_target; -use mdbook_i18n_helpers::{extract_events, group_events, Group}; +use mdbook_i18n_helpers::{extract_events, group_events, reconstruct_markdown, Group}; use pretty_assertions::assert_eq; fuzz_target!(|text: String| { @@ -11,8 +11,13 @@ fuzz_target!(|text: String| { .flat_map(|group| match group { Group::Translate(events) | Group::Skip(events) => events, }) - .cloned() .collect::>(); - assert_eq!(events, flattened_groups); + // Comparison through markdown text to detect missing text. + // Events can't be compared directly because `group_events` + // may split an event into several events. + let text_from_events = reconstruct_markdown(&events, None); + let text_from_groups = reconstruct_markdown(&flattened_groups, None); + + assert_eq!(text_from_events, text_from_groups); }); diff --git a/i18n-helpers/src/gettext.rs b/i18n-helpers/src/gettext.rs index 168c32d..346f079 100644 --- a/i18n-helpers/src/gettext.rs +++ b/i18n-helpers/src/gettext.rs @@ -212,21 +212,21 @@ mod tests { #[test] fn test_translate_code_block() { - let catalog = create_catalog(&[( - "```rust,editable\n\ - fn foo() {\n\n let x = \"hello\";\n\n}\n\ - ```", - "```rust,editable\n\ - fn FOO() {\n\n let X = \"guten tag\";\n\n}\n\ - ```", - )]); + let catalog = create_catalog(&[ + ("\"hello\"", "\"guten tag\""), + ("// line comment\n", "// linie kommentar\n"), + ("/* block\ncomment */", "/* block\nkommentar */"), + ("/* inline comment */", "/* inline kommentar */"), + ]); assert_eq!( translate( "Text before.\n\ \n\ \n\ ```rust,editable\n\ - fn foo() {\n\n let x = \"hello\";\n\n}\n\ + // line comment\n\ + fn foo() {\n\n let x /* inline comment */ = \"hello\"; // line comment\n\n}\n\ + /* block\ncomment */\n\ ```\n\ \n\ Text after.\n", @@ -235,7 +235,9 @@ mod tests { "Text before.\n\ \n\ ```rust,editable\n\ - fn FOO() {\n\n let X = \"guten tag\";\n\n}\n\ + // linie kommentar\n\ + fn foo() {\n\n let x /*
inline kommentar */ = \"guten tag\"; // linie kommentar\n\n}\n\ + /* block\nkommentar */\n\ ```\n\ \n\ Text after.", diff --git a/i18n-helpers/src/lib.rs b/i18n-helpers/src/lib.rs index 668f019..b8797d9 100644 --- a/i18n-helpers/src/lib.rs +++ b/i18n-helpers/src/lib.rs @@ -24,10 +24,12 @@ //! how to use the supplied `mdbook` plugins. use polib::catalog::Catalog; -use pulldown_cmark::{Event, LinkType, Tag}; +use pulldown_cmark::{CodeBlockKind, Event, LinkType, Tag}; use pulldown_cmark_to_cmark::{cmark_resume_with_options, Options, State}; use regex::Regex; use std::sync::OnceLock; +use syntect::easy::ScopeRangeIterator; +use syntect::parsing::{ParseState, Scope, ScopeStack, SyntaxSet}; pub mod gettext; pub mod normalize; @@ -128,19 +130,19 @@ pub fn extract_events<'a>(text: &'a str, state: Option>) -> Vec<( } /// Markdown events grouped by type. -#[derive(Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum Group<'a> { /// Markdown events which should be translated. /// /// This includes `[Text("foo")]` as well as sequences with text /// such as `[Start(Emphasis), Text("foo") End(Emphasis)]`. - Translate(&'a [(usize, Event<'a>)]), + Translate(Vec<(usize, Event<'a>)>), /// Markdown events which should be skipped when translating. /// /// This includes structural events such as `Start(Heading(H1, /// None, vec![]))`. - Skip(&'a [(usize, Event<'a>)]), + Skip(Vec<(usize, Event<'a>)>), } /// Group Markdown events into translatable and skipped events. 
@@ -171,14 +173,14 @@ pub enum Group<'a> { /// assert_eq!( /// groups, /// vec![ -/// Group::Skip(&[ +/// Group::Skip(vec![ /// (1, Event::Start(Tag::List(None))), /// (1, Event::Start(Tag::Item)), /// ]), -/// Group::Translate(&[ +/// Group::Translate(vec![ /// (1, Event::Text("A list item.".into())), /// ]), -/// Group::Skip(&[ +/// Group::Skip(vec![ /// (1, Event::End(Tag::Item)), /// (1, Event::End(Tag::List(None))), /// ]), @@ -209,27 +211,27 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { } impl State { - /// Creates a group based on the capturing state and context. - fn into_group<'a>( + /// Creates groups based on the capturing state and context. + fn into_groups<'a>( self, idx: usize, events: &'a [(usize, Event<'a>)], ctx: GroupingContext, - ) -> (Group<'a>, GroupingContext) { + ) -> (Vec>, GroupingContext) { match self { State::Translate(start) => { if ctx.skip_next_group { ( - Group::Skip(&events[start..idx]), + vec![Group::Skip(events[start..idx].into())], ctx.clear_skip_next_group(), ) - } else if is_nontranslatable_codeblock_group(&events[start..idx]) { - (Group::Skip(&events[start..idx]), ctx) + } else if is_codeblock_group(&events[start..idx]) { + (parse_codeblock(&events[start..idx]), ctx) } else { - (Group::Translate(&events[start..idx]), ctx) + (vec![Group::Translate(events[start..idx].into())], ctx) } } - State::Skip(start) => (Group::Skip(&events[start..idx]), ctx), + State::Skip(start) => (vec![Group::Skip(events[start..idx].into())], ctx), } } } @@ -246,18 +248,18 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { // make the group self-contained. Event::Start(Tag::Paragraph | Tag::CodeBlock(..)) => { // A translatable group starts here. 
- let next_group; - (next_group, ctx) = state.into_group(idx, events, ctx); - groups.push(next_group); + let mut next_groups; + (next_groups, ctx) = state.into_groups(idx, events, ctx); + groups.append(&mut next_groups); state = State::Translate(idx); } Event::End(Tag::Paragraph | Tag::CodeBlock(..)) => { // A translatable group ends after `idx`. let idx = idx + 1; - let next_group; - (next_group, ctx) = state.into_group(idx, events, ctx); - groups.push(next_group); + let mut next_groups; + (next_groups, ctx) = state.into_groups(idx, events, ctx); + groups.append(&mut next_groups); state = State::Skip(idx); } @@ -277,9 +279,9 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { // If we're currently skipping, then a new // translatable group starts here. if let State::Skip(_) = state { - let next_group; - (next_group, ctx) = state.into_group(idx, events, ctx); - groups.push(next_group); + let mut next_groups; + (next_groups, ctx) = state.into_groups(idx, events, ctx); + groups.append(&mut next_groups); state = State::Translate(idx); } @@ -290,9 +292,9 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { Event::Html(s) if is_comment_skip_directive(s) => { // If in the middle of translation, finish it. if let State::Translate(_) = state { - let next_group; - (next_group, ctx) = state.into_group(idx, events, ctx); - groups.push(next_group); + let mut next_groups; + (next_groups, ctx) = state.into_groups(idx, events, ctx); + groups.append(&mut next_groups); // Restart translation: subtle but should be // needed to handle the skipping of the rest of @@ -307,9 +309,9 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { // skipping group. 
_ => { if let State::Translate(_) = state { - let next_group; - (next_group, ctx) = state.into_group(idx, events, ctx); - groups.push(next_group); + let mut next_groups; + (next_groups, ctx) = state.into_groups(idx, events, ctx); + groups.append(&mut next_groups); state = State::Skip(idx); } @@ -318,8 +320,8 @@ pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec> { } match state { - State::Translate(start) => groups.push(Group::Translate(&events[start..])), - State::Skip(start) => groups.push(Group::Skip(&events[start..])), + State::Translate(start) => groups.push(Group::Translate(events[start..].into())), + State::Skip(start) => groups.push(Group::Skip(events[start..].into())), } groups @@ -334,20 +336,154 @@ fn is_comment_skip_directive(html: &str) -> bool { re.is_match(html.trim()) } -/// Returns true if the events appear to be a codeblock without translatable text. -fn is_nontranslatable_codeblock_group(events: &[(usize, Event)]) -> bool { - match events { +/// Returns true if the events appear to be a codeblock. +fn is_codeblock_group(events: &[(usize, Event)]) -> bool { + matches!( + events, + [ + (_, Event::Start(Tag::CodeBlock(_))), + .., + (_, Event::End(Tag::CodeBlock(_))) + ] + ) +} + +/// Returns true if the scope should be translated. +fn is_translate_scope(x: Scope) -> bool { + static SCOPE_STRING: OnceLock = OnceLock::new(); + static SCOPE_COMMENT: OnceLock = OnceLock::new(); + + let scope_string = SCOPE_STRING.get_or_init(|| Scope::new("string").unwrap()); + let scope_comment = SCOPE_COMMENT.get_or_init(|| Scope::new("comment").unwrap()); + scope_string.is_prefix_of(x) || scope_comment.is_prefix_of(x) +} + +/// Creates groups by checking codeblock with heuristic way. 
+fn heuristic_codeblock<'a>(events: &'a [(usize, Event)]) -> Vec> { + let is_translate = match events { [(_, Event::Start(Tag::CodeBlock(_))), .., (_, Event::End(Tag::CodeBlock(_)))] => { let (codeblock_text, _) = reconstruct_markdown(events, None); // Heuristic to check whether the codeblock nether has a // literal string nor a line comment. We may actually // want to use a lexer here to make this more robust. - !codeblock_text.contains('"') && !codeblock_text.contains("//") + codeblock_text.contains('"') || codeblock_text.contains("//") } - _ => false, + _ => true, + }; + + if is_translate { + vec![Group::Translate(events.into())] + } else { + vec![Group::Skip(events.into())] } } +/// Creates groups by parsing codeblock. +fn parse_codeblock<'a>(events: &'a [(usize, Event)]) -> Vec> { + // Language detection from language identifier of codeblock. + let ss = SyntaxSet::load_defaults_newlines(); + let syntax = if let (_, Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(x)))) = &events[0] { + ss.find_syntax_by_token(x.split(',').next().unwrap()) + } else { + None + }; + + let Some(syntax) = syntax else { + // If there is no language specifier, falling back to heuristic way. + return heuristic_codeblock(events); + }; + + let mut ps = ParseState::new(syntax); + let mut ret = vec![]; + + for (idx, event) in events.iter().enumerate() { + match event { + (text_line, Event::Text(text)) => { + let mut stack = ScopeStack::new(); + let mut stack_failure = false; + + let Ok(ops) = ps.parse_line(text, &ss) else { + // If parse is failed, the text event should be translated. 
+ ret.push(Group::Translate(events[idx..idx + 1].into())); + continue; + }; + + let mut translate_events = vec![]; + let mut groups = vec![]; + + for (range, op) in ScopeRangeIterator::new(&ops, text) { + if stack.apply(op).is_err() { + stack_failure = true; + break; + } + + if range.is_empty() { + continue; + } + + // Calculate line number of the range + let range_line = if range.start == 0 { + *text_line + } else { + text_line + text[0..range.start].lines().count() - 1 + }; + + let text = &text[range]; + + // Whitespaces between translate texts should be added to translate + // group. + // So all whitespaces are added to the translate events buffer temporarily, + // and the trailing whitespaces will be removed finally. + let is_whitespace = text.trim_matches(&[' ', '\t'] as &[_]).is_empty(); + + let is_translate = stack.scopes.iter().any(|x| is_translate_scope(*x)); + + if is_translate || (is_whitespace && !translate_events.is_empty()) { + translate_events.push((range_line, Event::Text(text.into()))); + } else { + let whitespace_events = extract_trailing_whitespaces(&mut translate_events); + groups.push(Group::Translate(std::mem::take(&mut translate_events))); + groups.push(Group::Skip(whitespace_events)); + groups.push(Group::Skip(vec![(range_line, Event::Text(text.into()))])); + } + } + + let whitespace_events = extract_trailing_whitespaces(&mut translate_events); + groups.push(Group::Translate(std::mem::take(&mut translate_events))); + groups.push(Group::Skip(whitespace_events)); + + if stack_failure { + // If stack operation is failed, the text event should be translated. + ret.push(Group::Translate(events[idx..idx + 1].into())); + } else { + ret.append(&mut groups); + } + } + _ => { + ret.push(Group::Skip(events[idx..idx + 1].into())); + } + } + } + ret +} + +/// Extract trailing events which have whitespace only.
+fn extract_trailing_whitespaces<'a>(buf: &mut Vec<(usize, Event<'a>)>) -> Vec<(usize, Event<'a>)> { + let mut ret = vec![]; + + while let Some(last) = buf.last() { + match &last.1 { + Event::Text(text) if text.as_ref().trim_matches(&[' ', '\t'] as &[_]).is_empty() => { + let last = buf.pop().unwrap(); + ret.push(last); + } + _ => break, + } + } + ret.reverse(); + ret +} + /// Render a slice of Markdown events back to Markdown. /// /// # Examples @@ -403,7 +539,7 @@ pub fn reconstruct_markdown( // `\n` for code blocks (since they must start on a new line). We // can safely trim this here since we know that we always // reconstruct Markdown for a self-contained group of events. - (String::from(markdown.trim_matches('\n')), new_state) + (String::from(markdown.trim_start_matches('\n')), new_state) } /// Extract translatable strings from `document`. @@ -462,7 +598,7 @@ pub fn extract_messages(document: &str) -> Vec<(usize, String)> { match group { Group::Translate(events) => { if let Some((lineno, _)) = events.first() { - let (text, new_state) = reconstruct_markdown(events, state); + let (text, new_state) = reconstruct_markdown(&events, state); // Skip empty messages since they are special: // they contains the PO file metadata. if !text.trim().is_empty() { @@ -472,7 +608,7 @@ pub fn extract_messages(document: &str) -> Vec<(usize, String)> { } } Group::Skip(events) => { - let (_, new_state) = reconstruct_markdown(events, state); + let (_, new_state) = reconstruct_markdown(&events, state); state = Some(new_state); } } @@ -536,7 +672,7 @@ pub fn translate_events<'a>( match group { Group::Translate(events) => { // Reconstruct the message. 
- let (msgid, new_state) = reconstruct_markdown(events, state.clone()); + let (msgid, new_state) = reconstruct_markdown(&events, state.clone()); let translated = catalog .find_message(None, &msgid, None) .filter(|msg| !msg.flags().is_fuzzy() && msg.is_translated()) @@ -547,19 +683,19 @@ pub fn translate_events<'a>( // care to trim away unwanted paragraphs. translated_events.extend_from_slice(trim_paragraph( &extract_events(msgstr, state), - events, + &events, )); } - None => translated_events.extend_from_slice(events), + None => translated_events.extend_from_slice(&events), } // Advance the state. state = Some(new_state); } Group::Skip(events) => { // Copy the events unchanged to the output. - translated_events.extend_from_slice(events); + translated_events.extend_from_slice(&events); // Advance the state. - let (_, new_state) = reconstruct_markdown(events, state); + let (_, new_state) = reconstruct_markdown(&events, state); state = Some(new_state); } } @@ -896,8 +1032,8 @@ The document[^1] text. vec![ (1, "Preamble"), ( - 2, - "```rust\n// Example:\nfn hello() {\n some_code()\n\n todo!()\n}\n```", + 3, + "// Example:\n", ), (10, "Postamble"), ], @@ -937,15 +1073,82 @@ The document[^1] text. 
> Postamble", vec![ (1, "Preamble"), - ( - 2, - "```rust\nfn hello() {\n some_code()\n\n // FIXME: do something here!\n todo!()\n}\n```", - ), + (6, "// FIXME: do something here!\n"), (10, "Postamble"), ], ); } + #[test] + fn extract_messages_code_block_with_block_comment() { + assert_extract_messages( + "```rust\n\ + /* block comment\n\ + * /* nested block comment\n\ + * */\n\ + * \n\ + * \n\ + * \n\ + * */\n\ + ```\n", + vec![( + 2, + "/* block comment\n* /* nested block comment\n* */\n* \n* \n* \n* */", + )], + ); + } + + #[test] + fn extract_messages_code_block_with_continuous_line_comments() { + assert_extract_messages( + r"```rust +// continuous +// line +// comments +{ + // continuous + // line + // comments + let a = 1; // single line comment + let b = 1; // single line comment +} +```", + vec![ + (2, "// continuous\n// line\n// comments\n"), + (6, "// continuous\n // line\n // comments\n"), + (9, "// single line comment\n"), + (10, "// single line comment\n"), + ], + ); + } + + #[test] + fn extract_messages_multi_language_code_blocks() { + assert_extract_messages( + r##"```c +// C +'C'; "C"; +``` +```html + +``` +```javascript +`JavaScript` +``` +```ruby +# Ruby +```"##, + vec![ + (2, "// C\n'C'"), + (3, "\"C\""), + (6, ""), + (10, "`JavaScript`"), + (13, "# Ruby\n"), + ], + ); + } + #[test] fn extract_messages_details() { // This isn't great: we lose text following a HTML tag: @@ -1255,7 +1458,7 @@ not-skipped", fn extract_messages_automatic_skipping_nontranslatable_codeblocks_simple() { assert_extract_messages( r#" -``` +```python def g(x): this_should_be_skipped_no_strings_or_comments() ``` @@ -1268,6 +1471,25 @@ def g(x): fn extract_messages_automatic_skipping_nontranslatable_codeblocks() { assert_extract_messages( r#" +```python +def f(x): + print("this should be translated") +``` + + +```python +def g(x): + but_this_should_not() +``` +"#, + vec![(4, "\"this should be translated\"")], + ); + } + + #[test] + fn 
extract_messages_without_language_specifier() { + assert_extract_messages( + r#" ``` def f(x): print("this should be translated") @@ -1285,26 +1507,4 @@ def g(x): )], ); } - - #[test] - fn is_nontranslatable_codeblock_group_true() { - let events = extract_events( - r#"``` -f(x) -```"#, - None, - ); - assert!(is_nontranslatable_codeblock_group(&events)); - } - - #[test] - fn is_nontranslatable_codeblock_group_false() { - let events = extract_events( - r#"``` -f("hello world") -```"#, - None, - ); - assert!(!is_nontranslatable_codeblock_group(&events)); - } } diff --git a/i18n-helpers/src/normalize.rs b/i18n-helpers/src/normalize.rs index 009a69a..79b6103 100644 --- a/i18n-helpers/src/normalize.rs +++ b/i18n-helpers/src/normalize.rs @@ -474,23 +474,7 @@ mod tests { * BAR\n\ ```", )]); - assert_normalized_messages_eq( - catalog, - &[exact( - "```rust,editable\n\ - // Example\n\ - foo\n\ - \n\ - * bar\n\ - ```", - "```rust,editable\n\ - // Beispiel\n\ - FOO\n\ - \n\ - * BAR\n\ - ```", - )], - ); + assert_normalized_messages_eq(catalog, &[exact("// Example\n", "// Beispiel\n")]); } #[test]