From f42602c5eb54b1831df8f88af3224e58c90247c1 Mon Sep 17 00:00:00 2001 From: Kristaps Dz Date: Thu, 18 May 2023 22:54:48 -0700 Subject: [PATCH] Don't emit superfluous newlines in -thtml. --- html.c | 18 +++++++++--------- regress/bang-before-footnote.html | 1 - regress/diff/diff.html | 20 -------------------- regress/diff/metadata-add.html | 1 - regress/diff/metadata-change.html | 1 - regress/diff/metadata-remove.html | 1 - regress/diff/table-badfree.html | 1 - regress/footnote-in-table.html | 3 --- regress/footnote-multi.html | 1 - regress/footnote-nested.html | 1 - regress/footnote.html | 1 - regress/header-attr-class-multi.html | 1 - regress/header-attr-class.html | 1 - regress/header-attr-id-empty.html | 1 - regress/header-attr-id-zerolen.html | 1 - regress/header-attr-id.html | 1 - regress/header-attr-id2.html | 1 - regress/header-ids.html | 9 +-------- regress/header-with-self-link.html | 1 - regress/list-complex.html | 3 --- regress/list-irregular-spacing.html | 1 - regress/list-switch-listtype.html | 1 - regress/list-switch-listtype2.html | 1 - regress/list-switch-listtype3.html | 1 - regress/list-switch-listtype4.html | 1 - regress/list-switch-listtype5.html | 3 --- regress/list-switch-listtype6.html | 1 - regress/list-switch-listtype7.html | 2 -- regress/list-switch-listtype8.html | 2 -- regress/list-switch-listtype9.html | 2 -- regress/list-with-spaces.html | 8 +++----- regress/list-with-sublists.html | 1 - regress/shift-heading-level-by-neg.html | 4 ---- regress/shift-heading-level-by-pos.html | 4 ---- regress/shift-heading-level-by-zero.html | 4 ---- regress/simple.html | 14 -------------- 36 files changed, 13 insertions(+), 105 deletions(-) diff --git a/html.c b/html.c index 7ddc06dc..a4bba75a 100644 --- a/html.c +++ b/html.c @@ -166,7 +166,7 @@ rndr_blockcode(struct lowdown_buf *ob, const struct rndr_blockcode *parm, const struct html *st) { - if (ob->size && !hbuf_putc(ob, '\n')) + if (!newline(ob)) return 0; if (parm->lang.size) { @@ -220,7 +220,7 @@ rndr_definition(struct lowdown_buf *ob, const struct lowdown_buf *content) { - if (ob->size && !hbuf_putc(ob, '\n')) + if (!newline(ob)) return 0; if (!HBUF_PUTSL(ob, "
\n")) return 0; @@ -329,7 +329,7 @@ rndr_header(struct lowdown_buf *ob, const struct lowdown_buf *content, else if (level > 6) level = 6; - if (ob->size && !hbuf_putc(ob, '\n')) + if (!newline(ob)) return 0; if (!hbuf_printf(ob, "size && !hbuf_putc(ob, '\n')) + if (!newline(ob)) return 0; if (param->flags & HLIST_FL_ORDERED) { if (param->start > 1) { @@ -586,7 +586,7 @@ rndr_raw_block(struct lowdown_buf *ob, if (org >= sz) return 1; - if (ob->size && !hbuf_putc(ob, '\n')) + if (!newline(ob)) return 0; if (!hbuf_put(ob, param->text.data + org, sz - org)) @@ -610,7 +610,7 @@ static int rndr_hrule(struct lowdown_buf *ob) { - if (ob->size && !hbuf_putc(ob, '\n')) + if (!newline(ob)) return 0; return hbuf_puts(ob, "
\n"); } @@ -707,7 +707,7 @@ rndr_table(struct lowdown_buf *ob, const struct lowdown_buf *content) { - if (ob->size && !hbuf_putc(ob, '\n')) + if (!newline(ob)) return 0; if (!HBUF_PUTSL(ob, "\n")) return 0; @@ -721,7 +721,7 @@ rndr_table_header(struct lowdown_buf *ob, const struct lowdown_buf *content) { - if (ob->size && !hbuf_putc(ob, '\n')) + if (!newline(ob)) return 0; if (!HBUF_PUTSL(ob, "\n")) return 0; @@ -923,7 +923,7 @@ rndr_doc_footer(struct lowdown_buf *ob, const struct html *st) */ if (st->footsz > 0) { - if (ob->size && !hbuf_putc(ob, '\n')) + if (!newline(ob)) return 0; if (!HBUF_PUTSL(ob, "
\n
\n
    \n")) diff --git a/regress/bang-before-footnote.html b/regress/bang-before-footnote.html index 89cef01f..204ad70f 100644 --- a/regress/bang-before-footnote.html +++ b/regress/bang-before-footnote.html @@ -1,5 +1,4 @@

    This is a test!1

    -

      diff --git a/regress/diff/diff.html b/regress/diff/diff.html index f52d0b9b..9e3a69b4 100644 --- a/regress/diff/diff.html +++ b/regress/diff/diff.html @@ -13,7 +13,6 @@ Lowdown Diffing Engine -

      Lowdown Diffing Engine

      In this paper, I briefly describe the “diff” engine used in lowdown-diff(1) tool @@ -37,10 +36,8 @@

      Lowdown Diffing Engine

      (or diff.diff.pdf), which shows the difference between this document and a [fabricated] earlier version..

      -

      Introduction

      Let two source files, foo.mdold.md and bar.mdnew.md, refer to the old and new versions of a file respectively.The goal is to establish the changes between these snippets in formatted output. Let’s begin with the old version, old.md.

      -
      *Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do
       eiusmod tempor incididunt ut [labore](index.html) et dolore magna
       aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco
      @@ -48,7 +45,6 @@ 

      Introduction

      in reprehenderit...

      In the new version, new.md, I add some more links and styles.

      -
      *Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do
       eiusmod tempor incididunt ut [labore](index.html) et dolore [magna
       aliqua](index.html). Ut enim ad minim veniam, quis nostrud exercitation
      @@ -58,7 +54,6 @@ 

      Introduction

      The most simple way of viewing changes is with the venerable diff(1) utility. However, this will only reflect changes in the input document—not the formatted output.

      -
      --- old.md      Tue Oct 17 11:25:01 2017
       +++ new.md      Tue Oct 17 11:25:01 2017
       @@ -1,5 +1,5 @@
      @@ -80,7 +75,6 @@ 

      Introduction

      A similar possibility is to use wdiff(1), which produces a set of word-by-word differences.

      -
      *Lorem* ipsum dolor sit amet, consectetur adipiscing elit, sed do
       eiusmod tempor incididunt ut [labore](index.html) et dolore [-magna
       aliqua.-] {+[magna aliqua](index.html).+} Ut enim ad minim veniam, quis
      @@ -120,7 +114,6 @@ 

      Introduction

      the usual linear difference, as in the case of diff(1) and friends), one can work within the language to produce differences.1

      -

      Algorithm

      The algorithm is in effect an ordered tree diff. I began with well-studied algorithms for a well-studied problem: XML tree @@ -135,7 +128,6 @@

      Algorithm

      in different ways, or augment it at a later date.

      The BULD algorithm described in this paper is straightforward. It begins with a short sanitisation pass.

      -
      1. Annotate each node in the parse tree with a hash of the subtree rooted at the node, inclusive. @@ -149,7 +141,6 @@

        Algorithm

        weight. Then, while the priority queue is non-empty: (diff.c, lowdown_diff())

        -
        1. Pop the first node of the priority queue.
        2. Look for candidates in the old document whose hash matches the @@ -177,14 +168,12 @@

          Algorithm

        My implementation changes or extends the BULD algorithm in several small ways, described in the per-step documentation below.

        -

        Sanitise

        Before the BULD algorithm is run, the input tree is sanitised. This process merges all adjacent text nodes into a single text node. By doing so, possible differences are pushed into large blocks of contiguous text—which in this case are managed by the word-difference algorithm described later in this paper.

        -

        Annotation

        Each node in the tree is annotated with a hash and a weight. The hash, MD5, is computed in all data concerning a node. For example, normal @@ -199,7 +188,6 @@

        Annotation

        Non-leaf nodes compute their hashes from the node type and the hashes of all of their children. Thus, this step is a bottom-up search.

        Node weight is computed exactly as noted in the paper.

        -

        Optimal candidacy

        A node’s candidate in the old tree is one whose hash matches. In most documents, there are many candidates for certain types of nodes. @@ -211,7 +199,6 @@

        Optimal candidacy

        In the event of similar optimality, the node “closest” to the current node is chosen. Proximity is defined by the node identifier, which is its prefix order in the parse tree.

        -

        “Propagate up”

        When propagating a match upward, the distance upward is bound depending on the matched sub-tree as defined in the paper. This makes it so that @@ -221,19 +208,16 @@

        “Propagate up”

        I did modify the algorithm to propagate upward “for free” through similar singleton nodes, even if it means going beyond the maximum number allowed by the sub-tree weight.

        -

        Optimisation

        The lowdown-diff(1) algorithm has two optimisations, both lightly derived from the paper: top-down and bottom-up propagation.

        -

        Top-down

        The top-down optimisation, which is performed first, takes matched nodes and matches un-matched, non-terminal children by label.The children examined must be siblings of adjacent matching nodes.

        This is useful when, say, a document consists of several paragraphs where the text has changed within paragraphs. It won’t be able to match the text content, but it will match the paragraphs, which will push the difference downward in the tree.

        -

        Bottom-up

        In the bottom-up propagation, the weight of any given sub-tree is used to compute how high a match will propagate. I extend the paper’s @@ -246,12 +230,10 @@

        Bottom-up

        (where the parent node is equal in label and attributes to the examined node) are computed. If any given parent of the matched children has greater than 50% of the possible weight, it is matched.

        -

        Merging

        The merging phase, which is not described in the paper, is very straightforward. It uses a recursive merge algorithm starting at the root node of the new tree and the root node of the old tree.

        -
        1. The invariant is that the current node is matched by the corresponding node in the old tree.
        2. @@ -315,7 +297,6 @@

          API

          produces the merged tree.

          A set of convenience functions, lowdown_buf_diff() and lowdown_file_diff(), also provide this functionality.

          -

          Future work

          There are many possible improvements to the algorithm.

          @@ -335,7 +316,6 @@

          Future work

          the insert/delete macros don’t disrupt the flow of text.

          Document last updated: $Date$

          -

            diff --git a/regress/diff/metadata-add.html b/regress/diff/metadata-add.html index da13e524..2fef100b 100644 --- a/regress/diff/metadata-add.html +++ b/regress/diff/metadata-add.html @@ -7,7 +7,6 @@ -

            section

            body

            diff --git a/regress/diff/metadata-change.html b/regress/diff/metadata-change.html index 085fe9aa..5d692099 100644 --- a/regress/diff/metadata-change.html +++ b/regress/diff/metadata-change.html @@ -7,7 +7,6 @@ -

            section

            body

            diff --git a/regress/diff/metadata-remove.html b/regress/diff/metadata-remove.html index 472f59f7..e6ccefde 100644 --- a/regress/diff/metadata-remove.html +++ b/regress/diff/metadata-remove.html @@ -6,7 +6,6 @@ -

            section

            body

            diff --git a/regress/diff/table-badfree.html b/regress/diff/table-badfree.html index 585f3503..575f589f 100644 --- a/regress/diff/table-badfree.html +++ b/regress/diff/table-badfree.html @@ -13,7 +13,6 @@

            section

            • what

              -
              • what what?
            • diff --git a/regress/footnote-in-table.html b/regress/footnote-in-table.html index 640081fc..df911ff0 100644 --- a/regress/footnote-in-table.html +++ b/regress/footnote-in-table.html @@ -1,5 +1,4 @@

              first1

              -
@@ -15,7 +14,6 @@

Now4

-

    @@ -31,7 +29,6 @@
  1. three 

    hello

    -
    • world
    diff --git a/regress/footnote-multi.html b/regress/footnote-multi.html index ae7ef98c..b43bc665 100644 --- a/regress/footnote-multi.html +++ b/regress/footnote-multi.html @@ -1,6 +1,5 @@

    Hi.1

    Another hi.[^pt]

    -

      diff --git a/regress/footnote-nested.html b/regress/footnote-nested.html index 0061c952..9bcac720 100644 --- a/regress/footnote-nested.html +++ b/regress/footnote-nested.html @@ -1,5 +1,4 @@

      Hi.1

      -

        diff --git a/regress/footnote.html b/regress/footnote.html index 6bc36b8d..056e4019 100644 --- a/regress/footnote.html +++ b/regress/footnote.html @@ -1,5 +1,4 @@

        Hi.1

        -

          diff --git a/regress/header-attr-class-multi.html b/regress/header-attr-class-multi.html index 7ffc83cc..0c6c3090 100644 --- a/regress/header-attr-class-multi.html +++ b/regress/header-attr-class-multi.html @@ -1,5 +1,4 @@

          a

          c

          -

          a

          c

          diff --git a/regress/header-attr-class.html b/regress/header-attr-class.html index 896cdb46..5f9bccec 100644 --- a/regress/header-attr-class.html +++ b/regress/header-attr-class.html @@ -1,5 +1,4 @@

          a

          c

          -

          a

          c

          diff --git a/regress/header-attr-id-empty.html b/regress/header-attr-id-empty.html index c6cc2790..3eac1d2f 100644 --- a/regress/header-attr-id-empty.html +++ b/regress/header-attr-id-empty.html @@ -1,5 +1,4 @@

          c

          -

          c

          diff --git a/regress/header-attr-id-zerolen.html b/regress/header-attr-id-zerolen.html index 1f37a95b..7bbc1dfc 100644 --- a/regress/header-attr-id-zerolen.html +++ b/regress/header-attr-id-zerolen.html @@ -1,5 +1,4 @@

          a

          c

          -

          b

          c

          diff --git a/regress/header-attr-id.html b/regress/header-attr-id.html index 8290f650..a76389cb 100644 --- a/regress/header-attr-id.html +++ b/regress/header-attr-id.html @@ -1,5 +1,4 @@

          a

          c

          -

          a

          c

          diff --git a/regress/header-attr-id2.html b/regress/header-attr-id2.html index 9b32429c..1b75f86a 100644 --- a/regress/header-attr-id2.html +++ b/regress/header-attr-id2.html @@ -1,5 +1,4 @@

          a

          c

          -

          a

          c

          diff --git a/regress/header-ids.html b/regress/header-ids.html index 8bdafb1f..1baf3706 100644 --- a/regress/header-ids.html +++ b/regress/header-ids.html @@ -1,15 +1,8 @@

          section

          -

          section-2

          -

          section

          -

          section

          -

          section-3

          -

          section 3

          -

          section()3

          - -

          section () 3

          +

          section () 3

          diff --git a/regress/header-with-self-link.html b/regress/header-with-self-link.html index 9348d81f..20e53a8b 100644 --- a/regress/header-with-self-link.html +++ b/regress/header-with-self-link.html @@ -1,3 +1,2 @@

          Header with link

          -

          link

          diff --git a/regress/list-complex.html b/regress/list-complex.html index 298fbf51..3edb6fa8 100644 --- a/regress/list-complex.html +++ b/regress/list-complex.html @@ -1,15 +1,12 @@

          An initial paragraph.

          -
          • An outer list.

            With a paragraph.

            -
            1. An inner ordered list.
            2. That’s inline.
            3. Followed by…
            -
            • An inner regular lits.

              With inner paragraph.

            • diff --git a/regress/list-irregular-spacing.html b/regress/list-irregular-spacing.html index 4f473e03..9903f20c 100644 --- a/regress/list-irregular-spacing.html +++ b/regress/list-irregular-spacing.html @@ -1,7 +1,6 @@
              • One space, with no indent for wrapped text. -
                1. Irregular nesting… DO NOT DO THIS.
              • diff --git a/regress/list-switch-listtype.html b/regress/list-switch-listtype.html index 2b6b9527..e2c81c18 100644 --- a/regress/list-switch-listtype.html +++ b/regress/list-switch-listtype.html @@ -1,7 +1,6 @@
                1. c
                -
                • a
                diff --git a/regress/list-switch-listtype2.html b/regress/list-switch-listtype2.html index af4ad347..e7b4cc75 100644 --- a/regress/list-switch-listtype2.html +++ b/regress/list-switch-listtype2.html @@ -1,7 +1,6 @@
                1. a
                -
                • a
                diff --git a/regress/list-switch-listtype3.html b/regress/list-switch-listtype3.html index 2ec027a7..4cf4753b 100644 --- a/regress/list-switch-listtype3.html +++ b/regress/list-switch-listtype3.html @@ -2,7 +2,6 @@
              • a Whoa a continuing line.
        -
        • a
        diff --git a/regress/list-switch-listtype4.html b/regress/list-switch-listtype4.html index b6e3331a..a8d28155 100644 --- a/regress/list-switch-listtype4.html +++ b/regress/list-switch-listtype4.html @@ -2,7 +2,6 @@
      1. a

        Whoa a new line.

      -
      • a
      diff --git a/regress/list-switch-listtype5.html b/regress/list-switch-listtype5.html index 6a6b1a1d..3da71c65 100644 --- a/regress/list-switch-listtype5.html +++ b/regress/list-switch-listtype5.html @@ -1,12 +1,9 @@
      1. a -
        • b -
          1. c -
            • d
          2. diff --git a/regress/list-switch-listtype6.html b/regress/list-switch-listtype6.html index e0b1fbf9..cffc67e9 100644 --- a/regress/list-switch-listtype6.html +++ b/regress/list-switch-listtype6.html @@ -1,6 +1,5 @@
            1. a -
              • b

                c

              • diff --git a/regress/list-switch-listtype7.html b/regress/list-switch-listtype7.html index 39128bec..5ff36b16 100644 --- a/regress/list-switch-listtype7.html +++ b/regress/list-switch-listtype7.html @@ -1,9 +1,7 @@
                1. a -
                  • b -
                    1. c

                      d

                    2. diff --git a/regress/list-switch-listtype8.html b/regress/list-switch-listtype8.html index 89f0a327..17d1b36b 100644 --- a/regress/list-switch-listtype8.html +++ b/regress/list-switch-listtype8.html @@ -1,12 +1,10 @@
                      • a -
                        1. sublist

                          with paragraph

                      -
                      1. b
                      diff --git a/regress/list-switch-listtype9.html b/regress/list-switch-listtype9.html index cc35c349..ff74bc1d 100644 --- a/regress/list-switch-listtype9.html +++ b/regress/list-switch-listtype9.html @@ -1,12 +1,10 @@
                      • a

                        -
                        1. sublist

                          with paragraph

                      -
                      1. b
                      diff --git a/regress/list-with-spaces.html b/regress/list-with-spaces.html index fade3d8b..eafe1188 100644 --- a/regress/list-with-spaces.html +++ b/regress/list-with-spaces.html @@ -1,16 +1,14 @@
                      1. 2 spaces after a numbered list. - 4 space indent for wrapped text.
                      2. +4 space indent for wrapped text.
                      3. 2 spaces again.
                      -
                      • 3 spaces after a bullet. - 4 space indent for wrapped text. - +4 space indent for wrapped text.
                        1. 2 spaces after a numbered list. - 8 space indent for the wrapped text of a nested list.
                        2. +8 space indent for the wrapped text of a nested list.
                        3. Looks nice, don’t it?
                      • 3 spaces after a bullet.
                      • diff --git a/regress/list-with-sublists.html b/regress/list-with-sublists.html index 3a4a1f05..770a220c 100644 --- a/regress/list-with-sublists.html +++ b/regress/list-with-sublists.html @@ -1,7 +1,6 @@
                        • list

                          with paragraph

                          -
                          • inner list

                            inner para

                          • diff --git a/regress/shift-heading-level-by-neg.html b/regress/shift-heading-level-by-neg.html index f7eb04a6..fb037439 100644 --- a/regress/shift-heading-level-by-neg.html +++ b/regress/shift-heading-level-by-neg.html @@ -1,14 +1,10 @@

                            header 1

                            1

                            -

                            header 2

                            2

                            -

                            header 3

                            3

                            -

                            header 4

                            4

                            -

                            header 5

                            5

                            diff --git a/regress/shift-heading-level-by-pos.html b/regress/shift-heading-level-by-pos.html index 5638f9e2..71fbce13 100644 --- a/regress/shift-heading-level-by-pos.html +++ b/regress/shift-heading-level-by-pos.html @@ -1,14 +1,10 @@

                            header 1

                            1

                            -

                            header 2

                            2

                            -
                            header 3

                            3

                            -
                            header 4

                            4

                            -
                            header 5

                            5

                            diff --git a/regress/shift-heading-level-by-zero.html b/regress/shift-heading-level-by-zero.html index 8f0de027..8b3b04fb 100644 --- a/regress/shift-heading-level-by-zero.html +++ b/regress/shift-heading-level-by-zero.html @@ -1,14 +1,10 @@

                            header 1

                            1

                            -

                            header 2

                            2

                            -

                            header 3

                            3

                            -

                            header 4

                            4

                            -
                            header 5

                            5

                            diff --git a/regress/simple.html b/regress/simple.html index 67189213..a277e756 100644 --- a/regress/simple.html +++ b/regress/simple.html @@ -2,7 +2,6 @@

                            An h1 header

                            Paragraphs are separated by a blank line.

                            2nd paragraph. Italic, bold, and monospace. Itemized lists look like:

                            -
                            • this one
                            • that one
                            • @@ -19,10 +18,8 @@

                              An h1 header

                              Use 3 dashes for an em-dash. Use 2 dashes for ranges (ex., “it’s all in chapters 12–14”). Three dots … will be converted to an ellipsis. Unicode is supported. ☺

                              -

                              An h2 header

                              Here’s a numbered list:

                              -
                              1. first item
                              2. second item
                              3. @@ -30,20 +27,17 @@

                                An h2 header

                              Note again how the actual text starts at 4 columns in (4 characters from the left side). Here’s a code sample:

                              -
                              # Let me re-iterate ...
                               for i in 1 .. 10 { do-something(i) }
                               

                              As you probably guessed, indented 4 spaces. By the way, instead of indenting the block, you can use delimited blocks, if you like:

                              -
                              define foobar() {
                                   print "Welcome to flavor country!";
                               }
                               

                              (which makes copying & pasting easier). You can optionally mark the delimited block for Pandoc to syntax highlight it:

                              -
                              import time
                               # Quick, count to ten!
                               for i in range(10):
                              @@ -51,13 +45,10 @@ 

                              An h2 header

                              time.sleep(0.5) print i
                              -

                              An h3 header

                              Now a nested list:

                              -
                              1. First, get these ingredients:

                                -
                                • carrots
                                • celery
                                • @@ -66,7 +57,6 @@

                                  An h3 header

                                • Boil some water.

                                • Dump everything in the pot and follow this algorithm:

                                  -
                                  find wooden spoon
                                   uncover pot
                                   stir
                                  @@ -83,7 +73,6 @@ 

                                  An h3 header

                                  doc, and to a section heading in the current doc. Here’s a footnote 1.

                                  Tables can look like this:

                                  - @@ -113,10 +102,8 @@

                                  An h3 header

                                  Table: Shoes, their sizes, and what they’re made of

                                  (The above is the caption for the table.)

                                  A horizontal rule follows.

                                  -

                                  Here’s a definition list:

                                  -
                                  apples
                                  @@ -137,7 +124,6 @@

                                  An h3 header

                                  example image

                                  And note that you can backslash-escape any punctuation characters which you wish to be displayed literally, ex.: `foo`, *bar*, etc.

                                  -