Skip to content

Commit

Permalink
Merge pull request #5 from warpfork/normalizing-linebreaks-pt3
Browse files Browse the repository at this point in the history
fix parsing CRLF files, part 3
  • Loading branch information
warpfork authored Sep 26, 2021
2 parents 9dc11b3 + 83e3eb5 commit 91a00c1
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
18 changes: 17 additions & 1 deletion read.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ func ReadFile(name string) (*Document, error) {
var (
sigilLineBreak = []byte{'\n'}
sigilCarriageReturn = []byte{'\r'}
sigilCrLf = []byte{'\r', '\n'}
sigilCodeBlock = []byte("```")
sigilTestmark = []byte("[testmark]:# ")
)
Expand Down Expand Up @@ -67,7 +68,7 @@ func Parse(data []byte) (*Document, error) {
case true: // ending a block
if hunkInProgress.LineStart > -1 {
hunkInProgress.LineEnd = i
hunkInProgress.Body = doc.Original[codeBlockOffset:offset]
hunkInProgress.Body = normalizeEndings(doc.Original[codeBlockOffset:offset])
doc.DataHunks = append(doc.DataHunks, hunkInProgress)
doc.HunksByName[hunkInProgress.Name] = hunkInProgress
hunkInProgress = DocHunk{LineStart: -1}
Expand Down Expand Up @@ -124,3 +125,18 @@ func Parse(data []byte) (*Document, error) {
}
return &doc, nil
}

// normalizeEndings looks for instances of "\r\n" and flattens them to "\n".
// If it finds no instances of "\r\n", the original byte slice is returned unchanged.
//
// This function does not bring joy; however,
// see https://github.com/warpfork/go-testmark/pull/4#issuecomment-922760414
// and see https://github.com/warpfork/go-testmark/pull/4#issuecomment-922782549
// for discussion. Performing this kind of normalization to data hunk boundaries
// seems to be a "least bad" behavior in a practical sense.
func normalizeEndings(in []byte) []byte {
if bytes.Count(in, sigilCrLf) == 0 {
return in
}
return bytes.Replace(in, sigilCrLf, sigilLineBreak, -1)
}
2 changes: 0 additions & 2 deletions read_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ func readFixturesExample(t *testing.T, doc *testmark.Document) {
}

func TestParseCRLF(t *testing.T) {
t.Skip("currently broken")

input, err := ioutil.ReadFile(filepath.Join("testdata", "example.md"))
if err != nil {
t.Fatal(err)
Expand Down
5 changes: 5 additions & 0 deletions testmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ type Hunk struct {

// The full body of the hunk, as bytes.
// (This is *still* a subslice of Document.Original, if this hunk was created by Parse, but probably a unique slice otherwise.)
//
// When produced by Parse, the Body has been normalized to have '\n' linebreaks if it originally contained '\r\n'.
// This is meant as a practical conceit to the fact some systems in the Windows ecosystem tend to mutate documents when checking them out of version control,
// and thus testmark finds it practical to pave that back out that again rather than making it an application-level problem.
// (If such a normalization had to be applied, the earlier coment about subslicing of Document.Original probably no longer applies.)
Body []byte
}

Expand Down

0 comments on commit 91a00c1

Please sign in to comment.