From 6c4f44739f0cb224c030c21be4f71930a4d83e2a Mon Sep 17 00:00:00 2001
From: Alfred Klomp
Date: Tue, 9 Jan 2024 21:57:38 +0100
Subject: [PATCH] bin/base64: decode: ignore newlines

By popular demand, ignore newlines in the encoded input. This achieves
bug compatibility with GNU base64. The algorithm is quite naive and slow
because it checks each byte independently. There is definitely room for
improvement.
---
 bin/base64.c | 70 ++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 57 insertions(+), 13 deletions(-)

diff --git a/bin/base64.c b/bin/base64.c
index 6a94074..8fca938 100644
--- a/bin/base64.c
+++ b/bin/base64.c
@@ -340,10 +340,23 @@ encode (const struct config *config, struct buffer *buf)
 	return true;
 }
 
-static int
+static inline size_t
+find_newline (const char *p, const size_t avail)
+{
+	// This is very naive and can definitely be improved.
+	for (size_t len = 0; len < avail; len++) {
+		if (p[len] == '\n') {
+			return len;
+		}
+	}
+
+	return avail;
+}
+
+static bool
 decode (const struct config *config, struct buffer *buf)
 {
-	size_t nread, nout;
+	size_t avail, nout;
 	struct base64_state state;
 
 	// Initialize the decoder's state structure.
@@ -351,19 +364,50 @@ decode (const struct config *config, struct buffer *buf)
 
 	// Read encoded data into the buffer. Use the smallest buffer size to
 	// be on the safe side: the decoded output will fit the raw buffer.
-	while ((nread = fread(buf->enc, 1, BUFFER_RAW_SIZE, config->fp)) > 0) {
+	while ((avail = fread(buf->enc, 1, BUFFER_RAW_SIZE, config->fp)) > 0) {
+		char *start = buf->enc;
+
+		// By popular demand, this utility tries to be bug-compatible
+		// with GNU `base64'. That includes silently ignoring newlines
+		// in the input. Tokenize the input on newline characters.
+		// This is quite slow, and at some point we will want to
+		// vectorize this.
+		while (avail > 0) {
+
+			// Find the offset of the next newline character.
+			size_t len = find_newline(start, avail);
+
+			// Ignore empty chunks.
+			if (len == 0) {
+				start++;
+				avail--;
+				continue;
+			}
 
-		// Decode the input into the raw buffer.
-		if (base64_stream_decode(&state, buf->enc, nread,
-		                         buf->raw, &nout) == 0) {
-			fprintf(stderr, "%s: %s: decoding error\n",
-			        config->name, config->file);
-			return false;
-		}
+			// Decode the input into the raw buffer.
+			if (base64_stream_decode(&state, start, len,
+			                         buf->raw, &nout) == 0) {
+				fprintf(stderr, "%s: %s: decoding error\n",
+				        config->name, config->file);
+				return false;
+			}
 
-		// Append the raw data to the output stream.
-		if (write_stdout(config, buf->raw, nout) == false) {
-			return false;
+			// Append the raw data to the output stream.
+			if (write_stdout(config, buf->raw, nout) == false) {
+				return false;
+			}
+
+			// Bail out if the whole string has been consumed.
+			if (len == avail) {
+				break;
+			}
+
+			// Add the newline to the chunk length.
+			len++;
+
+			// Move the start pointer past the newline.
+			start += len;
+			avail -= len;
 		}
 	}
 