Skip to content

Commit

Permalink
Increase buffer size for ndjson parsing. Fixes #19
Browse files: browse the repository at this point in the history
  • Loading branch information
coolbutuseless committed Oct 28, 2023
1 parent dfd5036 commit 7365385
Show file tree
Hide file tree
Showing 5 changed files with 774 additions and 10 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: yyjsonr
Type: Package
Title: Fast JSON, GeoJSON and NDJSON Parsing and Serialisation
Version: 0.1.11
Version: 0.1.12
Authors@R: c(
person("Mike", "FC", role = c("aut", "cre"), email = "[email protected]"),
person("Yao", "Yuan", role = "cph", email = "[email protected]",
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@

# yyjsonr 0.1.12 2023-10-29

* Fix an off-by-one error when reporting line numbers in NDJSON handling.
* Increase buffer size when reading lines from NDJSON files.
* `MAX_LINE_LENGTH` is now 131072 (was 10000)

# yyjsonr 0.1.11 2023-10-27

* Writing to JSON objects now supports a `digits` argument for rounding floating
Expand Down
16 changes: 7 additions & 9 deletions src/ndjson.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include "R-yyjson-parse.h"
#include "R-yyjson-serialize.h"

#define MAX_LINE_LENGTH 10000
#define MAX_LINE_LENGTH 131072
#define INIT_LIST_LENGTH 64


Expand Down Expand Up @@ -61,16 +61,14 @@ SEXP grow_list(SEXP oldlist) {
// For now (2023-08-09), ndjson->list will use method (2) and
// ndjson->data.frame will use method (10)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#define BUF_SIZE 65536
int count_lines(const char *filename)
{
char buf[BUF_SIZE];
int count_lines(const char *filename) {
char buf[MAX_LINE_LENGTH];
int counter = 0;

gzFile file = gzopen(filename, "r");

for(;;) {
size_t res = gzfread(buf, 1, BUF_SIZE, file);
size_t res = gzfread(buf, 1, MAX_LINE_LENGTH, file);

int i;
for(i = 0; i < res; i++) {
Expand Down Expand Up @@ -179,7 +177,7 @@ SEXP parse_ndjson_file_as_list_(SEXP filename_, SEXP nread_, SEXP nskip_, SEXP p

if (doc == NULL) {
output_verbose_error(buf, err);
warning("Couldn't parse NDJSON row %i. Inserting 'NULL'\n", i);
warning("Couldn't parse NDJSON row %i. Inserting 'NULL'\n", i + 1);
SET_VECTOR_ELT(list_, i, R_NilValue);
} else {
SET_VECTOR_ELT(list_, i, parse_json_from_str(buf, &opt));
Expand Down Expand Up @@ -311,7 +309,7 @@ SEXP parse_ndjson_file_as_df_(SEXP filename_, SEXP nread_, SEXP nskip_, SEXP npr
yyjson_doc *doc = yyjson_read_opts(buf, strlen(buf), opt.yyjson_read_flag, NULL, &err);
if (doc == NULL) {
output_verbose_error(buf, err);
error("Couldn't parse JSON during probe line %i\n", i);
error("Couldn't parse JSON during probe line %i\n", i + 1);
}

yyjson_val *obj = yyjson_doc_get_root(doc);
Expand Down Expand Up @@ -413,7 +411,7 @@ SEXP parse_ndjson_file_as_df_(SEXP filename_, SEXP nread_, SEXP nskip_, SEXP npr
yyjson_doc *doc = yyjson_read_opts(buf, strlen(buf), opt.yyjson_read_flag, NULL, &err);
if (doc == NULL) {
output_verbose_error(buf, err);
error("Couldn't parse JSON on line %i\n", i);
error("Couldn't parse JSON on line %i\n", i + 1);
}

yyjson_val *obj = yyjson_doc_get_root(doc);
Expand Down
Loading

0 comments on commit 7365385

Please sign in to comment.