Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse streams incrementally. #40

Merged
merged 11 commits into from
Mar 24, 2015
68 changes: 50 additions & 18 deletions src/json.hpp.re2c
Original file line number Diff line number Diff line change
Expand Up @@ -1716,6 +1716,12 @@ class basic_json
return parser(s).parse();
}

/// deserialize from stream
static basic_json parse(std::istream& i)
{
    // bind a parser to the stream and let it produce the JSON value
    parser stream_parser(i);
    return stream_parser.parse();
}

/// deserialize from stream
friend std::istream& operator>>(std::istream& i, basic_json& j)
{
Expand Down Expand Up @@ -3047,11 +3053,20 @@ class basic_json

/// constructor with a given buffer
inline lexer(const string_t& s) noexcept
: m_content(reinterpret_cast<const lexer_char_t*>(s.c_str()))
: m_buffer(s), m_stream(nullptr)
{
m_content = reinterpret_cast<const lexer_char_t*>(s.c_str());
m_start = m_cursor = m_content;
m_limit = m_content + s.size();
}
/// constructor for an input stream; buffers the first line
/// NOTE(review): dereferences s unconditionally in a noexcept function —
/// assumes the caller (parser) always passes a non-null stream; confirm.
inline lexer(std::istream* s) noexcept
    : m_stream(s)
{
    // std::getline discards the '\n' delimiter; re-append it so tokens
    // that end exactly at a line break stay separated from the first
    // character of the next line appended later by yyfill()
    getline(*m_stream, m_buffer);
    m_buffer += "\n";
    m_content = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
    m_start = m_cursor = m_content;
    // m_limit points one past the last buffered character
    m_limit = m_content + m_buffer.size();
}

/// default constructor
inline lexer() = default;
Expand Down Expand Up @@ -3177,7 +3192,7 @@ class basic_json
inline token_type scan() noexcept
{
// pointer for backtracking information
const lexer_char_t* m_marker = nullptr;
m_marker = nullptr;

// remember the begin of the token
m_start = m_cursor;
Expand All @@ -3187,10 +3202,11 @@ class basic_json
re2c:define:YYCURSOR = m_cursor;
re2c:define:YYLIMIT = m_limit;
re2c:define:YYMARKER = m_marker;
re2c:define:YYFILL = "{ yyfill(); }";
re2c:yyfill:parameter = 0;
re2c:indent:string = " ";
re2c:indent:top = 1;
re2c:labelprefix = "basic_json_parser_";
re2c:yyfill:enable = 0;

// whitespace
ws = [ \t\n\r]+;
Expand Down Expand Up @@ -3240,6 +3256,28 @@ class basic_json
// anything else is an error
. { return token_type::parse_error; }
*/

}

/// append data from the stream to the internal buffer
/// append data from the stream to the internal buffer
///
/// Called by re2c (YYFILL) when the cursor reaches m_limit. Drops the
/// already-consumed prefix of the buffer, appends the next input line,
/// and relocates all lexer pointers into the (possibly reallocated)
/// buffer storage.
void yyfill() noexcept
{
    // nothing to do without a stream or once it is exhausted
    if (not m_stream or not *m_stream)
    {
        return;
    }

    // remember pointer positions as offsets; m_marker may still be
    // nullptr here (scan() resets it before re2c stores a backtracking
    // point), and nullptr arithmetic is undefined behavior, so guard it
    const ssize_t offset_start = m_start - m_content;
    const ssize_t offset_marker = (m_marker == nullptr)
                                  ? 0
                                  : m_marker - m_start;
    const ssize_t offset_cursor = m_cursor - m_start;

    // discard the part of the buffer that was already consumed
    m_buffer.erase(0, static_cast<size_t>(offset_start));

    // std::getline discards the '\n' delimiter; re-append it so tokens
    // on different lines cannot be fused together
    std::string line;
    std::getline(*m_stream, line);
    m_buffer += line;
    m_buffer += "\n";

    // rebase all pointers onto the new buffer storage
    m_content = reinterpret_cast<const lexer_char_t*>(m_buffer.c_str());
    m_start = m_content;
    if (m_marker != nullptr)
    {
        m_marker = m_start + offset_marker;
    }
    m_cursor = m_start + offset_cursor;
    // one past the last character — consistent with the constructors,
    // which set m_limit = m_content + size() (the previous "- 1"
    // silently cut off the buffer's final character)
    m_limit = m_start + m_buffer.size();
}

/// return string representation of last read token
Expand Down Expand Up @@ -3404,10 +3442,16 @@ class basic_json
}

private:
/// optional input stream
std::istream* m_stream;
/// the buffer
string_t m_buffer;
/// the buffer pointer
const lexer_char_t* m_content = nullptr;
/// pointer to he beginning of the current symbol
/// pointer to the beginning of the current symbol
const lexer_char_t* m_start = nullptr;
/// pointer for backtracking information
const lexer_char_t* m_marker = nullptr;
/// pointer to the current symbol
const lexer_char_t* m_cursor = nullptr;
/// pointer to the end of the buffer
Expand All @@ -3421,25 +3465,15 @@ class basic_json
{
public:
/// constructor for strings
inline parser(const string_t& s) : m_buffer(s), m_lexer(m_buffer)
inline parser(const string_t& s) : m_lexer(s)
{
    // prime the token stream: fetch the first token so parse() can start
    get_token();
}

/// a parser reading from an input stream
inline parser(std::istream& _is)
inline parser(std::istream& _is) : m_lexer(&_is)
{
while (_is)
{
string_t input_line;
std::getline(_is, input_line);
m_buffer += input_line;
}

// initializer lexer
m_lexer = lexer(m_buffer);

// read first token
get_token();
}
Expand Down Expand Up @@ -3625,8 +3659,6 @@ class basic_json
}

private:
/// the buffer
string_t m_buffer;
/// the type of the last read token
typename lexer::token_type last_token = lexer::token_type::uninitialized;
/// the lexer
Expand Down