Skip to content

Commit

Permalink
initial state based parser for gemini
Browse files Browse the repository at this point in the history
  • Loading branch information
martinrotter committed Dec 18, 2024
1 parent bdc71f3 commit 65bb665
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 20 deletions.
98 changes: 80 additions & 18 deletions src/librssguard/network-web/gemini/geminiparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,119 @@ QString GeminiParser::geminiToHtml(const QByteArray& gemini_data) {
QString gemini_hypertext =
QString::fromUtf8(gemini_data).replace(QSL("\r\n"), QSL("\n")).replace(QSL("\r"), QSL("\n"));
QStringList lines = gemini_hypertext.split(QL1C('\n'));
bool normal_mode = true;
mode = State::Normal;

static QRegularExpression exp_link(R"(^=>\s+([^\s]+)(?:\s+(\w.+))?$)");
static QRegularExpression exp_heading(R"(^(#{1,6})\s+(.+)$)");
static QRegularExpression exp_list(R"(^\*\s(.+)$)");
static QRegularExpression exp_quote(R"((?:^>$|^>\s?(.+)$))");
static QRegularExpression exp_pre(R"(^```.*$)");
static QRegularExpression exp_text(R"()");

QRegularExpressionMatch mtch;
QString title;

for (const QString& line : lines) {
if ((mtch = exp_pre.match(line)).hasMatch()) {
normal_mode = !normal_mode;
// Begin or end PRE block.
switch (mode) {
case State::Pre:
// Ending of a PRE block.
html += endBlock(State::Normal);
break;

default:
// Beginning of a PRE block.
html += endBlock(State::Normal);
html += beginBlock(State::Pre);
break;
}
continue;
}

if (normal_mode) {
if (mode != State::Pre) {
if ((mtch = exp_link.match(line)).hasMatch()) {
html += endBlock(State::Normal);
html += parseLink(mtch);
}
else if ((mtch = exp_heading.match(line)).hasMatch()) {
html += endBlock(State::Normal);
html += parseHeading(mtch, title.isEmpty() ? &title : nullptr);
}
else if ((mtch = exp_list.match(line)).hasMatch()) {
html += beginBlock(State::List);
html += parseList(mtch);
}
else if ((mtch = exp_quote.match(line)).hasMatch()) {
html += beginBlock(State::Quote);
html += parseQuote(mtch);
}
else {
html += endBlock(State::Normal);
html += parseTextInNormalMode(line);
}
}
else {
// Add new line in PRE mode.
html += parseInPreMode(line);
}
}

html += endBlock(State::Normal);

// IOFactory::writeFile("aa", html.toUtf8());

return QSL("<html>"
"<head><title>%1</title></head>"
"<body>%2</body>"
"</html>").arg(title, html);
"</html>")
.arg(title, html);
}

// Opens a block of the given kind and returns the HTML needed for the transition.
//
// Nothing is emitted when the parser is already inside a block of that kind, so
// consecutive list or quote lines share a single wrapper element. When a block of
// a different kind is still open, its closing tag is emitted first; without this
// a quote line directly following a list line would leave the "<ul>" unclosed.
//
// Side effect: "mode" becomes "new_mode".
QString GeminiParser::beginBlock(State new_mode) {
  if (new_mode == mode) {
    return QString();
  }

  // endBlock() yields the closing tag of whatever block is currently open (empty
  // when there is none) and switches "mode" to "new_mode".
  QString markup = endBlock(new_mode);

  switch (new_mode) {
    case State::List:
      markup += "<ul>\n";
      break;

    case State::Quote:
      markup += "<div align=\"center\" style=\""
                "background-color: #E1E5EE;"
                "font-style: italic;"
                "margin-left: 20px;"
                "margin-right: 20px;\">\n";
      break;

    case State::Pre:
      markup += "<pre style=\"background-color: #E1E5EE;\">\n";
      break;

    case State::Normal:
      // Normal text has no wrapper element, so there is nothing to open.
      break;
  }

  return markup;
}

// Leaves the currently open block and moves the parser to "new_mode".
//
// Returns the closing tag of the block that was open. The result is empty when
// the parser already is in "new_mode" (in which case nothing changes) or when the
// mode being left has no wrapper element.
QString GeminiParser::endBlock(State new_mode) {
  if (new_mode == mode) {
    return QString();
  }

  // Remember which block is being left before the state is overwritten.
  const State previous_mode = mode;

  mode = new_mode;

  switch (previous_mode) {
    case State::List:
      return "</ul>\n";

    case State::Quote:
      return "</div>\n";

    case State::Pre:
      return "</pre>\n";

    default:
      return QString();
  }
}

QString GeminiParser::parseLink(const QRegularExpressionMatch& mtch) const {
Expand All @@ -75,29 +144,22 @@ QString GeminiParser::parseHeading(const QRegularExpressionMatch& mtch, QString*
return QSL("<h%1>%2</h%1>\n").arg(QString::number(level), header);
}

// Renders a single quote line as one "<div>" row.
//
// The styled wrapper around consecutive quote lines is produced by beginBlock(),
// so only the row itself is emitted here. The first capture group of "mtch" is
// used as the quoted text; it is HTML-escaped because it comes straight from the
// source document. A quote line without text yields an empty row.
QString GeminiParser::parseQuote(const QRegularExpressionMatch& mtch) const {
  const QString text = mtch.captured(1).toHtmlEscaped();

  return QSL("<div>%1</div>\n").arg(text.isEmpty() ? QString() : QSL("“%1”").arg(text));
}

// Renders a single list line as an "<li>" element.
//
// The enclosing "<ul>" is produced by beginBlock(). The first capture group of
// "mtch" is used as the item text; it is HTML-escaped because it comes straight
// from the source document.
QString GeminiParser::parseList(const QRegularExpressionMatch& mtch) const {
  const QString text = mtch.captured(1).toHtmlEscaped();

  return QSL("<li>%1</li>\n").arg(text);
}

// Renders a plain text line as a paragraph.
//
// The line is HTML-escaped first so that characters such as "<" or "&" in the
// source document are displayed literally instead of being treated as markup.
QString GeminiParser::parseTextInNormalMode(const QString& line) const {
  return QSL("<p>%1</p>\n").arg(line.toHtmlEscaped());
}

// Renders one line of a preformatted block.
//
// The surrounding "<pre>" element is opened and closed by beginBlock() and
// endBlock(), so the line is emitted only as escaped text followed by a newline;
// wrapping every line in its own "<pre>" would nest elements inside that block.
QString GeminiParser::parseInPreMode(const QString& line) const {
  return QSL("%1\n").arg(line.toHtmlEscaped());
}
24 changes: 22 additions & 2 deletions src/librssguard/network-web/gemini/geminiparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,40 @@
#ifndef GEMINIPARSER_H
#define GEMINIPARSER_H

#include <QString>
#include <QRegularExpressionMatch>
#include <QString>

// Converts Gemini hypertext into an HTML document.
//
// The parser keeps track of the kind of block it is currently inside ("mode"),
// which is mutated while a document is being converted.
class GeminiParser {
  public:
    // Converts raw Gemini data into a complete HTML document.
    QString geminiToHtml(const QByteArray& gemini_data);

  private:
    enum class State {
      // Regular state.
      Normal,

      // Inside list.
      List,

      // Inside quote.
      Quote,

      // Inside PRE.
      Pre
    };

    // Per-line renderers; each returns the HTML for a single source line.
    QString parseLink(const QRegularExpressionMatch& mtch) const;
    QString parseHeading(const QRegularExpressionMatch& mtch, QString* clean_header = nullptr) const;
    QString parseQuote(const QRegularExpressionMatch& mtch) const;
    QString parseList(const QRegularExpressionMatch& mtch) const;
    QString parseTextInNormalMode(const QString& line) const;
    QString parseInPreMode(const QString& line) const;

    // Block transitions; both update "mode" and return the markup that opens
    // or closes the wrapper element of a block.
    QString beginBlock(State new_mode);
    QString endBlock(State new_mode);

  private:
    // Kind of block the parser is currently inside. Initialized here so that it
    // never holds an indeterminate value.
    State mode = State::Normal;
};

#endif // GEMINIPARSER_H

0 comments on commit 65bb665

Please sign in to comment.