From 651bb2134b227a149f1fa590398d13feb22a36f4 Mon Sep 17 00:00:00 2001 From: f0xeri Date: Mon, 4 Mar 2024 01:39:58 +0400 Subject: [PATCH] formatted strings --- check/CheckExpr.cpp | 8 ++++ codegen/CodeGen.cpp | 92 +++++++++++++++++++++++++++++++++++++++++ lexer/Lexer.cpp | 19 ++++++--- parser/AST.hpp | 13 ++++++ parser/ASTFwdDecl.hpp | 4 +- parser/ParseExpr.cpp | 72 +++++++++++++++++++++++++++++++- parser/Parser.hpp | 3 +- source/SourceBuffer.cpp | 4 ++ source/SourceBuffer.hpp | 1 + 9 files changed, 207 insertions(+), 9 deletions(-) diff --git a/check/CheckExpr.cpp b/check/CheckExpr.cpp index 47d2c6d..9a8b1c2 100644 --- a/check/CheckExpr.cpp +++ b/check/CheckExpr.cpp @@ -98,6 +98,14 @@ namespace Slangc::Check { return true; } + bool checkExpr(const FormattedStringExprPtr &expr, Context &context, std::vector &errors) { + bool result = true; + for (const auto &arg: expr->values) { + result &= checkExpr(arg, context, errors); + } + return result; + } + bool checkExpr(const UnaryOperatorExprPtr &expr, Context &context, std::vector &errors) { auto result = checkExpr(expr->expr, context, errors); if (!result) return false; diff --git a/codegen/CodeGen.cpp b/codegen/CodeGen.cpp index 28e1703..a88aa8a 100644 --- a/codegen/CodeGen.cpp +++ b/codegen/CodeGen.cpp @@ -37,6 +37,98 @@ namespace Slangc { return context.builder->CreateGlobalStringPtr(value, "", 0, context.module.get()); } + auto FormattedStringExprNode::codegen(Slangc::CodeGenContext &context, std::vector &errors) -> llvm::Value * { + if (context.debug) context.debugBuilder->emitLocation(loc); + // llvm ir code do something like this: + /* + size_t needed = snprintf(NULL, 0, "%s: %s (%d)", msg, strerror(errno), errno) + 1; + char *buffer = malloc(needed); + sprintf(buffer, "%s: %s (%d)", msg, strerror(errno), errno); + return buffer; + */ + // int snprintf (char* s, size_t n, const char* format, ...); + auto snprintfFunc = context.module->getOrInsertFunction( + "snprintf", + FunctionType::get( + Type::getInt32Ty(*context.llvmContext),{ + PointerType::get(Type::getInt8Ty(*context.llvmContext), 0), + Type::getInt64Ty(*context.llvmContext), + PointerType::get(Type::getInt8Ty(*context.llvmContext), 0) + }, + true + ) + ); + + std::vector args; + std::vector argTypes; + Value* formatString = nullptr; + std::string fmtString; + // build format string and args + for (auto& arg: values) { + auto temp = context.loadValue; + context.loadValue = true; + auto val = processNode(arg, context, errors); + context.loadValue = temp; + auto exprType = getExprType(arg, context.context, errors).value(); + bool charArray = false; + if (auto arr = std::get_if(&exprType)) { + if (auto type = std::get_if(&arr->get()->type)) { + if (type->get()->type == "character") { + charArray = true; + } + } + } + if (auto arr = std::get_if(&arg)) charArray = true; + + if (val->getType()->isFloatingPointTy()) { + val = context.builder->CreateFPExt(val, Type::getDoubleTy(*context.llvmContext)); + fmtString +="%f"; + } + else if (val->getType()->isPointerTy() && charArray) { + fmtString += "%s"; + } + else if (val->getType()->isPointerTy()) { + fmtString += "%p"; + } + else if (val->getType()->isIntegerTy(8)) { + fmtString += "%c"; + } + else if (val->getType()->isIntegerTy(1)) { + val = context.builder->CreateIntCast(val, Type::getInt32Ty(*context.llvmContext), false); + fmtString += "%d"; + } + else if (val->getType()->isIntegerTy()) { + fmtString += "%d"; + } + else { + fmtString += "%s"; + } + + args.push_back(val); + argTypes.push_back(val->getType()); + } + formatString = context.builder->CreateGlobalStringPtr(fmtString); + args.insert(args.begin(), formatString); + args.insert(args.begin(), ConstantInt::get(Type::getInt64Ty(*context.llvmContext), 0)); + args.insert(args.begin(), ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(*context.llvmContext), 0))); + + auto needed = context.builder->CreateCall(snprintfFunc, args); + auto neededPlusOne = context.builder->CreateAdd(needed, ConstantInt::get(Type::getInt32Ty(*context.llvmContext), 1)); + auto mallocFunc = context.module->getOrInsertFunction( + "malloc", + FunctionType::get( + PointerType::get(Type::getInt8Ty(*context.llvmContext), 0), + Type::getInt32Ty(*context.llvmContext), + false + ) + ); + auto buffer = context.builder->CreateCall(mallocFunc, neededPlusOne); + args[0] = buffer; + args[1] = context.builder->CreateIntCast(neededPlusOne, Type::getInt64Ty(*context.llvmContext), false); + auto call = context.builder->CreateCall(snprintfFunc, args); + return buffer; + } + auto TypeExprNode::codegen(CodeGenContext &context, std::vector& errors) -> Value* { return {}; } diff --git a/lexer/Lexer.cpp b/lexer/Lexer.cpp index 2ab45eb..f999549 100644 --- a/lexer/Lexer.cpp +++ b/lexer/Lexer.cpp @@ -153,6 +153,10 @@ namespace Slangc { if (identifierValue.empty()) { return false; } + // formatted string check + if (identifierValue == "f" && sourceText.at(identifierValue.size()) == '"') { + return false; + } auto identifierCol = currentColumn; currentColumn += identifierValue.size(); sourceText.remove_prefix(identifierValue.size()); @@ -208,12 +212,16 @@ namespace Slangc { } bool Lexer::lexString(std::string_view &sourceText) { + bool isFormatted = false; if (sourceText.front() != '"') { - return false; + if (sourceText.starts_with("f\"")) { + isFormatted = true; + } + else return false; } auto stringColumn = currentColumn; //auto stringValueView = takeWhile(sourceText.substr(1), [=](char c) { return c != '"'; }); - auto endIt = sourceText.begin() + 1; + auto endIt = sourceText.begin() + (isFormatted ? 2 : 1); while (endIt != sourceText.end()) { if (*endIt == '"') if (*(endIt - 1) != '\\') { @@ -221,7 +229,8 @@ namespace Slangc { } ++endIt; } - auto stringValueView = sourceText.substr(1, std::distance(sourceText.begin(), endIt - 1)); + // if isFormatted, take full string including f" and closing " + auto stringValueView = sourceText.substr(0, std::distance(sourceText.begin(), endIt + (isFormatted ? 1 : 1))); if (!stringValueView.empty()) { if (stringValueView.back() == sourceText.back()) { @@ -230,8 +239,8 @@ namespace Slangc { } } - currentColumn += stringValueView.size() + 2; - sourceText.remove_prefix(stringValueView.size() + 2); + currentColumn += stringValueView.size(); + sourceText.remove_prefix(stringValueView.size()); auto stringValue = std::string(stringValueView); diff --git a/parser/AST.hpp b/parser/AST.hpp index 5b838a5..30221dd 100644 --- a/parser/AST.hpp +++ b/parser/AST.hpp @@ -133,6 +133,19 @@ namespace Slangc { } }; + struct FormattedStringExprNode { + SourceLoc loc{0, 0}; + bool isConst = false; + std::vector values; + FormattedStringExprNode(SourceLoc loc, std::vector values) : loc(loc), values(std::move(values)) {}; + auto codegen(CodeGenContext &context, std::vector& errors) -> llvm::Value*; + auto getType(const Context& analysis, std::vector& errors) -> std::optional { + return std::make_unique(loc, std::nullopt, + std::make_unique("character"), + std::make_unique(loc, 0)); + } + }; + struct NilExprNode { SourceLoc loc{0, 0}; std::optional type; diff --git a/parser/ASTFwdDecl.hpp b/parser/ASTFwdDecl.hpp index 5fefc61..3eba08b 100644 --- a/parser/ASTFwdDecl.hpp +++ b/parser/ASTFwdDecl.hpp @@ -27,6 +27,7 @@ namespace Slangc { struct OperatorExprNode; struct RealExprNode; struct StringExprNode; + struct FormattedStringExprNode; struct UnaryOperatorExprNode; struct VarExprNode; struct IndexExprNode; @@ -44,6 +45,7 @@ namespace Slangc { using OperatorExprPtr = std::shared_ptr; using RealExprPtr = std::shared_ptr; using StringExprPtr = std::shared_ptr; + using FormattedStringExprPtr = std::shared_ptr; using UnaryOperatorExprPtr = std::shared_ptr; using VarExprPtr = std::shared_ptr; using IndexExprPtr = std::shared_ptr; @@ -53,7 +55,7 @@ namespace Slangc { using ExprPtrVariant = std::variant; using VarExprPtrVariant = std::variant; diff --git a/parser/ParseExpr.cpp b/parser/ParseExpr.cpp index a939e34..fdebdf2 100644 --- a/parser/ParseExpr.cpp +++ b/parser/ParseExpr.cpp @@ -192,8 +192,7 @@ namespace Slangc { else if (tok.type == TokenType::Float) return createExpr(loc, std::stof(tok.value)); else if (tok.type == TokenType::Nil) return createExpr(loc); else if (tok.type == TokenType::String) { - if (tok.value.size() == 1) return createExpr(loc, tok.value[0]); - return createExpr(loc, tok.value); + return parseString(); } else if (tok.type == TokenType::Identifier) { return parseVar(); @@ -252,4 +251,73 @@ namespace Slangc { return expr; } + auto Parser::parseString() -> std::optional { + auto tok = prevToken(); + SourceLoc loc = tok.location; + + if (tok.value.starts_with("f\"")) { + auto value = tok.value.substr(2, tok.value.size() - 3); + std::vector values; + std::string currentValue; + bool closedFmt = true; + for (size_t i = 0; i < value.size(); ++i) { + auto ch = value[i]; + // if {{ then push { to currentValue + if (ch == '{') { + if (value.size() > i + 1 && value[i + 1] == '{') { + currentValue += ch; + i += 1; + } + else if (!currentValue.empty()) { + values.push_back(createExpr(loc, currentValue)); + currentValue.clear(); + closedFmt = false; + } + } + else if (ch == '}') { + if (closedFmt && value.size() > i + 1 && value[i + 1] == '}') { + currentValue += ch; + i += 1; + } + else + if (!currentValue.empty()) { + auto buffer = SourceBuffer::CreateFromString(currentValue); + Lexer lexer(std::move(buffer), errors); + lexer.tokenize(); + Parser parser(filepath, lexer.tokens, driver, context, errors); + auto expr = parser.parseExpr(); + if (expr.has_value() && parser.token == parser.tokens.end() - 1) { + values.push_back(std::move(expr.value())); + } + else { + errors.emplace_back(filename, "Failed to parse expression in formatted string literal.", loc); + hasError = true; + } + currentValue.clear(); + closedFmt = true; + } + } + else { + currentValue += ch; + } + } + if (!currentValue.empty()) { + values.push_back(createExpr(loc, currentValue)); + } + if (!closedFmt) { + errors.emplace_back(filename, "Failed to parse formatted string literal: missing closing '}'.", loc); + hasError = true; + } + return createExpr(loc, std::move(values)); + } + else { + auto value = tok.value.substr(1, tok.value.size() - 2); + if (value.size() == 1) + return createExpr(loc, value[0]); + else + return createExpr(loc, value); + } + return std::nullopt; + } + } // Slangc \ No newline at end of file diff --git a/parser/Parser.hpp b/parser/Parser.hpp index c97bc31..7b5fddb 100644 --- a/parser/Parser.hpp +++ b/parser/Parser.hpp @@ -27,7 +27,7 @@ namespace Slangc { Driver &driver; std::filesystem::path filepath; public: - explicit Parser(std::filesystem::path &filepath, std::vector tokens, Driver &driver, Context &context, std::vector &errors) + Parser(std::filesystem::path &filepath, std::vector tokens, Driver &driver, Context &context, std::vector &errors) : context(context), driver(driver), errors(errors), tokens(std::move(tokens)), filepath(filepath) { token = this->tokens.begin(); filename = filepath.string(); @@ -77,6 +77,7 @@ namespace Slangc { auto parseMethodDecl(const std::string& typeName, size_t vtableIndex) -> std::optional; auto parseClassDecl() -> std::optional; auto parseVarDecl() -> std::optional; + auto parseString() -> std::optional; private: std::vector tokens; diff --git a/source/SourceBuffer.cpp b/source/SourceBuffer.cpp index cd0453c..e5a0e71 100644 --- a/source/SourceBuffer.cpp +++ b/source/SourceBuffer.cpp @@ -41,4 +41,8 @@ namespace Slangc { SourceBuffer::SourceBuffer(std::string filename, std::string text) : filename(std::move(filename)), text(std::move(text)) {} SourceBuffer::SourceBuffer(SourceBuffer &&arg) noexcept : filename(std::move(arg.filename)), text(std::move(arg.text)) {} + + auto SourceBuffer::CreateFromString(std::string_view text) -> SourceBuffer { + return SourceBuffer{"", std::string(text)}; + } } // Slangc \ No newline at end of file diff --git a/source/SourceBuffer.hpp b/source/SourceBuffer.hpp index 35b132a..ffe652c 100644 --- a/source/SourceBuffer.hpp +++ b/source/SourceBuffer.hpp @@ -14,6 +14,7 @@ namespace Slangc { class SourceBuffer { public: static auto CreateFromFile(std::string_view path) -> llvm::Expected; + static auto CreateFromString(std::string_view text) -> SourceBuffer; SourceBuffer() = delete; SourceBuffer(const SourceBuffer& arg) = default; SourceBuffer(SourceBuffer&& arg) noexcept;