diff --git a/src/checker/compiler.h b/src/checker/compiler.h index e966dfa..80a667d 100644 --- a/src/checker/compiler.h +++ b/src/checker/compiler.h @@ -19,10 +19,10 @@ namespace ts::checker { Variable, //const x = true; Function, //function x() {} Class, //class X {} - Inline, //subroutines of conditional type, mapped type, .. [deprecated] + Inline, //subroutines of type argument defaults Type, //type alias, e.g. `foo` in `type foo = string;` TypeArgument, //template variable, e.g. T in function foo(bar: T); - TypeVariable, //type variables in distributive conditional types, mapped types + TypeVariable, //type variables in distributive conditional types, mapped types, reserve a stack entry }; struct SourceMapEntry { @@ -54,18 +54,17 @@ namespace ts::checker { explicit TypeArgumentUsage(unsigned int symbolIndex, unsigned int ip): symbolIndex(symbolIndex), ip(ip) {} }; - struct Frame; struct Subroutine; struct Symbol { string name; + bool active = true; //will be switched to false when it goes out of scope SymbolType type = SymbolType::Type; unsigned int index{}; //symbol index of the current frame unsigned int pos{}; unsigned int end{}; unsigned int declarations = 1; sharedOpt routine = nullptr; - shared frame = nullptr; }; //aka Branch @@ -95,18 +94,21 @@ namespace ts::checker { } typeArgumentUsages.emplace_back(symbol->index, ip); } - }; //A subroutine is a sub program that can be executed by knowing its address. //They are used for example for type alias, mapped type, conditional type (for false and true side) struct Subroutine { vector ops; //OPs, and its parameters + unsigned int lastOpIp; SourceMap sourceMap; string_view identifier{}; unsigned int index{}; + unsigned int slots{}; + unsigned int slotIP{}; unsigned int nameAddress{}; SymbolType type = SymbolType::Type; + vector symbols{}; vector
sections; unsigned int activeSection = 0; @@ -134,6 +136,7 @@ namespace ts::checker { } void pushOp(OP op) { + lastOpIp = ops.size(); ops.push_back(op); if (!isIgnoreNextSectionOP) { @@ -240,17 +243,6 @@ namespace ts::checker { } }; - struct Frame { - bool conditional = false; - shared previous; - unsigned int id = 0; //in a tree the unique id, needed to resolve symbols during runtime. - vector symbols{}; - - Frame() = default; - - Frame(shared previous): previous(std::move(previous)) {} - }; - struct StorageItem { string_view value; unsigned int address{}; @@ -425,45 +417,28 @@ namespace ts::checker { class Program { public: - vector ops; //OPs of "main" - SourceMap sourceMap; //SourceMap of "main" - vector storage; //all kind of literals, as strings unordered_map> storageMap; //used to deduplicated storage entries unsigned int storageIndex{}; - shared frame = make_shared(); //tracks which subroutine is active (end() is), so that pushOp calls are correctly assigned. vector> activeSubroutines; vector> subroutines; -// Optimiser optimiser{&subroutines}; - - //implicit is when a OP itself triggers in the VM a new frame, without having explicitly a OP::Frame - void popFrame() { - this->pushOp(OP::FrameEnd); - popFrameImplicit(); - } - - shared pushFrame(bool implicit = false) { - if (!implicit) this->pushOp(OP::Frame); - auto id = frame->id; - frame = make_shared(frame); - frame->id = id + 1; - return frame; + Program() { + pushSubroutineNameLess(); //main } /** * Creates a new nameless subroutine, used for example in mapped-type, conditional type * @return */ - unsigned int pushSubroutineNameLess(const shared &node) { + unsigned int pushSubroutineNameLess() { auto routine = make_shared(); routine->type = SymbolType::Inline; routine->index = subroutines.size(); - pushFrame(true); //subroutines have implicit stack frames due to call convention subroutines.push_back(routine); activeSubroutines.push_back(subroutines.back()); return routine->index; @@ -481,159 +456,133 @@ namespace ts::checker { /** * Push the subroutine from the symbol as active. This means it will now be populated with OPs. */ - unsigned int pushSubroutine(string_view name) { + unsigned int pushSubroutine(Symbol &symbol) { //find subroutine - for (auto &&s: frame->symbols) { - if (s.name == name) { - pushFrame(true); //subroutines have implicit stack frames due to call convention - activeSubroutines.push_back(s.routine); - return s.routine->index; - } - } - throw runtime_error(fmt::format("no symbol found for {}", name)); + if (!symbol.routine) throw runtime_error(fmt::format("symbol has no routine {}", symbol.name)); + activeSubroutines.push_back(symbol.routine); + return symbol.routine->index; } shared popSubroutine() { if (activeSubroutines.empty()) throw runtime_error("No active subroutine found"); - popFrameImplicit(); auto subroutine = activeSubroutines.back(); if (subroutine->ops.empty()) { throw runtime_error("Routine is empty"); } - subroutine->end(); + for (auto &&symbol: subroutine->symbols) { + if (symbol.type == SymbolType::TypeVariable) { + } + } + subroutine->end(); subroutine->optimise(); - subroutine->ops.push_back(OP::Return); - //if (subroutine->type == SymbolType::Type) { - // //for type functions, we optimise ...T re-usage - // optimiseRestReuse(subroutines, subroutine); - //} - activeSubroutines.pop_back(); return subroutine; } - Symbol *findSymbol(const string_view &identifier) { - Frame *current = frame.get(); + shared ¤tSubroutine() { + return activeSubroutines.back(); + } - while (true) { + Symbol *findSymbol(const string_view &identifier) { + for (auto subroutine = activeSubroutines.rbegin(); subroutine != activeSubroutines.rend(); ++subroutine) { + auto symbols = (*subroutine)->symbols; //we go in reverse to fetch the closest - for (auto it = current->symbols.rbegin(); it != current->symbols.rend(); ++it) { - if (it->name == identifier) { + for (auto it = symbols.rbegin(); it != symbols.rend(); ++it) { + if (it->active && it->name == identifier) { return &*it; } } - if (!current->previous) break; - current = current->previous.get(); } return nullptr; } - /** - * Remove stack without doing it as OP in the VM. Some other command calls popFrame() already, which makes popFrameImplicit() an implicit popFrame. - * e.g. union, class, etc. all call VM::popFrame(). the current CompilerProgram needs to be aware of that, which this function is for. - */ - void popFrameImplicit() { - if (frame->previous) frame = frame->previous; - } - /** * The address is always written using 4 bytes. * * It sometimes is defined in Program as index to the storage or subroutine and thus is a immediate representation of the address. - * In this case it will be replaced in build() with the real address in the binary (hence why we need 4 bytes, so space stays constant). + * In this case it will be replaced in build with the real address in the binary (hence why we need 4 bytes, so space stays constant). */ void pushAddress(unsigned int address, unsigned int offset = 0) { - auto &ops = getOPs(); + auto &ops = activeSubroutines.back()->ops; vm::writeUint32(ops, offset == 0 ? ops.size() : offset, address); } void pushInt32Address(int32_t address, unsigned int offset = 0) { - auto &ops = getOPs(); + auto &ops = activeSubroutines.back()->ops; vm::writeInt32(ops, offset == 0 ? ops.size() : offset, address); } void pushUint32(unsigned int v) { - auto &ops = getOPs(); + auto &ops = activeSubroutines.back()->ops; vm::writeUint32(ops, ops.size(), v); } void pushUint16(unsigned int v, unsigned int offset = 0) { - auto &ops = getOPs(); + auto &ops = activeSubroutines.back()->ops; vm::writeUint16(ops, offset == 0 ? ops.size() : offset, v); } + shared mainSubroutine() { + return subroutines[0]; + } + void pushError(ErrorCode code, const shared &node) { + auto main = mainSubroutine(); //errors need to be part of main - sourceMap.push(0, node->pos, node->end); - ops.push_back(OP::Error); - vm::writeUint16(ops, ops.size(), (unsigned int) code); + main->sourceMap.push(0, node->pos, node->end); + main->ops.push_back(OP::Error); + vm::writeUint16(main->ops, main->ops.size(), (unsigned int) code); } void pushSymbolAddress(Symbol &symbol) { - auto &ops = getOPs(); - unsigned int frameOffset = 0; - auto current = frame; - while (current) { - if (current == symbol.frame) break; - frameOffset++; - current = current->previous; - } - vm::writeUint16(ops, ops.size(), frameOffset); - vm::writeUint16(ops, ops.size(), symbol.index); - } - - vector &getOPs() { - if (activeSubroutines.size()) return activeSubroutines.back()->ops; - return ops; + vm::writeUint16(currentSubroutine()->ops, currentSubroutine()->ops.size(), symbol.index); } void pushSourceMap(const shared &node) { - if (activeSubroutines.size()) { - activeSubroutines.back()->pushSourceMap(node->pos, node->end); - } else { - sourceMap.push(ops.size(), node->pos, node->end); - } + activeSubroutines.back()->pushSourceMap(node->pos, node->end); } void ignoreNextSectionOP() { - if (activeSubroutines.size()) activeSubroutines.back()->ignoreNextSectionOP(); + activeSubroutines.back()->ignoreNextSectionOP(); } void pushSection() { - if (activeSubroutines.size()) activeSubroutines.back()->pushSection(); + activeSubroutines.back()->pushSection(); } void blockTailCall() { - if (activeSubroutines.size()) activeSubroutines.back()->blockTailCall(); + activeSubroutines.back()->blockTailCall(); } void popSection() { - if (activeSubroutines.size()) activeSubroutines.back()->popSection(); + activeSubroutines.back()->popSection(); + } + + void pushSlots() { + currentSubroutine()->slotIP = ip(); + pushOp(OP::Slots); + pushUint16(0); //will be changed in program build } void pushOp(OP op) { - if (activeSubroutines.size()) { - activeSubroutines.back()->pushOp(op); - } else { - ops.push_back(op); - } + activeSubroutines.back()->pushOp(op); } unsigned int subroutineIndex() { - return activeSubroutines.size() ? activeSubroutines.back()->index : 0; + return activeSubroutines.back()->index; } - shared subroutine() { - return activeSubroutines.size() ? activeSubroutines.back() : subroutines[0]; + unsigned int lastOpIp() { + return activeSubroutines.back()->lastOpIp; } unsigned int ip() { - return getOPs().size(); + return activeSubroutines.back()->ops.size(); } void pushOp(OP op, const sharedOpt &node) { @@ -652,6 +601,17 @@ namespace ts::checker { // } // } + unsigned int createSymbolCheckout() { + return currentSubroutine()->symbols.size(); + } + + void restoreSymbolCheckout(unsigned int checkpoint) { + auto symbols = currentSubroutine()->symbols; + for (; checkpoint &node, sharedOpt frameToUse = nullptr) { - if (!frameToUse) frameToUse = frame; - - for (auto &&v: frameToUse->symbols) { + Symbol &pushSymbol(string_view name, SymbolType type, const shared &node) { + auto subroutine = currentSubroutine(); + for (auto &&v: subroutine->symbols) { if (type != SymbolType::TypeVariable && v.name == name) { v.declarations++; return v; } } -// Symbol symbol{ -// .name = name, -// .type = type, -// .index = (unsigned int) frameToUse->symbols.size(), -// .pos = pos, -// .routine = nullptr, -// .frame = frameToUse, -// }; Symbol symbol; symbol.name = string(name); symbol.type = type; - symbol.index = (unsigned int) frameToUse->symbols.size(); + symbol.index = currentSubroutine()->symbols.size(); symbol.pos = node->pos; symbol.end = node->end; - symbol.frame = frameToUse; - frameToUse->symbols.push_back(std::move(symbol)); - return frameToUse->symbols.back(); + if (type == SymbolType::TypeVariable) subroutine->slots++; + subroutine->symbols.push_back(std::move(symbol)); + return subroutine->symbols.back(); } - Symbol &pushSymbolForRoutine(string_view name, SymbolType type, const shared &node, shared frameToUse = nullptr) { - auto &symbol = pushSymbol(name, type, node, frameToUse); + Symbol &pushSymbolForRoutine(string_view name, SymbolType type, const shared &node) { + auto &symbol = pushSymbol(name, type, node); if (symbol.routine) return symbol; auto routine = make_shared(name); @@ -753,7 +704,6 @@ namespace ts::checker { for (auto &&routine: subroutines) { sourceMapSize += routine->sourceMap.map.size() * (4 * 3); } - sourceMapSize += sourceMap.map.size() * (4 * 3); //write sourcemap bin.push_back(OP::SourceMap); @@ -762,7 +712,7 @@ namespace ts::checker { unsigned int bytecodePosOffset = address; bytecodePosOffset += subroutines.size() * (1 + 4 + 4 + 1); //OP::Subroutine + uint32 name address + uint32 routine address + flags - bytecodePosOffset += 1 + 4; //OP::Main + uint32 address + bytecodePosOffset += 1; //OP::Main for (auto &&routine: subroutines) { for (auto &&map: routine->sourceMap.map) { @@ -773,13 +723,7 @@ namespace ts::checker { bytecodePosOffset += routine->ops.size(); } - for (auto &&map: sourceMap.map) { - vm::writeUint32(bin, bin.size(), bytecodePosOffset + map.bytecodePos); - vm::writeUint32(bin, bin.size(), map.sourcePos); - vm::writeUint32(bin, bin.size(), map.sourceEnd); - } - - address += 1 + 4; //OP::Main + uint32 address + address += 1; //OP::Main address += subroutines.size() * (1 + 4 + 4 + 1); //OP::Subroutine + uint32 name address + uint32 routine address + flags //after the storage data follows the subroutine meta-data. @@ -794,16 +738,14 @@ namespace ts::checker { //after subroutine meta-data follows the actual subroutine code, which we jump over. //this marks the end of the header. bin.push_back(OP::Main); - vm::writeUint32(bin, bin.size(), address); for (auto &&routine: subroutines) { + if (routine->slots) { + vm::writeUint16(routine->ops, routine->slotIP + 1, routine->slots); + } bin.insert(bin.end(), routine->ops.begin(), routine->ops.end()); } - //now the main code is added - bin.insert(bin.end(), ops.begin(), ops.end()); - bin.push_back(OP::Halt); - return string(bin.begin(), bin.end()); } }; @@ -815,6 +757,8 @@ namespace ts::checker { handle(file, program); + program.popSubroutine(); //main + return std::move(program); } @@ -891,23 +835,27 @@ namespace ts::checker { case SyntaxKind::TemplateLiteralType: { auto t = to(node); - program.pushFrame(); + unsigned int size = 0; if (t->head->rawText && !t->head->rawText->empty()) { + size++; program.pushOp(OP::StringLiteral, t->head); program.pushStorage(*t->head->rawText); } for (auto &&sub: t->templateSpans->list) { auto span = to(sub); + size++; handle(to(span)->type, program); if (auto a = to(span->literal)) { if (a->rawText && !a->rawText->empty()) { + size++; program.pushOp(OP::StringLiteral, sub); program.pushStorage(a->rawText ? *a->rawText : ""); } } else if (auto a = to(span->literal)) { if (a->rawText && !a->rawText->empty()) { + size++; program.pushOp(OP::StringLiteral, a); program.pushStorage(a->rawText ? *a->rawText : ""); } @@ -915,20 +863,20 @@ namespace ts::checker { } program.pushOp(OP::TemplateLiteral, node); - program.popFrameImplicit(); + program.pushUint16(size); break; } case SyntaxKind::UnionType: { const auto n = to(node); - program.pushFrame(); - + unsigned int size = 0; for (auto &&s: n->types->list) { + size++; handle(s, program); } program.pushOp(OP::Union, node); - program.popFrameImplicit(); + program.pushUint16(size); break; } case SyntaxKind::TypeReference: { @@ -976,7 +924,7 @@ namespace ts::checker { //todo: for functions/variable embed an error that symbol was declared twice in the same scope } else { //populate routine - program.pushSubroutine(n->name->escapedText); + program.pushSubroutine(symbol); //in symbol subroutines we block TailCalls because want to store the result on the routine //but only if it has no typeParameters if (!n->typeParameters || n->typeParameters->length() == 0) { @@ -988,6 +936,7 @@ namespace ts::checker { handle(p, program); } } + program.pushSlots(); handle(n->type, program); program.popSubroutine(); @@ -1017,8 +966,10 @@ namespace ts::checker { case SyntaxKind::TypeParameter: { const auto n = to(node); auto &symbol = program.pushSymbol(n->name->escapedText, SymbolType::TypeArgument, n); + auto subroutine = program.currentSubroutine(); if (n->defaultType) { - program.pushSubroutineNameLess(n->defaultType); + program.pushSubroutineNameLess(); + program.pushSlots(); handle(n->defaultType, program); auto routine = program.popSubroutine(); program.pushOp(instructions::TypeArgumentDefault, n->name); @@ -1037,31 +988,16 @@ namespace ts::checker { //todo: embed error since function is declared twice } else { if (n->typeParameters) { - program.pushSubroutine(id->escapedText); + program.pushSubroutine(symbol); //when there are type parameters, FunctionDeclaration returns a FunctionRef //which indicates the VM that the function needs to be instantiated first. - auto subroutineIndex = program.pushSubroutineNameLess(n); + auto subroutineIndex = program.pushSubroutineNameLess(); for (auto &¶m: n->typeParameters->list) { handle(param, program); } - - //(v: T) - //(v: T) - - //(k: {v: T}, v: T), ({v: ''}, v: 2) => T=string - //(k: {v: T}, v: T), ({v: ''}, v: 2) => T='' - //(k: K, v: T), ({v: ''}, v: 2) => T=number - - //try to infer type parameters from passed function parameters - for (auto &¶m: n->typeParameters->list) { - } - - //todo - //after types are inferred, apply default if still empty - - //after types are set, apply constraint check + program.pushSlots(); for (auto &¶m: n->parameters->list) { handle(param, program); @@ -1069,7 +1005,7 @@ namespace ts::checker { if (n->type) { handle(n->type, program); } else { - //todo: Infer from body + //todo: Infer from body, put into own subroutine so it is cached program.pushOp(OP::Unknown); if (n->body) { } else { @@ -1082,7 +1018,9 @@ namespace ts::checker { program.pushAddress(subroutineIndex); program.popSubroutine(); } else { - program.pushSubroutine(id->escapedText); + program.pushSubroutine(symbol); + program.pushSlots(); + for (auto &¶m: n->parameters->list) { handle(param, program); } @@ -1173,7 +1111,7 @@ namespace ts::checker { } case SyntaxKind::InterfaceDeclaration: { const auto n = to(node); - program.pushFrame(); + unsigned int size = 0; //first all extend expressions if (n->heritageClauses) { @@ -1181,6 +1119,7 @@ namespace ts::checker { auto heritage = to(node); if (heritage->token == SyntaxKind::ExtendsKeyword) { for (auto &&extendType: heritage->types->list) { + size++; handle(extendType, program); } } @@ -1188,23 +1127,25 @@ namespace ts::checker { } for (auto &&member: n->members->list) { + size++; handle(member, program); } program.pushOp(OP::ObjectLiteral, n->name); - program.popFrameImplicit(); + program.pushUint16(size); break; } case SyntaxKind::TypeLiteral: { const auto n = to(node); - program.pushFrame(); + unsigned int size = 0; for (auto &&member: n->members->list) { + size++; handle(member, program); } program.pushOp(OP::ObjectLiteral, node); - program.popFrameImplicit(); + program.pushUint16(size); break; } case SyntaxKind::ParenthesizedExpression: { @@ -1229,10 +1170,13 @@ namespace ts::checker { } case SyntaxKind::ObjectLiteralExpression: { const auto n = to(node); - program.pushFrame(); - for (auto &&sub: n->properties->list) handle(sub, program); + unsigned int size = 0; + for (auto &&sub: n->properties->list) { + size++; + handle(sub, program); + } program.pushOp(OP::ObjectLiteral, node); - program.popFrameImplicit(); + program.pushUint16(size); break; } case SyntaxKind::CallExpression: { @@ -1266,11 +1210,10 @@ namespace ts::checker { const auto n = to(node); //it seems TS does not care about the condition. the result is always a union of false/true branch. //we could improve that though to make sure that const-expressions are handled - program.pushFrame(); handle(n->whenFalse, program); handle(n->whenTrue, program); program.pushOp(OP::Union, node); - program.popFrameImplicit(); + program.pushUint16(2); break; } case SyntaxKind::ConditionalType: { @@ -1282,27 +1225,28 @@ namespace ts::checker { sharedOpt distributiveOverIdentifier = isTypeReferenceNode(n->checkType) && isIdentifier(to(n->checkType)->typeName) ? to(to(n->checkType)->typeName) : nullptr; program.pushSection(); + auto symbolCheckpoint = program.createSymbolCheckout(); unsigned int distributeJumpIp = 0; if (distributiveOverIdentifier) { //program.pushOp(OP::TypeVariable, distributiveOverIdentifier); - handle(n->checkType, program); //LOADS the input type onto the stack. Distribute pops it then. + handle(n->checkType, program); //LOADS the input type onto the stack. Distribute pops it. //in Distribute we block tail calls as the section is called multiple times program.blockTailCall(); - auto frame = program.pushFrame(true); - //Distribute crash implicit TypeVariable on the stack and populates it - program.pushSymbol(distributiveOverIdentifier->escapedText, SymbolType::TypeVariable, distributiveOverIdentifier); + //Distribute creates implicit TypeVariable on the stack and populates it + //todo: we have to move it to the beginning of the subroutine + auto symbol = program.pushSymbol(distributiveOverIdentifier->escapedText, SymbolType::TypeVariable, distributiveOverIdentifier); program.pushOp(OP::Distribute); distributeJumpIp = program.ip(); + program.pushUint16(symbol.index); program.pushAddress(0); } - auto frame = program.pushFrame(); - frame->conditional = true; - + auto symbolInferCheckpoint = program.createSymbolCheckout(); + //checkType and trueType share symbols (from infer) handle(n->checkType, program); handle(n->extendsType, program); program.pushOp(instructions::Extends, n); @@ -1314,6 +1258,7 @@ namespace ts::checker { program.pushSection(); handle(n->trueType, program); + program.restoreSymbolCheckout(symbolInferCheckpoint); program.popSection(); program.ignoreNextSectionOP(); @@ -1331,19 +1276,18 @@ namespace ts::checker { program.pushInt32Address(falseEndIp - trueJumpAddressIp + 1, trueJumpAddressIp); if (distributiveOverIdentifier) { - //auto routine = program.popSubroutine(); - //handle(n->checkType, program); //LOADS the input type onto the stack. Distribute pops it then. - program.pushAddress(falseEndIp - distributeJumpIp + 6, distributeJumpIp); + //change the address from OP::Distribute to jump to the end of all its expressions + program.pushAddress(falseEndIp - distributeJumpIp + 6, distributeJumpIp + 2); + program.ignoreNextSectionOP(); - program.pushOp(OP::FrameReturnJump); + program.pushOp(OP::Jump); program.pushInt32Address(-(program.ip() - distributeJumpIp)); - program.popFrameImplicit(); } else { program.ignoreNextSectionOP(); - program.popFrame(); } program.popSection(); + program.restoreSymbolCheckout(symbolCheckpoint); // debug("ConditionalType {}", !!distributiveOverIdentifier); break; @@ -1367,13 +1311,14 @@ namespace ts::checker { // } //value inference case SyntaxKind::ArrayLiteralExpression: { - program.pushFrame(); + unsigned int size = 0; for (auto &&v: to(node)->elements->list) { + size++; handle(v, program); program.pushOp(OP::TupleMember, v); } program.pushOp(OP::Tuple, node); - program.popFrameImplicit(); + program.pushUint16(size); //todo: handle `as const`, widen if not const break; } @@ -1384,10 +1329,11 @@ namespace ts::checker { break; } case SyntaxKind::TupleType: { - program.pushFrame(); + unsigned int size = 0; auto n = to(node); for (auto &&e: n->elements->list) { if (auto tm = to(e)) { + size++; handle(tm->type, program); if (tm->dotDotDotToken) { program.pushOp(OP::Rest); @@ -1395,16 +1341,18 @@ namespace ts::checker { program.pushOp(OP::TupleMember, tm); if (tm->questionToken) program.pushOp(OP::Optional); } else if (auto ot = to(e)) { + size++; handle(ot->type, program); program.pushOp(OP::TupleMember, ot); program.pushOp(OP::Optional); } else { + size++; handle(e, program); program.pushOp(OP::TupleMember, e); } } program.pushOp(OP::Tuple, node); - program.popFrameImplicit(); + program.pushUint16(size); break; } case SyntaxKind::BinaryExpression: { @@ -1451,9 +1399,10 @@ namespace ts::checker { //todo: embed error since variable is declared twice } else { if (n->type) { - const auto subroutineIndex = program.pushSubroutine(id->escapedText); + const auto subroutineIndex = program.pushSubroutine(symbol); //in symbol subroutines we block TailCalls because want to store the result on the routine program.blockTailCall(); + program.pushSlots(); // program.pushSourceMap(id); handle(n->type, program); @@ -1466,7 +1415,8 @@ namespace ts::checker { program.pushOp(OP::Assign, n->name); } } else { - auto subroutineIndex = program.pushSubroutine(id->escapedText); + auto subroutineIndex = program.pushSubroutine(symbol); + program.pushSlots(); if (n->initializer) { handle(n->initializer, program); diff --git a/src/checker/debug.h b/src/checker/debug.h index ebbcf33..cd29e7b 100644 --- a/src/checker/debug.h +++ b/src/checker/debug.h @@ -112,7 +112,6 @@ namespace ts::checker { result.subroutines.push_back({.name = name, .address = address}); break; } - case OP::FrameReturnJump: case OP::Jump: { auto address = vm::readInt32(bin, i + 1); params += fmt::format(" [{}, +{}]", startI + address, address); @@ -123,10 +122,6 @@ namespace ts::checker { break; } case OP::Main: { - auto address = vm::readUint32(bin, i + 1); - params += fmt::format(" [{}, +{}]", startI + address, address); - vm::eatParams(op, &i); - result.subroutines.push_back({.name = "main", .address = address}); newSubRoutine = true; break; } @@ -134,7 +129,12 @@ namespace ts::checker { newSubRoutine = true; break; } - case OP::Distribute: + case OP::Distribute: { + params += fmt::format(" &{} [{}, +{}]", vm::readUint16(bin, i + 1), startI + vm::readUint32(bin, i + 3), vm::readUint32(bin, i + 3)); + vm::eatParams(op, &i); + newLine = true; + break; + } case OP::JumpCondition: { params += fmt::format(" [{}]", startI + vm::readUint32(bin, i + 1)); vm::eatParams(op, &i); @@ -167,8 +167,13 @@ namespace ts::checker { vm::eatParams(op, &i); break; } + case OP::Union: + case OP::Tuple: + case OP::TemplateLiteral: + case OP::ObjectLiteral: + case OP::Slots: case OP::Loads: { - params += fmt::format(" &{}:{}", vm::readUint16(bin, i + 1), vm::readUint16(bin, i + 3)); + params += fmt::format(" {}", vm::readUint16(bin, i + 1)); vm::eatParams(op, &i); break; } diff --git a/src/checker/instructions.h b/src/checker/instructions.h index e646c03..05a0a34 100644 --- a/src/checker/instructions.h +++ b/src/checker/instructions.h @@ -6,10 +6,9 @@ namespace ts::instructions { enum OP { Noop, Jump, //arbitrary jump, used at the beginning to jump over storage-data (storage-data's addresses are constant) - FrameReturnJump, Halt, SourceMap, //one parameter (size uint32). all subsequent bytes withing the given size is a map op:pos:end, each uint32 - Main, //marks end of meta-data section (subroutine metadata + storage data). has one parameter that points to the actual main code. + Main, //marks end of meta-data section (subroutine metadata + storage data). after this the body section with all subroutine ops follow. Never, Any, @@ -70,14 +69,21 @@ namespace ts::instructions { Instantiate, //instantiates a type on the stack (FunctionRef for example), ExpressionWithTypeArguments /** - * Stack parameter. For each JS variable, JS function, as well as type variables (mapped-type variable for example). + * Reserved new stack entries to be used as type variables. * - * Parameters: - * 1. address on initial stack frame, which should contain its name as a string. - * 3. modifier: const - * 2. position in source code. necessary to determine if a reference is made to a const symbol before it was defined. + * 1 parameter indicating how many stack entries will be reserved. */ - Var, + Slots, + + ///** + // * Stack parameter. For each JS variable, JS function, as well as type variables (mapped-type variable for example). + // * + // * Parameters: + // * 1. address on initial stack frame, which should contain its name as a string. + // * 3. modifier: const + // * 2. position in source code. necessary to determine if a reference is made to a const symbol before it was defined. + // */ + //Var, /** * Makes sure that in the current variable slot is a type placed if nothing was provided as parameter. @@ -93,8 +99,6 @@ namespace ts::instructions { TypeArgumentDefault, //one parameter with the address of the subroutine of the default value TypeArgumentConstraint, //expects an entry on the stack - TypeVariable, - TemplateLiteral, diff --git a/src/checker/module2.h b/src/checker/module2.h index 320fef9..4fc484b 100644 --- a/src/checker/module2.h +++ b/src/checker/module2.h @@ -64,7 +64,6 @@ namespace ts::vm2 { const string code = ""; //for diagnostic messages only vector subroutines; - unsigned int mainAddress; unsigned int sourceMapAddress; unsigned int sourceMapAddressEnd; @@ -188,8 +187,6 @@ namespace ts::vm2 { break; } case OP::Main: { - module->mainAddress = vm::readUint32(bin, i + 1); - module->subroutines.push_back(ModuleSubroutine("main", module->mainAddress, 0)); return; } } diff --git a/src/checker/utils.h b/src/checker/utils.h index 1840bbd..8b580fe 100644 --- a/src/checker/utils.h +++ b/src/checker/utils.h @@ -74,8 +74,9 @@ namespace ts::vm { *i += 4 + 4 + 1; break; } - case OP::Main: - case OP::FrameReturnJump: + case OP::Main: { + break; + } case OP::Jump: { *i += 4; break; @@ -84,9 +85,12 @@ namespace ts::vm { *i += 4; break; } - case OP::Set: - case OP::TypeArgumentDefault: case OP::Distribute: { + *i += 2 + 4; + break; + } + case OP::Set: + case OP::TypeArgumentDefault: { *i += 4; break; } @@ -102,12 +106,17 @@ namespace ts::vm { *i += 2; break; } + case OP::Union: + case OP::Tuple: + case OP::TemplateLiteral: + case OP::ObjectLiteral: + case OP::Slots: case OP::CallExpression: { *i += 2; break; } case OP::Loads: { - *i += 4; + *i += 2; break; } case OP::Parameter: diff --git a/src/checker/vm2.cpp b/src/checker/vm2.cpp index 5eee16a..4a5f51a 100644 --- a/src/checker/vm2.cpp +++ b/src/checker/vm2.cpp @@ -8,7 +8,7 @@ namespace ts::vm2 { void prepare(shared &module) { parseHeader(module); activeSubroutine->module = module.get(); - activeSubroutine->ip = module->mainAddress; + activeSubroutine->ip = module->subroutines[0].address; //first is main activeSubroutine->depth = 0; } @@ -115,7 +115,8 @@ namespace ts::vm2 { if (type == nullptr) return; if (type->refCount == 0) { - debug("type {} not used already!", stringify(type)); + //debug("type {} not used already!", stringify(type)); + return; } type->refCount--; // debug("drop users={} {} ref={}", type->users, stringify(type), (void *) type); @@ -156,31 +157,24 @@ namespace ts::vm2 { } inline void popFrameWithoutGC() { - sp = frame->initialSp; - frame = frames.pop(); + throw std::runtime_error("deprecated"); + //sp = frame->initialSp; + //frame = frames.pop(); } inline std::span popFrame() { - auto start = frame->initialSp + frame->variables; - std::span sub{stack.data() + start, sp - start}; - if (frame->variables>0) { - //we need to GC all variables - for (unsigned int i = 0; ivariables; i++) { - gc(stack[frame->initialSp + i]); - } - } - sp = frame->initialSp; - frame = frames.pop(); //&frames[--frameIdx]; - return sub; - } - - inline void moveFrame(std::vector to) { - auto start = frame->initialSp + frame->variables; -// std::span sub{stack.data() + start, frame->sp - start}; - to.insert(to.begin(), stack.begin() + start, stack.begin() + sp - start); - - frame = frames.pop(); //&frames[--frameIdx]; - sp = frame->initialSp; + throw std::runtime_error("deprecated"); + //auto start = frame->initialSp + frame->variables; + //std::span sub{stack.data() + start, sp - start}; + //if (frame->variables>0) { + // //we need to GC all variables + // for (unsigned int i = 0; ivariables; i++) { + // gc(stack[frame->initialSp + i]); + // } + //} + //sp = frame->initialSp; + //frame = frames.pop(); //&frames[--frameIdx]; + //return sub; } inline void report(DiagnosticMessage message) { @@ -429,7 +423,8 @@ namespace ts::vm2 { } void handleTemplateLiteral() { - auto types = popFrame(); + auto size = activeSubroutine->parseUint16(); + auto types = frame->pop(size); //short path for `{'asd'}` if (types.size() == 1 && types[0]->kind == TypeKind::Literal) { @@ -634,6 +629,8 @@ namespace ts::vm2 { break; } case OP::Return: { + if (activeSubroutine->isMain()) return; + //while (frame->depth > 0) { // for (unsigned int i = 0; ivariables; i++) { // drop(stack[frame->initialSp + i]); @@ -658,6 +655,7 @@ namespace ts::vm2 { if (frame->size()>1) { stack[frame->initialSp] = stack[sp - 1]; } + sp = frame->initialSp + 1; frame = frames.pop(); //&frames[--frameIdx]; if (activeSubroutine->typeArguments == 0) { @@ -692,21 +690,21 @@ namespace ts::vm2 { } break; } - case OP::FrameReturnJump: { - if (frame->size()>frame->variables) { - //there is a return value on the stack, which we need to preserve - auto ret = pop(); - popFrame(); - push(ret); - } else { - //throw away the whole stack - popFrame(); - } - const auto address = activeSubroutine->parseInt32(); - //debug("FrameEndJump to {} ({})", activeSubroutine->ip + address - 4, address); - activeSubroutine->ip += address - 4; //decrease by uint32 too - goto start; - } + //case OP::FrameReturnJump: { + // if (frame->size()>frame->variables) { + // //there is a return value on the stack, which we need to preserve + // auto ret = pop(); + // popFrame(); + // push(ret); + // } else { + // //throw away the whole stack + // popFrame(); + // } + // const auto address = activeSubroutine->parseInt32(); + // //debug("FrameEndJump to {} ({})", activeSubroutine->ip + address - 4, address); + // activeSubroutine->ip += address - 4; //decrease by uint32 too + // goto start; + //} case OP::Jump: { const auto address = activeSubroutine->parseInt32(); //debug("Jump to {} ({})", activeSubroutine->ip + address - 4, address); @@ -743,43 +741,18 @@ namespace ts::vm2 { break; } case OP::Distribute: { + auto slot = activeSubroutine->parseUint16(); //if there is OP::Distribute, then there was always before this OP - // a OP::Loads to push the type on the stack. - //printStack(); - if (!frame->loop) { - //todo: this does not work in a nested Distribute (T extends X ? T extends Y ? 1 : 0 : 0) - // since frame references the outer Distribute - if (frame->flags & FrameFlag::InSingleDistribute) { - //this frame is a Distribute frame already, but frame->loop is empty, - //which means the type on the stack was not a union. We jump thus directly to the end now. - const auto loopEnd = vm::readUint32(bin, activeSubroutine->ip + 1); - activeSubroutine->ip += loopEnd - 1; - //in case of non-union the parameter in this frame should not be GC. - //why? because we do not own it, so GC would lead to removal when refCount=0 - auto res = stack[sp - 1]; - popFrameWithoutGC(); - stack[sp++] = res; - break; - } - - auto type = stack[sp - 1]; - pushFrame(); - //we treat the top of the stack as variable for the next frame - frame->initialSp--; - frame->variables++; - //type->refCount++; - + //a OP::Loads to push the type on the stack. + if (!frame->loop || frame->loop->ip != activeSubroutine->ip) { + //no loop for this distribute created yet + auto type = pop(); if (type->kind == TypeKind::Union) { - //if it's a union, we use the OP:Load slot - frame->loop = loops.push(); // new LoopHelper(type); - frame->loop->set(sp - 1, (TypeRef *) type->type); + frame->createLoop(frame->initialSp + slot, (TypeRef *) type->type); } else { - frame->flags |= FrameFlag::InSingleDistribute; - // If this is a non-union, - // we create a frame and shift it one to the left to consume the type - // all subsequent Loads 0:0 then reference it correctly. - stack[sp - 1] = type; - //jump over parameter, right to the distribute section + frame->createEmptyLoop(); + stack[frame->initialSp + slot] = type; + //jump over parameters, right to the distribute section activeSubroutine->ip += 1 + 4; goto start; } @@ -789,10 +762,8 @@ namespace ts::vm2 { if (!next) { //done //printStack(); - loops.pop(); - frame->loop = nullptr; - auto types = popFrame(); - //pop TypeVariable + auto types = frame->pop(sp - frame->loop->startSP); + frame->popLoop(); if (types.empty()) { push(allocate(TypeKind::Never)); } else if (types.size() == 1) { @@ -808,19 +779,17 @@ namespace ts::vm2 { push(result); } const auto loopEnd = vm::readUint32(bin, activeSubroutine->ip + 1); - activeSubroutine->ip += loopEnd - 1; + activeSubroutine->ip += loopEnd - 1 - 2; } else { - //jump over parameter, right to the distribute section + //jump over parameters, right to the distribute section activeSubroutine->ip += 1 + 4; goto start; } break; } case OP::Loads: { - const auto frameOffset = activeSubroutine->parseUint16(); const auto varIndex = activeSubroutine->parseUint16(); - auto index = frames.at(frames.i - frameOffset)->initialSp + varIndex; - push(stack[index]); + push(stack[frame->initialSp + varIndex]); //debug("Loads {}:{} -> {}", frameOffset, varIndex, stringify(stack[index])); //if (frameOffset == 0) { // push(stack[frame->initialSp + varIndex]); @@ -829,10 +798,10 @@ namespace ts::vm2 { //} break; } - case OP::TypeVariable: { - //all variables will be dropped at the end of the subroutine - push(use(allocate(TypeKind::Unknown))); - frame->variables++; + case OP::Slots: { + auto size = activeSubroutine->parseUint16(); + frame->variables += size; + sp += size; break; } case OP::TypeArgument: { @@ -981,14 +950,15 @@ namespace ts::vm2 { break; } case OP::ObjectLiteral: { + const auto size = activeSubroutine->parseUint16(); auto item = allocate(TypeKind::ObjectLiteral); - auto types = popFrame(); - if (types.empty()) { + if (!size) { item->type = nullptr; push(item); break; } + auto types = frame->pop(size); item->type = useAsRef(types[0]); if (types.size()>1) { auto current = (TypeRef *) item->type; @@ -1002,30 +972,31 @@ namespace ts::vm2 { break; } case OP::Union: { + const auto size = activeSubroutine->parseUint16(); auto item = allocate(TypeKind::Union); //printStack(); - auto types = popFrame(); - if (types.empty()) { + if (!size) { item->type = nullptr; push(item); break; } + auto types = frame->pop(size); + auto first = types[0]; - auto type = types[0]; - if (type->kind == TypeKind::Union) { + if (first->kind == TypeKind::Union) { //if type has no owner, we can steal its children - if (type->refCount == 0) { - item->type = type->type; - type->type = nullptr; + if (first->refCount == 0) { + item->type = first->type; + first->type = nullptr; //since we stole its children, we want it to GC but without its children. their 'users' count belongs now to us. - gc(type); + gc(first); } else { throw std::runtime_error("Can not merge used union"); } } else { - item->type = useAsRef(type); + item->type = useAsRef(first); } - if (types.size()>1) { + if (size>1) { auto current = (TypeRef *) item->type; //set current to the end of the list while (current->next) current = current->next; @@ -1079,14 +1050,15 @@ namespace ts::vm2 { break; } case OP::Tuple: { - auto types = popFrame(); - if (types.empty()) { + const auto size = activeSubroutine->parseUint16(); + if (size == 0) { auto item = allocate(TypeKind::Tuple); item->type = nullptr; push(item); break; } + auto types = frame->pop(size); Type *item; auto firstTupleMember = types[0]; auto firstType = (Type *) firstTupleMember->type; @@ -1192,10 +1164,32 @@ namespace ts::vm2 { break; } default: { - debug("OP {} not handled!", (OP) bin[activeSubroutine->ip]); + debug("[{}] OP {} not handled!", activeSubroutine->ip, (OP) bin[activeSubroutine->ip]); } } activeSubroutine->ip++; } } + + LoopHelper *Frame::createLoop(unsigned int var1, TypeRef *type) { + auto newLoop = loops.push(); + newLoop->set(var1, type); + newLoop->ip = activeSubroutine->ip; + newLoop->startSP = sp; + newLoop->previous = loop; + return loop = newLoop; + } + + LoopHelper *Frame::createEmptyLoop() { + auto newLoop = loops.push(); + newLoop->ip = activeSubroutine->ip; + newLoop->startSP = sp; + newLoop->previous = loop; + return loop = newLoop; + } + + void Frame::popLoop() { + loop = loop->previous; + loops.pop(); + } }; \ No newline at end of file diff --git a/src/checker/vm2.h b/src/checker/vm2.h index 8ce6261..708c05e 100644 --- a/src/checker/vm2.h +++ b/src/checker/vm2.h @@ -67,6 +67,10 @@ namespace ts::vm2 { return val; } + bool isMain() { + return !subroutine; + } + int32_t parseInt32() { auto val = vm::readInt32(module->bin, ip + 1); ip += 4; @@ -93,7 +97,10 @@ namespace ts::vm2 { struct LoopHelper { TypeRef *current; + unsigned int ip = 0; + unsigned int startSP = 0; unsigned int var1 = 0; + LoopHelper *previous = nullptr; void set(unsigned int var1, TypeRef *typeRef) { this->var1 = var1; @@ -109,7 +116,7 @@ namespace ts::vm2 { }; enum FrameFlag: uint8_t { - InSingleDistribute = 1 << 0 + //InSingleDistribute = 1<<0 }; struct Frame { @@ -125,9 +132,19 @@ namespace ts::vm2 { uint8_t flags = 0; LoopHelper *loop = nullptr; + LoopHelper *createLoop(unsigned int var1, TypeRef *type); + LoopHelper *createEmptyLoop(); + + void popLoop(); + unsigned int size() { return sp - initialSp; } + + std::span pop(unsigned int size) { + sp -= size; + return {stack.data() + sp, size}; + } }; template diff --git a/src/tests/test_vm2.cpp b/src/tests/test_vm2.cpp index c6d2214..1cd47a9 100644 --- a/src/tests/test_vm2.cpp +++ b/src/tests/test_vm2.cpp @@ -40,11 +40,9 @@ const v2: number = 123; //only v1, v2 REQUIRE(ts::vm2::pool.active == 2); - ts::bench("first", 1000, [&] { - module->clear(); - run(module); - }); + testBench(code, 0); } + TEST_CASE("vm2TwoTests") { test(R"( const v1: string = "abc"; @@ -97,7 +95,7 @@ const v3: a = 'nope'; TEST_CASE("vm2Base22") { string code = R"( -type a = K | (T extends string ? 'yes': 'no'); +type a = K | (T extends string ? 'yes' : 'no'); const v1: a = 'no'; const v2: a = 'yes'; const v3: a = true; @@ -153,12 +151,12 @@ const var1: a = false; TEST_CASE("gcUnion") { ts::checker::Program program; - program.pushOp(OP::Frame); for (auto i = 0; i<10; i++) { program.pushOp(OP::StringLiteral); program.pushStorage("a" + to_string(i)); } program.pushOp(OP::Union); + program.pushUint16(10); program.pushOp(OP::Halt); auto module = std::make_shared(program.build(), "app.ts", ""); @@ -176,6 +174,7 @@ TEST_CASE("gcTuple") { program.pushOp(OP::TupleMember); } program.pushOp(OP::Tuple); + program.pushUint16(10); program.pushOp(OP::Halt); auto module = std::make_shared(program.build(), "app.ts", ""); @@ -196,6 +195,7 @@ TEST_CASE("gcObject") { program.pushOp(OP::PropertySignature); } program.pushOp(OP::ObjectLiteral); + program.pushUint16(10); program.pushOp(OP::Halt); auto module = std::make_shared(program.build(), "app.ts", ""); @@ -229,6 +229,7 @@ type L = `a${string}`; const var1: L = 'abc'; const var2: L = 'bbc'; )"; + //not implemented yet ts::testBench(code, 1); } @@ -538,7 +539,6 @@ TEST_CASE("vm2BenchOverhead") { } TEST_CASE("vm2Complex1") { - //todo: crashes with BAD_ACCESS. lag wohl daran, dass nicht alles zurückgesetzt wurde string code = R"( type StringToNum = `${A['length']}` extends T ? A['length'] : StringToNum; //yes //type StringToNum = `${A['length']}` extends T ? A['length'] : StringToNum; //no, A refCount too big. @@ -549,12 +549,8 @@ type StringToNum = `${A['length']}` extends T ? A['length'] : StringToNum< const var1: StringToNum<'999', []> = 1002; //const var2: StringToNum<'999'> = 1002; )"; - //todo: fix reuse of A. We need to mark the argument with a flag though, so we know we can for sure steal its ref and just append it. - // Should then be much faster than we currently see. - //todo we have to fix that A.users keeps increasing - //todo: add tail call optimisation - //todo: super instruction for `${A['length']}` and A['length'] test(code, 1); + //todo: this crashes after a lot of runs, I guess something is not correctly reset so it grows forever testBench(code, 1); // testBench(code, 0, 1); // debug("active {}", ts::vm2::pool.active);