diff --git a/libevmasm/Assembly.cpp b/libevmasm/Assembly.cpp index 0d5468e2678e..ad146fe6acd3 100644 --- a/libevmasm/Assembly.cpp +++ b/libevmasm/Assembly.cpp @@ -985,86 +985,73 @@ LinkerObject const& Assembly::assemble() const return assembleEOF(); } -LinkerObject const& Assembly::assembleLegacy() const +void Assembly::handleOperation(AssemblyItem const& item, bytes& bytecode) const { - solAssert(!m_eofVersion.has_value()); - solAssert(!m_invalid); - // Return the already assembled object, if present. - if (!m_assembledObject.bytecode.empty()) - return m_assembledObject; - // Otherwise ensure the object is actually clear. - solAssert(m_assembledObject.linkReferences.empty()); - - LinkerObject& ret = m_assembledObject; + bytecode.push_back(static_cast(item.instruction())); +} - size_t subTagSize = 1; - std::map>> immutableReferencesBySub; - for (auto const& sub: m_subs) +void Assembly::handlePush(AssemblyItem const& item, bytes& bytecode) const +{ + unsigned pushValueSize = numberEncodingSize(item.data()); + if (pushValueSize == 0 && !m_evmVersion.hasPush0()) { - auto const& linkerObject = sub->assemble(); - if (!linkerObject.immutableReferences.empty()) - { - assertThrow( - immutableReferencesBySub.empty(), - AssemblyException, - "More than one sub-assembly references immutables." 
- ); - immutableReferencesBySub = linkerObject.immutableReferences; - } - for (size_t tagPos: sub->m_tagPositionsInBytecode) - if (tagPos != std::numeric_limits::max() && numberEncodingSize(tagPos) > subTagSize) - subTagSize = numberEncodingSize(tagPos); + pushValueSize = 1; } + bytecode.push_back(static_cast(pushInstruction(pushValueSize))); + if (pushValueSize > 0) + { + bytecode.resize(bytecode.size() + pushValueSize); + bytesRef pushValueBytecodeRef(&bytecode.back() + 1 - pushValueSize, pushValueSize); + toBigEndian(item.data(), pushValueBytecodeRef); + } +} - bool setsImmutables = false; - bool pushesImmutables = false; +void Assembly::handlePushLibraryAddress(AssemblyItem const& item, LinkerObject& ret) const +{ + ret.bytecode.push_back(static_cast(Instruction::PUSH20)); + ret.linkReferences[ret.bytecode.size()] = m_libraries.at(item.data()); + ret.bytecode.resize(ret.bytecode.size() + 20); +} - assertThrow(m_codeSections.size() == 1, AssemblyException, "Expected exactly one code section in non-EOF code."); - AssemblyItems const& items = m_codeSections.front().items; +void Assembly::handleVerbatimBytecode(AssemblyItem const& item, bytes& bytecode) const +{ + bytecode += item.verbatimData(); +} - for (auto const& item: items) - if (item.type() == AssignImmutable) - { - item.setImmutableOccurrences(immutableReferencesBySub[item.data()].second.size()); - setsImmutables = true; - } - else if (item.type() == PushImmutable) - pushesImmutables = true; - if (setsImmutables || pushesImmutables) - assertThrow( - setsImmutables != pushesImmutables, - AssemblyException, - "Cannot push and assign immutables in the same assembly subroutine." 
- ); +void Assembly::handlePushDeployTimeAddress(bytes& bytecode) const +{ + bytecode.push_back(static_cast(Instruction::PUSH20)); + bytecode.resize(bytecode.size() + 20); +} - unsigned bytesRequiredForCode = codeSize(static_cast(subTagSize)); - m_tagPositionsInBytecode = std::vector(m_usedTags, std::numeric_limits::max()); +void Assembly::handleTag(AssemblyItem const& item, bytes& bytecode, bool addJumpDest) const +{ + solRequire(item.data() != 0, AssemblyException, "Invalid tag position."); + solRequire(item.splitForeignPushTag().first == std::numeric_limits::max(), AssemblyException, "Foreign tag."); + size_t tagId = static_cast(item.data()); + solRequire(bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); + solRequire(m_tagPositionsInBytecode[tagId] == std::numeric_limits::max(), AssemblyException, "Duplicate tag position."); + m_tagPositionsInBytecode[tagId] = bytecode.size(); + if (addJumpDest) + bytecode.push_back(static_cast(Instruction::JUMPDEST)); +} + +// tagRef, dataRef, subRef, sizeRef +std::tuple>, std::multimap, std::multimap, std::vector> +Assembly::dispatchAssemblyItemsLegacy( + LinkerObject& ret, + std::map>>& immutableReferencesBySub, + AssemblyItems const& items, + unsigned bytesPerTag, + unsigned bytesPerDataRef) const +{ std::map> tagRef; std::multimap dataRef; std::multimap subRef; - std::vector sizeRef; ///< Pointers to code locations where the size of the program is inserted - unsigned bytesPerTag = numberEncodingSize(bytesRequiredForCode); - // Adjust bytesPerTag for references to sub assemblies. 
- for (AssemblyItem const& item: items) - if (item.type() == PushTag) - { - auto [subId, tagId] = item.splitForeignPushTag(); - if (subId == std::numeric_limits::max()) - continue; - assertThrow(subId < m_subs.size(), AssemblyException, "Invalid sub id"); - auto subTagPosition = m_subs[subId]->m_tagPositionsInBytecode.at(tagId); - assertThrow(subTagPosition != std::numeric_limits::max(), AssemblyException, "Reference to tag without position."); - bytesPerTag = std::max(bytesPerTag, numberEncodingSize(subTagPosition)); - } - uint8_t tagPush = static_cast(pushInstruction(bytesPerTag)); - - unsigned bytesRequiredIncludingData = bytesRequiredForCode + 1 + static_cast(m_auxiliaryData.size()); - for (auto const& sub: m_subs) - bytesRequiredIncludingData += static_cast(sub->assemble().bytecode.size()); + std::vector sizeRef; ///< Pointers to code locations where the size of the program is inserted> - unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingData); + uint8_t tagPush = static_cast(pushInstruction(bytesPerTag)); uint8_t dataRefPush = static_cast(pushInstruction(bytesPerDataRef)); - ret.bytecode.reserve(bytesRequiredIncludingData); for (AssemblyItem const& item: items) { @@ -1075,24 +1062,11 @@ LinkerObject const& Assembly::assembleLegacy() const switch (item.type()) { case Operation: - ret.bytecode.push_back(static_cast(item.instruction())); + handleOperation(item, ret.bytecode); break; case Push: - { - unsigned b = numberEncodingSize(item.data()); - if (b == 0 && !m_evmVersion.hasPush0()) - { - b = 1; - } - ret.bytecode.push_back(static_cast(pushInstruction(b))); - if (b > 0) - { - ret.bytecode.resize(ret.bytecode.size() + b); - bytesRef byr(&ret.bytecode.back() + 1 - b, b); - toBigEndian(item.data(), byr); - } + handlePush(item, ret.bytecode); break; - } case PushTag: { ret.bytecode.push_back(tagPush); @@ -1131,9 +1105,7 @@ LinkerObject const& Assembly::assembleLegacy() const break; } case PushLibraryAddress: - 
ret.bytecode.push_back(static_cast(Instruction::PUSH20)); - ret.linkReferences[ret.bytecode.size()] = m_libraries.at(item.data()); - ret.bytecode.resize(ret.bytecode.size() + 20); + handlePushLibraryAddress(item, ret); break; case PushImmutable: ret.bytecode.push_back(static_cast(Instruction::PUSH32)); @@ -1145,7 +1117,7 @@ LinkerObject const& Assembly::assembleLegacy() const ret.bytecode.resize(ret.bytecode.size() + 32); break; case VerbatimBytecode: - ret.bytecode += item.verbatimData(); + handleVerbatimBytecode(item, ret.bytecode); break; case AssignImmutable: { @@ -1174,25 +1146,97 @@ LinkerObject const& Assembly::assembleLegacy() const break; } case PushDeployTimeAddress: - ret.bytecode.push_back(static_cast(Instruction::PUSH20)); - ret.bytecode.resize(ret.bytecode.size() + 20); + handlePushDeployTimeAddress(ret.bytecode); break; case Tag: - { - assertThrow(item.data() != 0, AssemblyException, "Invalid tag position."); - assertThrow(item.splitForeignPushTag().first == std::numeric_limits::max(), AssemblyException, "Foreign tag."); - size_t tagId = static_cast(item.data()); - assertThrow(ret.bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); - assertThrow(m_tagPositionsInBytecode[tagId] == std::numeric_limits::max(), AssemblyException, "Duplicate tag position."); - m_tagPositionsInBytecode[tagId] = ret.bytecode.size(); - ret.bytecode.push_back(static_cast(Instruction::JUMPDEST)); + handleTag(item, ret.bytecode, true); break; - } default: assertThrow(false, InvalidOpcode, "Unexpected opcode while assembling."); } } + return {tagRef, dataRef, subRef, sizeRef}; +} + +LinkerObject const& Assembly::assembleLegacy() const +{ + solAssert(!m_eofVersion.has_value()); + solAssert(!m_invalid); + // Return the already assembled object, if present. + if (!m_assembledObject.bytecode.empty()) + return m_assembledObject; + // Otherwise ensure the object is actually clear. 
+ solAssert(m_assembledObject.linkReferences.empty()); + + LinkerObject& ret = m_assembledObject; + + size_t subTagSize = 1; + std::map>> immutableReferencesBySub; + for (auto const& sub: m_subs) + { + auto const& linkerObject = sub->assemble(); + if (!linkerObject.immutableReferences.empty()) + { + assertThrow( + immutableReferencesBySub.empty(), + AssemblyException, + "More than one sub-assembly references immutables." + ); + immutableReferencesBySub = linkerObject.immutableReferences; + } + for (size_t tagPos: sub->m_tagPositionsInBytecode) + if (tagPos != std::numeric_limits::max() && numberEncodingSize(tagPos) > subTagSize) + subTagSize = numberEncodingSize(tagPos); + } + + bool setsImmutables = false; + bool pushesImmutables = false; + + assertThrow(m_codeSections.size() == 1, AssemblyException, "Expected exactly one code section in non-EOF code."); + AssemblyItems const& items = m_codeSections.front().items; + + for (auto const& item: items) + if (item.type() == AssignImmutable) + { + item.setImmutableOccurrences(immutableReferencesBySub[item.data()].second.size()); + setsImmutables = true; + } + else if (item.type() == PushImmutable) + pushesImmutables = true; + if (setsImmutables || pushesImmutables) + assertThrow( + setsImmutables != pushesImmutables, + AssemblyException, + "Cannot push and assign immutables in the same assembly subroutine." + ); + + unsigned bytesRequiredForCode = codeSize(static_cast(subTagSize)); + m_tagPositionsInBytecode = std::vector(m_usedTags, std::numeric_limits::max()); + unsigned bytesPerTag = numberEncodingSize(bytesRequiredForCode); + // Adjust bytesPerTag for references to sub assemblies. 
+ for (AssemblyItem const& item: items) + if (item.type() == PushTag) + { + auto [subId, tagId] = item.splitForeignPushTag(); + if (subId == std::numeric_limits::max()) + continue; + assertThrow(subId < m_subs.size(), AssemblyException, "Invalid sub id"); + auto subTagPosition = m_subs[subId]->m_tagPositionsInBytecode.at(tagId); + assertThrow(subTagPosition != std::numeric_limits::max(), AssemblyException, "Reference to tag without position."); + bytesPerTag = std::max(bytesPerTag, numberEncodingSize(subTagPosition)); + } + + unsigned bytesRequiredIncludingData = bytesRequiredForCode + 1 + static_cast(m_auxiliaryData.size()); + for (auto const& sub: m_subs) + bytesRequiredIncludingData += static_cast(sub->assemble().bytecode.size()); + + unsigned bytesPerDataRef = numberEncodingSize(bytesRequiredIncludingData); + ret.bytecode.reserve(bytesRequiredIncludingData); + + auto const [tagRef, dataRef, subRef, sizeRef] = + dispatchAssemblyItemsLegacy(ret, immutableReferencesBySub, items, bytesPerTag, bytesPerDataRef); + if (!immutableReferencesBySub.empty()) throw langutil::Error( @@ -1306,6 +1350,48 @@ std::map Assembly::findReferencedContainers() const return replacements; } +void Assembly::dispatchAssemblyItemsEOF(LinkerObject& ret, std::vector& codeSectionSizeOffsets) const +{ + for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate) + { + auto const sectionStart = ret.bytecode.size(); + + for (AssemblyItem const& item: codeSection.items) + { + // store position of the invalid jump destination + if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits::max()) + m_tagPositionsInBytecode[0] = ret.bytecode.size(); + + switch (item.type()) + { + case Operation: + handleOperation(item, ret.bytecode); + break; + case Push: + handlePush(item, ret.bytecode); + break; + case PushLibraryAddress: + handlePushLibraryAddress(item, ret); + break; + case VerbatimBytecode: + handleVerbatimBytecode(item, ret.bytecode); + break; + case 
PushDeployTimeAddress: + handlePushDeployTimeAddress(ret.bytecode); + break; + case Tag: + handleTag(item, ret.bytecode, false); + break; + default: + solRequire(false, InvalidOpcode, "Unexpected opcode while assembling."); + } + } + + auto sectionEnd = ret.bytecode.size(); + setBigEndianUint16(ret.bytecode, codeSectionSizeOffsets[codeSectionIndex], sectionEnd - sectionStart); + } +} + LinkerObject const& Assembly::assembleEOF() const { solAssert(m_eofVersion.has_value() && m_eofVersion == 1); @@ -1326,67 +1412,7 @@ LinkerObject const& Assembly::assembleEOF() const m_tagPositionsInBytecode = std::vector(m_usedTags, std::numeric_limits::max()); - for (auto&& [codeSectionIndex, codeSection]: m_codeSections | ranges::views::enumerate) - { - auto const sectionStart = ret.bytecode.size(); - - for (AssemblyItem const& item: codeSection.items) - { - // store position of the invalid jump destination - if (item.type() != Tag && m_tagPositionsInBytecode[0] == std::numeric_limits::max()) - m_tagPositionsInBytecode[0] = ret.bytecode.size(); - - switch (item.type()) - { - case Operation: - ret.bytecode.push_back(static_cast(item.instruction())); - break; - case Push: - { - unsigned pushValueSize = numberEncodingSize(item.data()); - if (pushValueSize == 0 && !m_evmVersion.hasPush0()) - { - pushValueSize = 1; - } - ret.bytecode.push_back(static_cast(pushInstruction(pushValueSize))); - if (pushValueSize > 0) - { - ret.bytecode.resize(ret.bytecode.size() + pushValueSize); - bytesRef pushValueBytecodeRef(&ret.bytecode.back() + 1 - pushValueSize, pushValueSize); - toBigEndian(item.data(), pushValueBytecodeRef); - } - break; - } - case PushLibraryAddress: - ret.bytecode.push_back(static_cast(Instruction::PUSH20)); - ret.linkReferences[ret.bytecode.size()] = m_libraries.at(item.data()); - ret.bytecode.resize(ret.bytecode.size() + 20); - break; - case VerbatimBytecode: - ret.bytecode += item.verbatimData(); - break; - case PushDeployTimeAddress: - 
ret.bytecode.push_back(static_cast(Instruction::PUSH20)); - ret.bytecode.resize(ret.bytecode.size() + 20); - break; - case Tag: - { - solRequire(item.data() != 0, AssemblyException, "Invalid tag position."); - solRequire(item.splitForeignPushTag().first == std::numeric_limits::max(), AssemblyException, "Foreign tag."); - size_t tagId = static_cast(item.data()); - solRequire(ret.bytecode.size() < 0xffffffffL, AssemblyException, "Tag too large."); - solRequire(m_tagPositionsInBytecode[tagId] == std::numeric_limits::max(), AssemblyException, "Duplicate tag position."); - m_tagPositionsInBytecode[tagId] = ret.bytecode.size(); - break; - } - default: - solRequire(false, InvalidOpcode, "Unexpected opcode while assembling."); - } - } - - auto sectionEnd = ret.bytecode.size(); - setBigEndianUint16(ret.bytecode, codeSectionSizeOffsets[codeSectionIndex], sectionEnd - sectionStart); - } + dispatchAssemblyItemsEOF(ret, codeSectionSizeOffsets); for (auto i: referencedSubIds) ret.bytecode += m_subs[i]->assemble().bytecode; diff --git a/libevmasm/Assembly.h b/libevmasm/Assembly.h index 41e735db612f..b499b6bbccd6 100644 --- a/libevmasm/Assembly.h +++ b/libevmasm/Assembly.h @@ -235,6 +235,28 @@ class Assembly /// Returns map from m_subs to an index of subcontainer in the final EOF bytecode std::map findReferencedContainers() const; + /// Assembly items types handlers to be used by the dispatching functions for EOF or legacy. + void handleOperation(AssemblyItem const& item, bytes& bytecode) const; + void handlePush(AssemblyItem const& item, bytes& bytecode) const; + void handlePushLibraryAddress(AssemblyItem const& item, LinkerObject& ret) const; + void handleVerbatimBytecode(AssemblyItem const& item, bytes& bytecode) const; + void handlePushDeployTimeAddress(bytes& bytecode) const; + void handleTag(AssemblyItem const& item, bytes& bytecode, bool addJumpDest) const; + + /// Iterates and dispatches assembly items and returns needed references types for further bytecode construction. 
+ /// Returns tagRef, dataRef, subRef, sizeRef + std::tuple>, std::multimap, std::multimap, std::vector> + dispatchAssemblyItemsLegacy( + LinkerObject& ret, + std::map>>& immutableReferencesBySub, + AssemblyItems const& items, + unsigned bytesPerTag, + unsigned bytesPerDataRef) const; + + /// Iterates and dispatches assembly items for all code sections, generating bytecode accordingly. + /// Fills code section sizes in EOF header. + void dispatchAssemblyItemsEOF(LinkerObject& ret, std::vector& codeSectionSizeOffsets) const; + protected: /// 0 is reserved for exception unsigned m_usedTags = 1;