Skip to content

Commit fbbe2dd

Browse files
authored
Merge pull request #15368 from ethereum/ethdebug_instructions_and_source_ranges
Add support for instructions and source ranges.
2 parents dbe214e + f3705de commit fbbe2dd

File tree

120 files changed

+747
-1293
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

120 files changed

+747
-1293
lines changed

Changelog.md

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ Language Features:
66

77
Compiler Features:
88
* Error Reporting: Errors reported during code generation now point at the location of the contract when more fine-grained location is not available.
9+
* ethdebug: Experimental support for instructions and source locations.
910
* EVM: Support for the EVM version "Osaka".
1011
* EVM Assembly Import: Allow enabling opcode-based optimizer.
1112
* General: The experimental EOF backend implements a subset of EOF sufficient to compile arbitrary high-level Solidity syntax via IR with optimization enabled.

libevmasm/Assembly.cpp

+102-45
Original file line numberDiff line numberDiff line change
@@ -1281,6 +1281,21 @@ LinkerObject const& Assembly::assembleLegacy() const
12811281
uint8_t tagPush = static_cast<uint8_t>(pushInstruction(bytesPerTag));
12821282
uint8_t dataRefPush = static_cast<uint8_t>(pushInstruction(bytesPerDataRef));
12831283

1284+
LinkerObject::CodeSectionLocation codeSectionLocation;
1285+
codeSectionLocation.start = 0;
1286+
size_t assemblyItemIndex = 0;
1287+
auto assembleInstruction = [&](auto&& _addInstruction) {
1288+
size_t start = ret.bytecode.size();
1289+
_addInstruction();
1290+
size_t end = ret.bytecode.size();
1291+
codeSectionLocation.instructionLocations.emplace_back(
1292+
LinkerObject::InstructionLocation{
1293+
.start = start,
1294+
.end = end,
1295+
.assemblyItemIndex = assemblyItemIndex
1296+
}
1297+
);
1298+
};
12841299
for (AssemblyItem const& item: items)
12851300
{
12861301
// store position of the invalid jump destination
@@ -1290,63 +1305,81 @@ LinkerObject const& Assembly::assembleLegacy() const
12901305
switch (item.type())
12911306
{
12921307
case Operation:
1293-
ret.bytecode += assembleOperation(item);
1308+
assembleInstruction([&](){
1309+
ret.bytecode += assembleOperation(item);
1310+
});
12941311
break;
12951312
case Push:
1296-
ret.bytecode += assemblePush(item);
1313+
assembleInstruction([&](){
1314+
ret.bytecode += assemblePush(item);
1315+
});
12971316
break;
12981317
case PushTag:
12991318
{
1300-
ret.bytecode.push_back(tagPush);
1301-
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1302-
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
1319+
assembleInstruction([&](){
1320+
ret.bytecode.push_back(tagPush);
1321+
tagRefs[ret.bytecode.size()] = item.splitForeignPushTag();
1322+
ret.bytecode.resize(ret.bytecode.size() + bytesPerTag);
1323+
});
13031324
break;
13041325
}
13051326
case PushData:
1306-
ret.bytecode.push_back(dataRefPush);
1307-
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1308-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1327+
assembleInstruction([&]() {
1328+
ret.bytecode.push_back(dataRefPush);
1329+
dataRefs.insert(std::make_pair(h256(item.data()), ret.bytecode.size()));
1330+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1331+
});
13091332
break;
13101333
case PushSub:
1311-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1312-
ret.bytecode.push_back(dataRefPush);
1313-
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1314-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1334+
assembleInstruction([&]() {
1335+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1336+
ret.bytecode.push_back(dataRefPush);
1337+
subRefs.insert(std::make_pair(static_cast<size_t>(item.data()), ret.bytecode.size()));
1338+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1339+
});
13151340
break;
13161341
case PushSubSize:
13171342
{
1318-
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1319-
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1320-
item.setPushedValue(u256(s));
1321-
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1322-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1323-
ret.bytecode.resize(ret.bytecode.size() + b);
1324-
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1325-
toBigEndian(s, byr);
1343+
assembleInstruction([&](){
1344+
assertThrow(item.data() <= std::numeric_limits<size_t>::max(), AssemblyException, "");
1345+
auto s = subAssemblyById(static_cast<size_t>(item.data()))->assemble().bytecode.size();
1346+
item.setPushedValue(u256(s));
1347+
unsigned b = std::max<unsigned>(1, numberEncodingSize(s));
1348+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(b)));
1349+
ret.bytecode.resize(ret.bytecode.size() + b);
1350+
bytesRef byr(&ret.bytecode.back() + 1 - b, b);
1351+
toBigEndian(s, byr);
1352+
});
13261353
break;
13271354
}
13281355
case PushProgramSize:
13291356
{
1330-
ret.bytecode.push_back(dataRefPush);
1331-
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1332-
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1357+
assembleInstruction([&](){
1358+
ret.bytecode.push_back(dataRefPush);
1359+
sizeRefs.push_back(static_cast<unsigned>(ret.bytecode.size()));
1360+
ret.bytecode.resize(ret.bytecode.size() + bytesPerDataRef);
1361+
});
13331362
break;
13341363
}
13351364
case PushLibraryAddress:
13361365
{
1337-
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1338-
ret.bytecode += bytecode;
1339-
ret.linkReferences.insert(linkRef);
1366+
assembleInstruction([&]() {
1367+
auto const [bytecode, linkRef] = assemblePushLibraryAddress(item, ret.bytecode.size());
1368+
ret.bytecode += bytecode;
1369+
ret.linkReferences.insert(linkRef);
1370+
});
13401371
break;
13411372
}
13421373
case PushImmutable:
1343-
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1344-
// Maps keccak back to the "identifier" std::string of that immutable.
1345-
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1346-
// Record the bytecode offset of the PUSH32 argument.
1347-
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1348-
// Advance bytecode by 32 bytes (default initialized).
1349-
ret.bytecode.resize(ret.bytecode.size() + 32);
1374+
assembleInstruction([&]() {
1375+
ret.bytecode.push_back(static_cast<uint8_t>(Instruction::PUSH32));
1376+
// Maps keccak back to the "identifier" std::string of that immutable.
1377+
ret.immutableReferences[item.data()].first = m_immutables.at(item.data());
1378+
// Record the bytecode offset of the PUSH32 argument.
1379+
ret.immutableReferences[item.data()].second.emplace_back(ret.bytecode.size());
1380+
// Advance bytecode by 32 bytes (default initialized).
1381+
ret.bytecode.resize(ret.bytecode.size() + 32);
1382+
});
13501383
break;
13511384
case VerbatimBytecode:
13521385
ret.bytecode += assembleVerbatimBytecode(item);
@@ -1359,35 +1392,59 @@ LinkerObject const& Assembly::assembleLegacy() const
13591392
{
13601393
if (i != offsets.size() - 1)
13611394
{
1362-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1363-
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1395+
assembleInstruction([&]() {
1396+
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1397+
});
1398+
assembleInstruction([&]() {
1399+
ret.bytecode.push_back(uint8_t(Instruction::DUP2));
1400+
});
13641401
}
1365-
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1366-
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1367-
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1368-
ret.bytecode += offsetBytes;
1369-
ret.bytecode.push_back(uint8_t(Instruction::ADD));
1370-
ret.bytecode.push_back(uint8_t(Instruction::MSTORE));
1402+
assembleInstruction([&]() {
1403+
// TODO: should we make use of the constant optimizer methods for pushing the offsets?
1404+
bytes offsetBytes = toCompactBigEndian(u256(offsets[i]));
1405+
ret.bytecode.push_back(static_cast<uint8_t>(pushInstruction(static_cast<unsigned>(offsetBytes.size()))));
1406+
ret.bytecode += offsetBytes;
1407+
});
1408+
assembleInstruction([&]() {
1409+
ret.bytecode.push_back(uint8_t(Instruction::ADD));
1410+
});
1411+
assembleInstruction([&]() {
1412+
ret.bytecode.push_back(uint8_t(Instruction::MSTORE));
1413+
});
13711414
}
13721415
if (offsets.empty())
13731416
{
1374-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1375-
ret.bytecode.push_back(uint8_t(Instruction::POP));
1417+
assembleInstruction([&]() {
1418+
ret.bytecode.push_back(uint8_t(Instruction::POP));
1419+
});
1420+
assembleInstruction([&]() {
1421+
ret.bytecode.push_back(uint8_t(Instruction::POP));
1422+
});
13761423
}
13771424
immutableReferencesBySub.erase(item.data());
13781425
break;
13791426
}
13801427
case PushDeployTimeAddress:
1381-
ret.bytecode += assemblePushDeployTimeAddress();
1428+
assembleInstruction([&]() {
1429+
ret.bytecode += assemblePushDeployTimeAddress();
1430+
});
13821431
break;
13831432
case Tag:
1384-
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
1433+
assembleInstruction([&](){
1434+
ret.bytecode += assembleTag(item, ret.bytecode.size(), true);
1435+
});
13851436
break;
13861437
default:
13871438
solAssert(false, "Unexpected opcode while assembling.");
13881439
}
1440+
1441+
++assemblyItemIndex;
13891442
}
13901443

1444+
codeSectionLocation.end = ret.bytecode.size();
1445+
1446+
ret.codeSectionLocations.emplace_back(std::move(codeSectionLocation));
1447+
13911448
if (!immutableReferencesBySub.empty())
13921449
throw
13931450
langutil::Error(

libevmasm/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ set(sources
44
Assembly.h
55
AssemblyItem.cpp
66
AssemblyItem.h
7+
Ethdebug.cpp
8+
Ethdebug.h
79
EVMAssemblyStack.cpp
810
EVMAssemblyStack.h
911
BlockDeduplicator.cpp

libevmasm/Ethdebug.cpp

+115
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
/*
2+
This file is part of solidity.
3+
4+
solidity is free software: you can redistribute it and/or modify
5+
it under the terms of the GNU General Public License as published by
6+
the Free Software Foundation, either version 3 of the License, or
7+
(at your option) any later version.
8+
9+
solidity is distributed in the hope that it will be useful,
10+
but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License
15+
along with solidity. If not, see <http://www.gnu.org/licenses/>.
16+
*/
17+
// SPDX-License-Identifier: GPL-3.0
18+
19+
#include <libevmasm/Ethdebug.h>
20+
21+
using namespace solidity;
22+
using namespace solidity::evmasm;
23+
using namespace solidity::evmasm::ethdebug;
24+
25+
namespace
26+
{
27+
28+
// Builds the JSON array of instructions for an ethdebug program.
//
// One entry is produced per instruction location recorded in the single code section of
// _linkerObject. Each entry carries the bytecode offset, the operation (mnemonic plus optional
// hex-encoded argument) and a source context consisting of _sourceId and the source range of
// the assembly item the instruction was generated from.
//
// _assembly may be null, but only if the linker object contains no instruction locations.
Json programInstructions(Assembly const* _assembly, LinkerObject const& _linkerObject, unsigned _sourceId)
{
	// A missing assembly is legitimate: e.g. interfaces don't have a valid assembly object.
	// If there is one, reject everything that is not supported yet.
	if (_assembly)
	{
		solUnimplementedAssert(_assembly->eofVersion() == std::nullopt, "ethdebug does not yet support EOF.");
		solUnimplementedAssert(_assembly->codeSections().size() == 1, "ethdebug does not yet support multiple code-sections.");
		for (auto const& item: _assembly->codeSections()[0].items)
			solUnimplementedAssert(item.type() != VerbatimBytecode, "Verbatim bytecode is currently not supported by ethdebug.");
	}

	solAssert(_linkerObject.codeSectionLocations.size() == 1);
	auto const& codeSection = _linkerObject.codeSectionLocations[0];
	auto const& bytecode = _linkerObject.bytecode;
	solAssert(codeSection.end <= bytecode.size());

	// The first byte of every recorded range is interpreted as the opcode,
	// all remaining bytes of the range form its argument.
	static size_t constexpr opcodeSize = 1;

	Json result = Json::array();
	for (auto const& instructionLocation: codeSection.instructionLocations)
	{
		// Instruction locations can only be resolved against an existing assembly.
		solAssert(_assembly);

		size_t const first = instructionLocation.start;
		size_t const last = instructionLocation.end;
		size_t const itemIndex = instructionLocation.assemblyItemIndex;
		solAssert(last <= bytecode.size());
		solAssert(first < last);

		auto const& items = _assembly->codeSections().at(0).items;
		solAssert(itemIndex < items.size());

		Json operation = Json::object();
		operation["mnemonic"] = instructionInfo(static_cast<Instruction>(bytecode[first]), _assembly->evmVersion()).name;
		if (first + opcodeSize < last)
		{
			auto const argumentBegin = bytecode.begin() + static_cast<std::ptrdiff_t>(first) + opcodeSize;
			auto const argumentEnd = bytecode.begin() + static_cast<std::ptrdiff_t>(last);
			bytes const argument(argumentBegin, argumentEnd);
			solAssert(!argument.empty());
			operation["arguments"] = Json::array({util::toHex(argument, util::HexPrefix::Add)});
		}

		langutil::SourceLocation const& sourceLocation = items.at(itemIndex).location();

		// Assemble the nested context bottom-up, keeping the key insertion order
		// source -> range and offset -> length.
		Json source = Json::object();
		source["id"] = static_cast<int>(_sourceId);

		Json range = Json::object();
		range["offset"] = sourceLocation.start;
		range["length"] = sourceLocation.end - sourceLocation.start;

		Json code = Json::object();
		code["source"] = source;
		code["range"] = range;

		Json context = Json::object();
		context["code"] = code;

		Json entry = Json::object();
		entry["offset"] = first;
		entry["operation"] = operation;
		entry["context"] = context;
		result.push_back(entry);
	}

	return result;
}
82+
83+
} // anonymous namespace
84+
85+
// Creates the ethdebug program description of a contract.
//
// The result contains the contract name and the id of its defining source, the environment
// ("create" if there is no assembly or the assembly is creation code, "call" otherwise) and
// the list of instructions derived from _assembly and _linkerObject.
Json ethdebug::program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject)
{
	// Build the "contract" object from the innermost level outwards.
	Json definitionSource = Json::object();
	definitionSource["id"] = _sourceId;

	Json definition = Json::object();
	definition["source"] = definitionSource;

	Json contract = Json::object();
	contract["name"] = _name;
	contract["definition"] = definition;

	// Without an assembly there is nothing that could be called, so treat it like creation code.
	bool const isCreationContext = !_assembly || _assembly->isCreation();

	Json result = Json::object();
	result["contract"] = contract;
	if (isCreationContext)
		result["environment"] = "create";
	else
		result["environment"] = "call";
	result["instructions"] = programInstructions(_assembly, _linkerObject, _sourceId);
	return result;
}
97+
98+
// Creates the ethdebug resources description of a compilation.
//
// The result has a single "compilation" object holding the compiler (name "solc" and
// _version) and the list of sources, where each source is identified by its index in _sources
// and carries the corresponding path.
Json ethdebug::resources(std::vector<std::string> const& _sources, std::string const& _version)
{
	Json compiler = Json::object();
	compiler["name"] = "solc";
	compiler["version"] = _version;

	// The id of a source is its position in _sources.
	Json sourceList = Json::array();
	size_t nextId = 0;
	for (std::string const& path: _sources)
	{
		Json entry = Json::object();
		entry["id"] = nextId;
		entry["path"] = path;
		sourceList.push_back(entry);
		++nextId;
	}

	Json compilation = Json::object();
	compilation["compiler"] = compiler;
	compilation["sources"] = sourceList;

	Json result = Json::object();
	result["compilation"] = compilation;
	return result;
}

libevmasm/Ethdebug.h

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/*
2+
This file is part of solidity.
3+
4+
solidity is free software: you can redistribute it and/or modify
5+
it under the terms of the GNU General Public License as published by
6+
the Free Software Foundation, either version 3 of the License, or
7+
(at your option) any later version.
8+
9+
solidity is distributed in the hope that it will be useful,
10+
but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License
15+
along with solidity. If not, see <http://www.gnu.org/licenses/>.
16+
*/
17+
// SPDX-License-Identifier: GPL-3.0
18+
19+
#pragma once
20+
21+
#include <libsolutil/JSON.h>
22+
23+
#include <libevmasm/Assembly.h>
24+
#include <libevmasm/LinkerObject.h>
25+
26+
namespace solidity::evmasm::ethdebug
27+
{
28+
29+
// Returns ethdebug/format/program for the contract _name defined in the source with id _sourceId.
// The result contains the contract name and defining source id, the environment ("create" or "call")
// and the instructions derived from _assembly and _linkerObject.
// _assembly may be null (e.g. interfaces don't have a valid assembly object); the environment is
// then reported as "create" and _linkerObject must not contain any instruction locations.
30+
Json program(std::string_view _name, unsigned _sourceId, Assembly const* _assembly, LinkerObject const& _linkerObject);
31+
32+
// Returns ethdebug/format/info/resources.
// The result describes the compiler (name "solc" and _version) and lists _sources,
// using the index of each path within _sources as its source id.
33+
Json resources(std::vector<std::string> const& _sources, std::string const& _version);
34+
35+
} // namespace solidity::evmasm::ethdebug

0 commit comments

Comments
 (0)