Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving lexer into a 'Lex' namespace. #3170

Merged
merged 3 commits into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion language_server/language_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ void LanguageServer::OnDocumentSymbol(
llvm::MemoryBuffer::getMemBufferCopy(files_.at(file)));

auto buf = SourceBuffer::CreateFromFile(vfs, file);
auto lexed = TokenizedBuffer::Lex(*buf, NullDiagnosticConsumer());
auto lexed = Lex::TokenizedBuffer::Lex(*buf, NullDiagnosticConsumer());
auto parsed = Parse::Tree::Parse(lexed, NullDiagnosticConsumer(), nullptr);
std::vector<clang::clangd::DocumentSymbol> result;
for (const auto& node : parsed.postorder()) {
Expand Down
6 changes: 3 additions & 3 deletions toolchain/driver/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,10 +406,10 @@ auto Driver::Compile(const CompileOptions& options) -> bool {
}
CARBON_VLOG() << "*** file:\n```\n" << source->text() << "\n```\n";

CARBON_VLOG() << "*** TokenizedBuffer::Lex ***\n";
auto tokenized_source = TokenizedBuffer::Lex(*source, *consumer);
CARBON_VLOG() << "*** Lex::TokenizedBuffer::Lex ***\n";
auto tokenized_source = Lex::TokenizedBuffer::Lex(*source, *consumer);
bool has_errors = tokenized_source.has_errors();
CARBON_VLOG() << "*** TokenizedBuffer::Lex done ***\n";
CARBON_VLOG() << "*** Lex::TokenizedBuffer::Lex done ***\n";
if (options.dump_tokens) {
CARBON_VLOG() << "Finishing output.";
consumer->Flush();
Expand Down
4 changes: 2 additions & 2 deletions toolchain/lexer/character_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#include "llvm/ADT/StringExtras.h"

namespace Carbon {
namespace Carbon::Lex {

// TODO: These definitions need to be updated to match whatever Unicode lexical
// rules we pick. The function interfaces will need to change to accommodate
Expand Down Expand Up @@ -71,6 +71,6 @@ inline auto IsSpace(char c) -> bool {
return IsHorizontalWhitespace(c) || IsVerticalWhitespace(c);
}

} // namespace Carbon
} // namespace Carbon::Lex

#endif // CARBON_TOOLCHAIN_LEXER_CHARACTER_SET_H_
4 changes: 2 additions & 2 deletions toolchain/lexer/lex_helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include "toolchain/lexer/lex_helpers.h"

namespace Carbon {
namespace Carbon::Lex {

auto CanLexInteger(DiagnosticEmitter<const char*>& emitter,
llvm::StringRef text) -> bool {
Expand All @@ -29,4 +29,4 @@ auto CanLexInteger(DiagnosticEmitter<const char*>& emitter,
return true;
}

} // namespace Carbon
} // namespace Carbon::Lex
4 changes: 2 additions & 2 deletions toolchain/lexer/lex_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@

#include "toolchain/diagnostics/diagnostic_emitter.h"

namespace Carbon {
namespace Carbon::Lex {

// Should guard calls to getAsInteger due to performance issues with large
// integers. Emits an error if the text cannot be lexed.
auto CanLexInteger(DiagnosticEmitter<const char*>& emitter,
llvm::StringRef text) -> bool;

} // namespace Carbon
} // namespace Carbon::Lex

#endif // CARBON_TOOLCHAIN_LEXER_LEX_HELPERS_H_
4 changes: 2 additions & 2 deletions toolchain/lexer/numeric_literal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "toolchain/lexer/character_set.h"
#include "toolchain/lexer/lex_helpers.h"

namespace Carbon {
namespace Carbon::Lex {

// Adapts Radix for use with formatv.
// NOTE: clangd may see this as unused, but it will be invoked by diagnostics.
Expand Down Expand Up @@ -461,4 +461,4 @@ auto LexedNumericLiteral::ComputeValue(
.exponent = parser.GetExponent()};
}

} // namespace Carbon
} // namespace Carbon::Lex
4 changes: 2 additions & 2 deletions toolchain/lexer/numeric_literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include "llvm/ADT/StringRef.h"
#include "toolchain/diagnostics/diagnostic_emitter.h"

namespace Carbon {
namespace Carbon::Lex {

// A numeric literal token that has been extracted from a source buffer.
class LexedNumericLiteral {
geoffromer marked this conversation as resolved.
Show resolved Hide resolved
Expand Down Expand Up @@ -71,6 +71,6 @@ class LexedNumericLiteral {
int exponent_;
};

} // namespace Carbon
} // namespace Carbon::Lex

#endif // CARBON_TOOLCHAIN_LEXER_NUMERIC_LITERAL_H_
2 changes: 2 additions & 0 deletions toolchain/lexer/numeric_literal_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
namespace Carbon::Testing {
namespace {

using Lex::LexedNumericLiteral;

static void BM_Lex_Float(benchmark::State& state) {
for (auto _ : state) {
CARBON_CHECK(LexedNumericLiteral::Lex("0.000001"));
Expand Down
2 changes: 1 addition & 1 deletion toolchain/lexer/numeric_literal_fuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace Carbon::Testing {
// NOLINTNEXTLINE: Match the documented fuzzer entry point declaration style.
extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
std::size_t size) {
auto token = LexedNumericLiteral::Lex(
auto token = Lex::LexedNumericLiteral::Lex(
llvm::StringRef(reinterpret_cast<const char*>(data), size));
if (!token) {
// Lexically not a numeric literal.
Expand Down
1 change: 1 addition & 0 deletions toolchain/lexer/numeric_literal_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
namespace Carbon::Testing {
namespace {

using Lex::LexedNumericLiteral;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we put the lexer tests in a Carbon::Lex::Testing namespace (or just Carbon::Lex; I'm not clear on what the Testing namespace adds), and avoid these using declarations? It can sometimes make sense to put the tests in a different namespace than the code under test, if the client is expected to be in a different namespace, but that doesn't seem to be the case here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'd discussed this and decided on the approach as part of CommandLine discussion, and the resolution was to make tests look a bit like calling code. Here's some context:

https://discord.com/channels/655572317891461132/655578254970716160/1141155947901882500

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But that's my point: this makes the tests look unlike the calling code, because LexedNumericLiteral is only ever used inside the Lex namespace, except in the tests.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As came up in that thread -- using Carbon::Lex::Testing creates a bunch of problems because then any common Carbon testing utilities won't actually be in the namespace at all.

The Testing isn't to isolate the test (the anonymous namespace does that), it is to access testing utilities, and those have to live in a single namespace.

Given the tradeoffs, I'm not sure there is a significantly better namespace to use here, and the using declarations seem reasonably minimal?

Copy link
Contributor Author

@jonmeow jonmeow Aug 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another trade-offs I might add on top of chandlerc's comments is that we would definitely have Carbon::Testing regardless -- for testing utilities, as noted -- and so one of the stated leanings was to have only that.

@geoffromer I'm blocking on your comment, but noting the trade-offs and discussion on #toolchain (and existing code in the style), would you consider it okay to merge? (Changes to other tests are beyond the scope of this PR, but could be cleaned up pursuant to further discussion of the style)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I still don't really understand the tradeoffs being made here, but yeah, I'll take this to Discord.

using ::testing::_;
using ::testing::Field;
using ::testing::Matcher;
Expand Down
4 changes: 2 additions & 2 deletions toolchain/lexer/string_literal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include "toolchain/lexer/character_set.h"
#include "toolchain/lexer/lex_helpers.h"

namespace Carbon {
namespace Carbon::Lex {

using LexerDiagnosticEmitter = DiagnosticEmitter<const char*>;

Expand Down Expand Up @@ -455,4 +455,4 @@ auto LexedStringLiteral::ComputeValue(LexerDiagnosticEmitter& emitter) const
indent);
}

} // namespace Carbon
} // namespace Carbon::Lex
4 changes: 2 additions & 2 deletions toolchain/lexer/string_literal.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "llvm/ADT/StringRef.h"
#include "toolchain/diagnostics/diagnostic_emitter.h"

namespace Carbon {
namespace Carbon::Lex {

class LexedStringLiteral {
public:
Expand Down Expand Up @@ -70,6 +70,6 @@ class LexedStringLiteral {
bool is_terminated_;
};

} // namespace Carbon
} // namespace Carbon::Lex

#endif // CARBON_TOOLCHAIN_LEXER_STRING_LITERAL_H_
2 changes: 2 additions & 0 deletions toolchain/lexer/string_literal_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
namespace Carbon::Testing {
namespace {

using Lex::LexedStringLiteral;

static void BM_ValidString(benchmark::State& state, std::string_view introducer,
std::string_view terminator) {
std::string x(introducer);
Expand Down
2 changes: 1 addition & 1 deletion toolchain/lexer/string_literal_fuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace Carbon::Testing {
// NOLINTNEXTLINE: Match the documented fuzzer entry point declaration style.
extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
std::size_t size) {
auto token = LexedStringLiteral::Lex(
auto token = Lex::LexedStringLiteral::Lex(
llvm::StringRef(reinterpret_cast<const char*>(data), size));
if (!token) {
// Lexically not a string literal.
Expand Down
2 changes: 2 additions & 0 deletions toolchain/lexer/string_literal_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
namespace Carbon::Testing {
namespace {

using Lex::LexedStringLiteral;

class StringLiteralTest : public ::testing::Test {
protected:
StringLiteralTest() : error_tracker(ConsoleDiagnosticConsumer()) {}
Expand Down
4 changes: 2 additions & 2 deletions toolchain/lexer/token_kind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include "toolchain/lexer/token_kind.h"

namespace Carbon {
namespace Carbon::Lex {

CARBON_DEFINE_ENUM_CLASS_NAMES(TokenKind) = {
#define CARBON_TOKEN(TokenName) CARBON_ENUM_CLASS_NAME_STRING(TokenName)
Expand Down Expand Up @@ -79,4 +79,4 @@ constexpr int8_t TokenKind::ExpectedParseTreeSize[] = {
#include "toolchain/lexer/token_kind.def"
};

} // namespace Carbon
} // namespace Carbon::Lex
8 changes: 4 additions & 4 deletions toolchain/lexer/token_kind.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FormatVariadicDetails.h"

namespace Carbon {
namespace Carbon::Lex {

CARBON_DEFINE_RAW_ENUM_CLASS(TokenKind, uint8_t) {
#define CARBON_TOKEN(TokenName) CARBON_RAW_ENUM_ENUMERATOR(TokenName)
Expand Down Expand Up @@ -138,15 +138,15 @@ constexpr TokenKind TokenKind::KeywordTokensStorage[] = {
constexpr llvm::ArrayRef<TokenKind> TokenKind::KeywordTokens =
KeywordTokensStorage;

} // namespace Carbon
} // namespace Carbon::Lex

namespace llvm {

// We use formatv primarily for diagnostics. In these cases, it's expected that
// the spelling in source code should be used.
template <>
struct format_provider<Carbon::TokenKind> {
static void format(const Carbon::TokenKind& kind, raw_ostream& out,
struct format_provider<Carbon::Lex::TokenKind> {
static void format(const Carbon::Lex::TokenKind& kind, raw_ostream& out,
StringRef /*style*/) {
auto spelling = kind.fixed_spelling();
if (!spelling.empty()) {
Expand Down
1 change: 1 addition & 0 deletions toolchain/lexer/token_kind_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
namespace Carbon::Testing {
namespace {

using Lex::TokenKind;
using ::testing::MatchesRegex;

// We restrict symbols to punctuation characters that are expected to be widely
Expand Down
4 changes: 2 additions & 2 deletions toolchain/lexer/tokenized_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include <x86intrin.h>
#endif

namespace Carbon {
namespace Carbon::Lex {

// TODO: Move Overload and VariantMatch somewhere more central.

Expand Down Expand Up @@ -1213,4 +1213,4 @@ auto TokenizedBuffer::TokenLocationTranslator::GetLocation(Token token)
return SourceBufferLocationTranslator(buffer_).GetLocation(token_start);
}

} // namespace Carbon
} // namespace Carbon::Lex
6 changes: 2 additions & 4 deletions toolchain/lexer/tokenized_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
#include "toolchain/lexer/token_kind.h"
#include "toolchain/source/source_buffer.h"

namespace Carbon {

class TokenizedBuffer;
namespace Carbon::Lex {

// A buffer of tokenized Carbon source code.
//
Expand Down Expand Up @@ -422,6 +420,6 @@ using LexerDiagnosticEmitter = DiagnosticEmitter<const char*>;
// A diagnostic emitter that uses tokens as its source of location information.
using TokenDiagnosticEmitter = DiagnosticEmitter<TokenizedBuffer::Token>;

} // namespace Carbon
} // namespace Carbon::Lex

#endif // CARBON_TOOLCHAIN_LEXER_TOKENIZED_BUFFER_H_
3 changes: 3 additions & 0 deletions toolchain/lexer/tokenized_buffer_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
namespace Carbon::Testing {
namespace {

using Lex::TokenizedBuffer;
using Lex::TokenKind;

// A large value for measurement stability without making benchmarking too slow.
// Needs to be a multiple of 100 so we can easily divide it up into percentages,
// and 1% itself needs to not be too tiny. This makes 100,000 a great balance.
Expand Down
4 changes: 2 additions & 2 deletions toolchain/lexer/tokenized_buffer_fuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
/*RequiresNullTerminator=*/false)));
auto source = SourceBuffer::CreateFromFile(fs, TestFileName);

auto buffer = TokenizedBuffer::Lex(*source, NullDiagnosticConsumer());
auto buffer = Lex::TokenizedBuffer::Lex(*source, NullDiagnosticConsumer());
if (buffer.has_errors()) {
return 0;
}
Expand All @@ -41,7 +41,7 @@ extern "C" int LLVMFuzzerTestOneInput(const unsigned char* data,
//
// TODO: We should enhance this to do more sanity checks on the resulting
// token stream.
for (TokenizedBuffer::Token token : buffer.tokens()) {
for (Lex::TokenizedBuffer::Token token : buffer.tokens()) {
int line_number = buffer.GetLineNumber(token);
CARBON_CHECK(line_number > 0) << "Invalid line number!";
CARBON_CHECK(line_number < INT_MAX) << "Invalid line number!";
Expand Down
2 changes: 2 additions & 0 deletions toolchain/lexer/tokenized_buffer_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
namespace Carbon::Testing {
namespace {

using Lex::TokenizedBuffer;
using Lex::TokenKind;
using ::testing::_;
using ::testing::ElementsAre;
using ::testing::Eq;
Expand Down
11 changes: 6 additions & 5 deletions toolchain/lexer/tokenized_buffer_test_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ struct ExpectedToken {
return output;
}

TokenKind kind;
Lex::TokenKind kind;
int line = -1;
int column = -1;
int indent_column = -1;
Expand All @@ -54,7 +54,7 @@ struct ExpectedToken {
// mismatches first.
// NOLINTNEXTLINE: Expands from GoogleTest.
MATCHER_P(HasTokens, raw_all_expected, "") {
const TokenizedBuffer& buffer = arg;
const Lex::TokenizedBuffer& buffer = arg;
llvm::ArrayRef<ExpectedToken> all_expected = raw_all_expected;

bool matches = true;
Expand All @@ -68,7 +68,7 @@ MATCHER_P(HasTokens, raw_all_expected, "") {
int index = buffer_it - buffer.tokens().begin();
auto token = *buffer_it++;

TokenKind actual_kind = buffer.GetKind(token);
Lex::TokenKind actual_kind = buffer.GetKind(token);
if (actual_kind != expected.kind) {
*result_listener << "\nToken " << index << " is a " << actual_kind
<< ", expected a " << expected.kind << ".";
Expand Down Expand Up @@ -119,8 +119,9 @@ MATCHER_P(HasTokens, raw_all_expected, "") {
}

CARBON_CHECK(!expected.string_contents ||
expected.kind == TokenKind::StringLiteral);
if (expected.string_contents && actual_kind == TokenKind::StringLiteral) {
expected.kind == Lex::TokenKind::StringLiteral);
if (expected.string_contents &&
actual_kind == Lex::TokenKind::StringLiteral) {
llvm::StringRef actual_contents = buffer.GetStringLiteral(token);
if (actual_contents != *expected.string_contents) {
*result_listener << "\nToken " << index << " has contents `"
Expand Down
12 changes: 6 additions & 6 deletions toolchain/parser/parse_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@

namespace Carbon::Parse {

auto Tree::Parse(TokenizedBuffer& tokens, DiagnosticConsumer& consumer,
auto Tree::Parse(Lex::TokenizedBuffer& tokens, DiagnosticConsumer& consumer,
llvm::raw_ostream* vlog_stream) -> Tree {
TokenizedBuffer::TokenLocationTranslator translator(&tokens);
TokenDiagnosticEmitter emitter(translator, consumer);
Lex::TokenizedBuffer::TokenLocationTranslator translator(&tokens);
Lex::TokenDiagnosticEmitter emitter(translator, consumer);

// Delegate to the parser.
Tree tree(tokens);
Expand All @@ -30,7 +30,7 @@ auto Tree::Parse(TokenizedBuffer& tokens, DiagnosticConsumer& consumer,

// The package should always be the first token, if it's present. Any other
// use is invalid.
if (context.PositionIs(TokenKind::Package)) {
if (context.PositionIs(Lex::TokenKind::Package)) {
context.PushState(State::Package);
}

Expand Down Expand Up @@ -95,7 +95,7 @@ auto Tree::node_kind(Node n) const -> NodeKind {
return node_impls_[n.index].kind;
}

auto Tree::node_token(Node n) const -> TokenizedBuffer::Token {
auto Tree::node_token(Node n) const -> Lex::TokenizedBuffer::Token {
CARBON_CHECK(n.is_valid());
return node_impls_[n.index].token;
}
Expand Down Expand Up @@ -281,7 +281,7 @@ auto Tree::Verify() const -> ErrorOr<Success> {
tokens_->expected_parse_tree_size()) {
return Error(
llvm::formatv("Tree has {0} nodes and no errors, but "
"TokenizedBuffer expected {1} nodes for {2} tokens.",
"Lex::TokenizedBuffer expected {1} nodes for {2} tokens.",
node_impls_.size(), tokens_->expected_parse_tree_size(),
tokens_->size()));
}
Expand Down
Loading