Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding some shape to toolchain semantic analysis #1092

Merged
merged 24 commits into from
Mar 16, 2022
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .clang-tidy
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# - performance-unnecessary-value-param is disabled because it duplicates
# modernize-pass-by-value.
Checks:
-*, bugprone-*, -bugprone-easily-swappable-parameters,
-*, bugprone-*, -bugprone-branch-clone, -bugprone-easily-swappable-parameters,
-bugprone-narrowing-conversions, google-*, -google-readability-todo,
misc-definitions-in-headers, misc-misplaced-const, misc-redundant-expression,
misc-static-assert, misc-unconventional-assign-operator,
Expand Down
6 changes: 6 additions & 0 deletions toolchain/parser/parse_tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ auto ParseTree::postorder() const -> llvm::iterator_range<PostorderIterator> {

auto ParseTree::postorder(Node n) const
-> llvm::iterator_range<PostorderIterator> {
CHECK(n.is_valid());
// The postorder ends after this node, the root, and begins at the start of
// its subtree.
int end_index = n.index_ + 1;
Expand All @@ -47,6 +48,7 @@ auto ParseTree::postorder(Node n) const

auto ParseTree::children(Node n) const
-> llvm::iterator_range<SiblingIterator> {
CHECK(n.is_valid());
int end_index = n.index_ - node_impls_[n.index_].subtree_size;
return {SiblingIterator(*this, Node(n.index_ - 1)),
SiblingIterator(*this, Node(end_index))};
Expand All @@ -59,18 +61,22 @@ auto ParseTree::roots() const -> llvm::iterator_range<SiblingIterator> {
}

// Returns the has_error flag stored for node `n`. `n` must be valid.
auto ParseTree::node_has_error(Node n) const -> bool {
  CHECK(n.is_valid());
  const auto& impl = node_impls_[n.index_];
  return impl.has_error;
}

// Returns the kind stored for node `n`. `n` must be valid.
auto ParseTree::node_kind(Node n) const -> ParseNodeKind {
  CHECK(n.is_valid());
  const auto& impl = node_impls_[n.index_];
  return impl.kind;
}

// Returns the token stored for node `n`. `n` must be valid.
auto ParseTree::node_token(Node n) const -> TokenizedBuffer::Token {
  CHECK(n.is_valid());
  const auto& impl = node_impls_[n.index_];
  return impl.token;
}

// Returns the text of the token stored for node `n`, as reported by the
// tokenized buffer. `n` must be valid.
auto ParseTree::GetNodeText(Node n) const -> llvm::StringRef {
  CHECK(n.is_valid());
  const auto& impl = node_impls_[n.index_];
  return tokens_->GetTokenText(impl.token);
}

Expand Down
9 changes: 8 additions & 1 deletion toolchain/parser/parse_tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ class ParseTree {
class Parser;
friend Parser;

// Allow the location translator access for tokens_.
friend class ParseTreeNodeLocationTranslator;

// The in-memory representation of data used for a particular node in the
// tree.
struct NodeImpl {
Expand Down Expand Up @@ -259,6 +262,10 @@ class ParseTree::Node {
// Prints the node index.
auto Print(llvm::raw_ostream& output) const -> void;

// Returns true if the node is valid; in other words, it was not default
// initialized.
auto is_valid() -> bool { return index_ != -1; }

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth separating the validity changes into a separate PR? Not a big deal, looks trivial either way, but would be good to add a unittest to parse_tree that covers this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Split to #1139

private:
friend ParseTree;
friend Parser;
Expand All @@ -270,7 +277,7 @@ class ParseTree::Node {
explicit Node(int index) : index_(index) {}

// The index of this node's implementation in the postorder sequence.
int32_t index_;
int32_t index_ = -1;
};

// A random-access iterator to the depth-first postorder sequence of parse nodes
Expand Down
40 changes: 33 additions & 7 deletions toolchain/semantics/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,45 @@
package(default_visibility = ["//visibility:public"])

cc_library(
name = "semantics",
srcs = ["semantics.cpp"],
hdrs = ["semantics.h"],
name = "function",
hdrs = ["function.h"],
deps = ["//toolchain/parser:parse_tree"],
)

# The SemanticsIR data structure (semantics_ir.h / semantics_ir.cpp).
cc_library(
name = "semantics_ir",
srcs = ["semantics_ir.cpp"],
hdrs = ["semantics_ir.h"],
deps = [
":function",
"//common:check",
"//toolchain/lexer:tokenized_buffer",
"//toolchain/parser:parse_tree",
"@llvm-project//llvm:Support",
],
)

# SemanticsIRFactory, which builds a SemanticsIR from a ParseTree.
cc_library(
name = "semantics_ir_factory",
srcs = ["semantics_ir_factory.cpp"],
hdrs = ["semantics_ir_factory.h"],
deps = [
":semantics_ir",
"//common:check",
"//toolchain/lexer:tokenized_buffer",
"//toolchain/parser:parse_node_kind",
"//toolchain/parser:parse_tree",
"@llvm-project//llvm:Support",
],
)

cc_test(
name = "semantics_test",
name = "semantics_ir_factory_test",
size = "small",
srcs = ["semantics_test.cpp"],
srcs = ["semantics_ir_factory_test.cpp"],
deps = [
":semantics",
"//toolchain/diagnostics:diagnostic_emitter",
":semantics_ir_factory",
"//toolchain/diagnostics:mocks",
"//toolchain/lexer:tokenized_buffer",
"//toolchain/parser:parse_tree",
"//toolchain/source:source_buffer",
Expand Down
31 changes: 31 additions & 0 deletions toolchain/semantics/function.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef TOOLCHAIN_SEMANTICS_FUNCTION_H_
#define TOOLCHAIN_SEMANTICS_FUNCTION_H_

#include "toolchain/parser/parse_tree.h"

namespace Carbon::Semantics {

// Semantic information for a function.
class Function {
public:
Function(ParseTree::Node decl_node, ParseTree::Node name_node)
: decl_node_(decl_node), name_node_(name_node) {}

auto decl_node() const -> ParseTree::Node { return decl_node_; }
auto name_node() const -> ParseTree::Node { return name_node_; }

private:
// The FunctionDeclaration node.
ParseTree::Node decl_node_;

// The function's DeclaredName node.
ParseTree::Node name_node_;
};

} // namespace Carbon::Semantics

#endif // TOOLCHAIN_SEMANTICS_FUNCTION_H_
15 changes: 0 additions & 15 deletions toolchain/semantics/semantics.cpp

This file was deleted.

25 changes: 0 additions & 25 deletions toolchain/semantics/semantics.h

This file was deleted.

28 changes: 28 additions & 0 deletions toolchain/semantics/semantics_ir.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "toolchain/semantics/semantics_ir.h"

#include "common/check.h"
#include "llvm/Support/FormatVariadic.h"
#include "toolchain/lexer/tokenized_buffer.h"

namespace Carbon {

// Records `named_entity` in this block under `name`.
void SemanticsIR::Block::Add(llvm::StringRef name, Token named_entity) {
  // The entity is always appended to the ordered list.
  ordering_.push_back(named_entity);
  // Register the name for lookup. The insertion result is ignored, so an
  // entry already stored under `name` is left as it is.
  name_lookup_.try_emplace(name, named_entity);
}

// Appends a new function built from `decl_node` and `name_node` to
// functions_, registers it in `block` under the text of `name_node`, and
// returns a reference to the stored function.
auto SemanticsIR::AddFunction(Block& block, ParseTree::Node decl_node,
                              ParseTree::Node name_node)
    -> Semantics::Function& {
  // The new function occupies the slot at the current end of functions_.
  int32_t function_index = functions_.size();
  functions_.emplace_back(decl_node, name_node);

  llvm::StringRef function_name = parse_tree_->GetNodeText(name_node);
  block.Add(function_name, Token(Token::Kind::Function, function_index));
  return functions_.back();
}

} // namespace Carbon
78 changes: 78 additions & 0 deletions toolchain/semantics/semantics_ir.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef TOOLCHAIN_SEMANTICS_SEMANTICS_IR_H_
#define TOOLCHAIN_SEMANTICS_SEMANTICS_IR_H_

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "toolchain/parser/parse_tree.h"
#include "toolchain/semantics/function.h"

namespace Carbon {

// Provides semantic analysis on a ParseTree.
class SemanticsIR {
public:
chandlerc marked this conversation as resolved.
Show resolved Hide resolved
// Provides a link back to an entity in a name scope.
class Token {
public:
Token() : Token(Kind::Invalid, -1) {}
chandlerc marked this conversation as resolved.
Show resolved Hide resolved

private:
friend class SemanticsIR;

// The kind of token. These correspond to the lists on SemanticsIR which
// will be indexed into.
enum class Kind {
Invalid,
Function,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whether here or in a future step, I might call this a FunctionDeclaration or if you want a verb formulation DeclareFunction. Again, not at all blocking, just a thought for going forward.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll figure out separately if this needs to be separated out -- I was still considering how to determine the body (tempting is having them be the same, with a separation only on the body, particularly if that's how they work syntactically)

};

Token(Kind kind, int32_t index) : kind_(kind), index_(index) {
// TODO: kind_ and index_ are currently unused, this suppresses the
// warning.
kind_ = kind;
index_ = index;
}

Kind kind_;

// The index of the named entity within its list.
int32_t index_;
};

// A collection of named entities, kept both as an ordered list and as a
// name-keyed lookup table.
class Block {
 public:
  // Records `named_entity` in this block under `name`.
  void Add(llvm::StringRef name, Token named_entity);

 private:
  // Entities in the order they were added.
  llvm::SmallVector<Token> ordering_;

  // Entities keyed by name.
  llvm::StringMap<Token> name_lookup_;
};
chandlerc marked this conversation as resolved.
Show resolved Hide resolved

private:
friend class SemanticsIRFactory;

// Creates an empty IR for `parse_tree`. Only the address of `parse_tree` is
// stored, so the tree must outlive this object.
explicit SemanticsIR(const ParseTree& parse_tree)
: parse_tree_(&parse_tree) {}

// Creates a function, adds it to the enclosing scope, and returns a reference
// for further mutations. On a name collision, it will not be added to the
// scope, but will still be returned.
// NOTE(review): on a collision Block::Add still appends the entity to its
// ordering list; only the name lookup keeps its existing entry -- confirm
// that this is the intended meaning of "not added to the scope".
// NOTE(review): the returned reference points into functions_, so a later
// AddFunction call that grows the vector presumably invalidates it.
auto AddFunction(Block& block, ParseTree::Node decl_node,
ParseTree::Node name_node) -> Semantics::Function&;

// All functions created by AddFunction, indexed by the index_ of tokens whose
// kind is Token::Kind::Function.
llvm::SmallVector<Semantics::Function, 0> functions_;

// The file-level block.
Block root_block_;

// The parse tree this IR refers to; stored as a non-owning pointer.
const ParseTree* parse_tree_;
};

} // namespace Carbon

#endif // TOOLCHAIN_SEMANTICS_SEMANTICS_IR_H_
58 changes: 58 additions & 0 deletions toolchain/semantics/semantics_ir_factory.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "toolchain/semantics/semantics_ir_factory.h"

#include <utility>

#include "common/check.h"
#include "llvm/Support/FormatVariadic.h"
#include "toolchain/lexer/tokenized_buffer.h"
#include "toolchain/parser/parse_node_kind.h"

namespace Carbon {

// Builds the SemanticsIR for `parse_tree` by running a temporary factory over
// the tree's roots and returning the IR it accumulated.
auto SemanticsIRFactory::Build(const ParseTree& parse_tree) -> SemanticsIR {
  SemanticsIRFactory builder(parse_tree);
  builder.ProcessRoots();
  // `builder` is destroyed when this function returns, and returning a member
  // by name would copy it. Move the IR out so its containers are transferred
  // rather than duplicated.
  return std::move(builder.semantics_);
}

void SemanticsIRFactory::ProcessRoots() {
for (ParseTree::Node node : semantics_.parse_tree_->roots()) {
switch (semantics_.parse_tree_->node_kind(node)) {
case ParseNodeKind::FunctionDeclaration():
ProcessFunctionNode(semantics_.root_block_, node);
break;
case ParseNodeKind::FileEnd():
// No action needed.
break;
default:
FATAL() << "Unhandled node kind: "
<< semantics_.parse_tree_->node_kind(node).name();
}
}
}

// Processes a FunctionDeclaration node: visits each child of `decl_node` and,
// on finding the DeclaredName child, registers the function in `block`.
// Parameter lists and code blocks are recognized but not yet handled; any
// other child kind is a fatal error.
void SemanticsIRFactory::ProcessFunctionNode(SemanticsIR::Block& block,
                                             ParseTree::Node decl_node) {
  for (ParseTree::Node node : semantics_.parse_tree_->children(decl_node)) {
    switch (semantics_.parse_tree_->node_kind(node)) {
      case ParseNodeKind::DeclaredName():
        // The returned reference is not needed yet. When it is, bind it as a
        // reference or pointer: storing the Function by value would mutate a
        // detached copy rather than the entry held by the IR.
        semantics_.AddFunction(block, decl_node, node);
        break;
      case ParseNodeKind::ParameterList():
        // TODO: Maybe something like Semantics::AddVariable passed to
        // Function::AddParameter.
        break;
      case ParseNodeKind::CodeBlock():
        // TODO: Should accumulate the definition into the code block.
        break;
      default:
        FATAL() << "Unhandled node kind: "
                << semantics_.parse_tree_->node_kind(node).name();
    }
  }
}

} // namespace Carbon
Loading