-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement some token-based formatting structure. (#4386)
Here I'm trying to add some simple formatting based on the token kind, aiming mainly to keep the implementation short for now. This approach won't generalize to arbitrary structures (e.g., it doesn't discern between braces for a function body and a struct literal). I think we'll probably want to build a parse tree and associate parse kinds with tokens in order to format, additionally doing something less linear. But my essential goal at present is to just get a proof-of-concept that the basics can yield something that looks okay.
- Loading branch information
Showing
8 changed files
with
260 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#include "toolchain/format/formatter.h" | ||
|
||
namespace Carbon::Format { | ||
|
||
auto Formatter::Run() -> bool { | ||
if (tokens_->has_errors()) { | ||
// TODO: Error recovery. | ||
return false; | ||
} | ||
|
||
auto comments = tokens_->comments(); | ||
auto comment_it = comments.begin(); | ||
|
||
// If there are no tokens or comments, format as empty. | ||
if (tokens_->size() == 0 && comment_it == comments.end()) { | ||
*out_ << "\n"; | ||
return true; | ||
} | ||
|
||
for (auto token : tokens_->tokens()) { | ||
auto token_kind = tokens_->GetKind(token); | ||
|
||
while (comment_it != comments.end() && | ||
tokens_->IsAfterComment(token, *comment_it)) { | ||
RequireEmptyLine(); | ||
PrepareForSpacedContent(); | ||
// TODO: We do need to adjust the indent of multi-line comments. | ||
*out_ << tokens_->GetCommentText(*comment_it); | ||
// Comment text includes a terminating newline, so just update the state. | ||
line_state_ = LineState::Empty; | ||
++comment_it; | ||
} | ||
|
||
switch (token_kind) { | ||
case Lex::TokenKind::FileStart: | ||
break; | ||
|
||
case Lex::TokenKind::FileEnd: | ||
RequireEmptyLine(); | ||
break; | ||
|
||
case Lex::TokenKind::OpenCurlyBrace: | ||
PrepareForSpacedContent(); | ||
*out_ << "{"; | ||
// Check for `{}`. | ||
if (NextToken(token) != tokens_->GetMatchedClosingToken(token)) { | ||
RequireEmptyLine(); | ||
} | ||
indent_ += 2; | ||
break; | ||
|
||
case Lex::TokenKind::CloseCurlyBrace: | ||
indent_ -= 2; | ||
PrepareForPackedContent(); | ||
*out_ << "}"; | ||
RequireEmptyLine(); | ||
break; | ||
|
||
case Lex::TokenKind::Semi: | ||
PrepareForPackedContent(); | ||
*out_ << ";"; | ||
RequireEmptyLine(); | ||
break; | ||
|
||
default: | ||
if (token_kind.IsOneOf( | ||
{Lex::TokenKind::CloseParen, Lex::TokenKind::Colon, | ||
Lex::TokenKind::ColonExclaim, Lex::TokenKind::Comma})) { | ||
PrepareForPackedContent(); | ||
} else { | ||
PrepareForSpacedContent(); | ||
} | ||
*out_ << tokens_->GetTokenText(token); | ||
line_state_ = token_kind.is_opening_symbol() | ||
? LineState::HasSeparator | ||
: LineState::NeedsSeparator; | ||
break; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
auto Formatter::PrepareForPackedContent() -> void { | ||
if (line_state_ == LineState::Empty) { | ||
out_->indent(indent_); | ||
line_state_ = LineState::HasSeparator; | ||
} | ||
} | ||
|
||
auto Formatter::RequireEmptyLine() -> void { | ||
if (line_state_ != LineState::Empty) { | ||
*out_ << "\n"; | ||
line_state_ = LineState::Empty; | ||
} | ||
} | ||
|
||
auto Formatter::PrepareForSpacedContent() -> void { | ||
if (line_state_ == LineState::NeedsSeparator) { | ||
*out_ << " "; | ||
line_state_ = LineState::HasSeparator; | ||
} else { | ||
PrepareForPackedContent(); | ||
} | ||
} | ||
|
||
} // namespace Carbon::Format |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
||
#ifndef CARBON_TOOLCHAIN_FORMAT_FORMATTER_H_ | ||
#define CARBON_TOOLCHAIN_FORMAT_FORMATTER_H_ | ||
|
||
#include <cstdint> | ||
|
||
#include "common/ostream.h" | ||
#include "toolchain/lex/tokenized_buffer.h" | ||
|
||
namespace Carbon::Format { | ||
|
||
// Implements Format(); see format.h. It's intended to be constructed and | ||
// `Run()` once, then destructed. | ||
// | ||
// TODO: This will probably need to work less linearly in the future, for | ||
// example to handle smart wrapping of arguments. This is a simple | ||
// implementation that only handles simple code. Before adding too much more | ||
// complexity, it should be rewritten. | ||
// | ||
// TODO: Add retention of blank lines between original code. | ||
// | ||
// TODO: Add support for formatting line ranges (will need flags too). | ||
class Formatter { | ||
public: | ||
explicit Formatter(const Lex::TokenizedBuffer* tokens, llvm::raw_ostream* out) | ||
: tokens_(tokens), out_(out) {} | ||
|
||
// See class comments. | ||
auto Run() -> bool; | ||
|
||
private: | ||
// Tracks the status of the current line of output. | ||
enum class LineState : uint8_t { | ||
// There is no output for the current line. | ||
Empty, | ||
// The current line has content (possibly just an indent), and does not need | ||
// a separator added. | ||
HasSeparator, | ||
// The current line has content, and will need a separator, typically a | ||
// single space or newline. | ||
NeedsSeparator, | ||
}; | ||
|
||
// Ensure output is on an empty line, setting line_state_ to Empty. May output | ||
// a newline, dependent on line state. Does not indent, allowing blank lines. | ||
auto RequireEmptyLine() -> void; | ||
|
||
// Ensures there is a separator before adding new content. May do | ||
// `PrepareForPackedContent` or output a separator space, dependent on line | ||
// state. Always results in line_state_ being HasSeparator; the caller is | ||
// responsible for adjusting state if needed. | ||
auto PrepareForSpacedContent() -> void; | ||
|
||
// Requires that the current line is indented, but not necessarily a separator | ||
// space. May output spaces for `indent_`, dependent on line state. Only | ||
// guarantees the line_state_ is not Empty; the caller is responsible for | ||
// adjusting state if needed. | ||
auto PrepareForPackedContent() -> void; | ||
|
||
// Returns the next token index. | ||
static auto NextToken(Lex::TokenIndex token) -> Lex::TokenIndex { | ||
return *(Lex::TokenIterator(token) + 1); | ||
} | ||
|
||
// The tokens being formatted. | ||
const Lex::TokenizedBuffer* tokens_; | ||
|
||
// The output stream for formatted content. | ||
llvm::raw_ostream* out_; | ||
|
||
// The state of the line currently written to output. | ||
LineState line_state_ = LineState::Empty; | ||
|
||
// The current code indent level, to be added to new lines. | ||
int indent_ = 0; | ||
}; | ||
|
||
} // namespace Carbon::Format | ||
|
||
#endif // CARBON_TOOLCHAIN_FORMAT_FORMATTER_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM | ||
// Exceptions. See /LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// AUTOUPDATE | ||
// TIP: To test this file alone, run: | ||
// TIP: bazel test //toolchain/testing:file_test --test_arg=--file_tests=toolchain/format/testdata/basics/braces.carbon | ||
// TIP: To dump output, run: | ||
// TIP: bazel run //toolchain/testing:file_test -- --dump_output --file_tests=toolchain/format/testdata/basics/braces.carbon | ||
|
||
// --- test.carbon | ||
|
||
fn F() { | ||
|
||
} | ||
|
||
fn G() -> i32 { | ||
|
||
return 3; | ||
|
||
} | ||
|
||
|
||
fn H(x: i32, y: i32) -> i32 { | ||
|
||
var z: i32 = x + y; | ||
return z; | ||
|
||
} | ||
|
||
class C { | ||
class D { | ||
class E {} | ||
} | ||
} | ||
|
||
// --- AUTOUPDATE-SPLIT | ||
|
||
// CHECK:STDOUT: fn F () {} | ||
// CHECK:STDOUT: fn G () -> i32 { | ||
// CHECK:STDOUT: return 3; | ||
// CHECK:STDOUT: } | ||
// CHECK:STDOUT: fn H (x: i32, y: i32) -> i32 { | ||
// CHECK:STDOUT: var z: i32 = x + y; | ||
// CHECK:STDOUT: return z; | ||
// CHECK:STDOUT: } | ||
// CHECK:STDOUT: class C { | ||
// CHECK:STDOUT: class D { | ||
// CHECK:STDOUT: class E {} | ||
// CHECK:STDOUT: } | ||
// CHECK:STDOUT: } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,5 +10,3 @@ | |
|
||
// --- test.carbon | ||
// --- AUTOUPDATE-SPLIT | ||
|
||
// CHECK:STDOUT: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters