Skip to content

Commit

Permalink
Change spans to use byte offsets instead of char offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
brson committed Nov 16, 2012
1 parent 8cba337 commit 81d2015
Show file tree
Hide file tree
Showing 12 changed files with 161 additions and 89 deletions.
2 changes: 1 addition & 1 deletion src/librustc/middle/trans/debuginfo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ type compile_unit_md = {name: ~str};
type subprogram_md = {id: ast::node_id};
type local_var_md = {id: ast::node_id};
type tydesc_md = {hash: uint};
type block_md = {start: codemap::Loc<CharPos>, end: codemap::Loc<CharPos>};
type block_md = {start: codemap::Loc, end: codemap::Loc};
type argument_md = {id: ast::node_id};
type retval_md = {id: ast::node_id};

Expand Down
8 changes: 4 additions & 4 deletions src/libsyntax/ast_util.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use codemap::{span, CharPos};
use codemap::{span, BytePos};
use ast::*;

pure fn spanned<T>(+lo: CharPos, +hi: CharPos, +t: T) -> spanned<T> {
pure fn spanned<T>(+lo: BytePos, +hi: BytePos, +t: T) -> spanned<T> {
respan(mk_sp(lo, hi), move t)
}

Expand All @@ -14,12 +14,12 @@ pure fn dummy_spanned<T>(+t: T) -> spanned<T> {
}

/* assuming that we're not in macro expansion */
pure fn mk_sp(+lo: CharPos, +hi: CharPos) -> span {
pure fn mk_sp(+lo: BytePos, +hi: BytePos) -> span {
span {lo: lo, hi: hi, expn_info: None}
}

// make this a const, once the compiler supports it
pure fn dummy_sp() -> span { return mk_sp(CharPos(0), CharPos(0)); }
pure fn dummy_sp() -> span { return mk_sp(BytePos(0), BytePos(0)); }



Expand Down
4 changes: 2 additions & 2 deletions src/libsyntax/attr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use either::Either;
use diagnostic::span_handler;
use ast_util::{spanned, dummy_spanned};
use parse::comments::{doc_comment_style, strip_doc_comment_decoration};
use codemap::CharPos;
use codemap::BytePos;

// Constructors
export mk_name_value_item_str;
Expand Down Expand Up @@ -76,7 +76,7 @@ fn mk_attr(item: @ast::meta_item) -> ast::attribute {
}

fn mk_sugared_doc_attr(text: ~str,
+lo: CharPos, +hi: CharPos) -> ast::attribute {
+lo: BytePos, +hi: BytePos) -> ast::attribute {
let lit = spanned(lo, hi, ast::lit_str(@text));
let attr = {
style: doc_comment_style(text),
Expand Down
145 changes: 106 additions & 39 deletions src/libsyntax/codemap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ impl CharPos: to_bytes::IterBytes {
}

pub struct span {
lo: CharPos,
hi: CharPos,
lo: BytePos,
hi: BytePos,
expn_info: Option<@ExpnInfo>
}

Expand All @@ -141,8 +141,10 @@ impl<D: Deserializer> span: Deserializable<D> {
}
}

pub struct Loc<A: Pos> {
file: @FileMap, line: uint, col: A
// XXX col shouldn't be CharPos because col is not an absolute location in the
// codemap, and BytePos and CharPos always represent absolute positions
pub struct Loc {
file: @FileMap, line: uint, col: CharPos
}

/// An absolute offset within the CodeMap (not a relative offset within a
Expand Down Expand Up @@ -178,12 +180,24 @@ pub enum FileSubstr {
pub FssExternal({filename: ~str, line: uint, col: CharPos})
}

/// Identifies an offset of a multi-byte character in a FileMap.
///
/// Entries are appended by `FileMap::record_multibyte_char` and are walked
/// when a byte position has to be turned into a character position.
pub struct MultiByteChar {
/// The absolute offset of the character in the CodeMap
pos: BytePos,
/// The number of bytes the character occupies. `record_multibyte_char`
/// asserts that this is at least 2 and at most 4.
bytes: uint,
/// Running total kept by `record_multibyte_char`: the `sum` of the
/// previously recorded entry (0 if there is none) plus this entry's `bytes`.
/// NOTE(review): this was described as the number of 'extra' bytes through
/// this character in the FileMap, but the value accumulated is the full
/// byte count of each character -- confirm which one is intended.
sum: uint
}

pub struct FileMap {
name: FileName,
substr: FileSubstr,
src: @~str,
start_pos: FilePos,
mut lines: ~[FilePos]
mut lines: ~[FilePos],
multibyte_chars: DVec<MultiByteChar>
}

pub impl FileMap {
Expand All @@ -194,7 +208,8 @@ pub impl FileMap {
return FileMap {
name: filename, substr: substr, src: src,
start_pos: start_pos,
mut lines: ~[]
mut lines: ~[],
multibyte_chars: DVec()
};
}

Expand All @@ -219,6 +234,21 @@ pub impl FileMap {
str::slice(*self.src, begin, end)
}

/// Appends a record for one multi-byte character to this FileMap.
///
/// `pos` is stored as the character's position and `bytes` as its encoded
/// length; the assertion fails unless `bytes` is between 2 and 4. The new
/// entry's `sum` continues the running total held by the most recently
/// recorded entry, or starts from `bytes` alone when this is the first one.
pub fn record_multibyte_char(&self, pos: BytePos, bytes: uint) {
    assert bytes >= 2 && bytes <= 4;
    let running_total = if self.multibyte_chars.len() == 0 {
        bytes
    } else {
        self.multibyte_chars.last().sum + bytes
    };
    self.multibyte_chars.push(MultiByteChar {
        pos: pos,
        bytes: bytes,
        sum: running_total
    });
}
}

pub struct CodeMap {
Expand Down Expand Up @@ -254,12 +284,11 @@ pub impl CodeMap {
pos.line, pos.col.to_uint());
}
pub fn lookup_char_pos(&self, +pos: CharPos) -> Loc<CharPos> {
pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
return self.lookup_pos(pos, lookup);
pub fn lookup_char_pos(&self, +pos: BytePos) -> Loc {
return self.lookup_pos(pos);
}
pub fn lookup_char_pos_adj(&self, +pos: CharPos)
pub fn lookup_char_pos_adj(&self, +pos: BytePos)
-> {filename: ~str, line: uint, col: CharPos, file: Option<@FileMap>}
{
let loc = self.lookup_char_pos(pos);
Expand All @@ -272,7 +301,7 @@ pub impl CodeMap {
}
FssInternal(sp) => {
self.lookup_char_pos_adj(
sp.lo + (pos - loc.file.start_pos.ch))
sp.lo + (pos - loc.file.start_pos.byte))
}
FssExternal(eloc) => {
{filename: /* FIXME (#2543) */ copy eloc.filename,
Expand All @@ -284,14 +313,13 @@ pub impl CodeMap {
}
pub fn adjust_span(&self, sp: span) -> span {
pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
let line = self.lookup_line(sp.lo, lookup);
let line = self.lookup_line(sp.lo);
match (line.fm.substr) {
FssNone => sp,
FssInternal(s) => {
self.adjust_span(span {
lo: s.lo + (sp.lo - line.fm.start_pos.ch),
hi: s.lo + (sp.hi - line.fm.start_pos.ch),
lo: s.lo + (sp.lo - line.fm.start_pos.byte),
hi: s.lo + (sp.hi - line.fm.start_pos.byte),
expn_info: sp.expn_info
})
}
Expand Down Expand Up @@ -321,18 +349,6 @@ pub impl CodeMap {
return @FileLines {file: lo.file, lines: lines};
}

fn lookup_byte_offset(&self, +chpos: CharPos)
-> {fm: @FileMap, pos: BytePos} {
pure fn lookup(pos: FilePos) -> uint { return pos.ch.to_uint(); }
let {fm, line} = self.lookup_line(chpos, lookup);
let line_offset = fm.lines[line].byte - fm.start_pos.byte;
let col = chpos - fm.lines[line].ch;
let col_offset = str::count_bytes(*fm.src,
line_offset.to_uint(),
col.to_uint());
{fm: fm, pos: line_offset + BytePos(col_offset)}
}

pub fn span_to_snippet(&self, sp: span) -> ~str {
let begin = self.lookup_byte_offset(sp.lo);
let end = self.lookup_byte_offset(sp.hi);
Expand All @@ -351,15 +367,14 @@ pub impl CodeMap {
}

priv impl CodeMap {
fn lookup_line<A: Pos>(&self, pos: A, lookup: LookupFn)
-> {fm: @FileMap, line: uint}
{

fn lookup_filemap_idx(&self, +pos: BytePos) -> uint {
let len = self.files.len();
let mut a = 0u;
let mut b = len;
while b - a > 1u {
let m = (a + b) / 2u;
if lookup(self.files[m].start_pos) > pos.to_uint() {
if self.files[m].start_pos.byte > pos {
b = m;
} else {
a = m;
Expand All @@ -369,22 +384,40 @@ priv impl CodeMap {
fail fmt!("position %u does not resolve to a source location",
pos.to_uint())
}
let f = self.files[a];
a = 0u;
b = vec::len(f.lines);

return a;
}

fn lookup_line(&self, +pos: BytePos)
-> {fm: @FileMap, line: uint}
{
let idx = self.lookup_filemap_idx(pos);
let f = self.files[idx];
let mut a = 0u;
let mut b = vec::len(f.lines);
while b - a > 1u {
let m = (a + b) / 2u;
if lookup(f.lines[m]) > pos.to_uint() { b = m; } else { a = m; }
if f.lines[m].byte > pos { b = m; } else { a = m; }
}
return {fm: f, line: a};
}

fn lookup_pos<A: Pos Num>(&self, pos: A, lookup: LookupFn) -> Loc<A> {
let {fm: f, line: a} = self.lookup_line(pos, lookup);
fn lookup_pos(&self, +pos: BytePos) -> Loc {
let {fm: f, line: a} = self.lookup_line(pos);
let line = a + 1u; // Line numbers start at 1
let chpos = self.bytepos_to_local_charpos(pos);
let linebpos = f.lines[a].byte;
let linechpos = self.bytepos_to_local_charpos(linebpos);
debug!("codemap: byte pos %? is on the line at byte pos %?",
pos, linebpos);
debug!("codemap: char pos %? is on the line at char pos %?",
chpos, linechpos);
debug!("codemap: byte is on line: %?", line);
assert chpos >= linechpos;
return Loc {
file: f,
line: a + 1u,
col: pos - from_uint(lookup(f.lines[a]))
line: line,
col: chpos - linechpos
};
}

Expand All @@ -394,6 +427,40 @@ priv impl CodeMap {
return fmt!("%s:%u:%u: %u:%u", lo.file.name,
lo.line, lo.col.to_uint(), hi.line, hi.col.to_uint())
}

// Finds the FileMap that contains the absolute byte position `bpos` and
// returns it together with `bpos` re-expressed as an offset from that
// FileMap's starting byte position.
fn lookup_byte_offset(&self, +bpos: BytePos)
    -> {fm: @FileMap, pos: BytePos} {
    let filemap = self.files[self.lookup_filemap_idx(bpos)];
    let relative = bpos - filemap.start_pos.byte;
    {fm: filemap, pos: relative}
}

// Converts an absolute BytePos into a character-based position by
// discounting the extra bytes of every multibyte char that precedes it in
// the FileMap it is located in.
//
// The FileMap's own starting position is not subtracted and multibyte chars
// of other FileMaps are not considered, so a result is only meaningful
// relative to another result for the same FileMap (lookup_pos subtracts the
// value for the start of the line to obtain a column).
fn bytepos_to_local_charpos(&self, +bpos: BytePos) -> CharPos {
    debug!("codemap: converting %? to char pos", bpos);
    let idx = self.lookup_filemap_idx(bpos);
    let map = self.files[idx];

    // The number of extra bytes due to multibyte chars in the FileMap
    let mut total_extra_bytes = 0;

    // Stops at the first entry at or after bpos; this relies on the entries
    // having been recorded in increasing position order.
    for map.multibyte_chars.each |mbc| {
        debug!("codemap: %?-byte char at %?", mbc.bytes, mbc.pos);
        if mbc.pos < bpos {
            // A multibyte char still counts as exactly one character, so
            // only the bytes beyond its first one are 'extra'. Subtracting
            // all of its bytes would make every later column on the line
            // one too small.
            total_extra_bytes += mbc.bytes - 1;
            // We should never see a byte position in the middle of a
            // character
            assert bpos.to_uint() >= mbc.pos.to_uint() + mbc.bytes;
        } else {
            break;
        }
    }

    CharPos(bpos.to_uint() - total_extra_bytes)
}
}

//
Expand Down
14 changes: 7 additions & 7 deletions src/libsyntax/ext/qquote.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use parse::parser;
use parse::parser::{Parser, parse_from_source_str};
use dvec::DVec;
use parse::token::ident_interner;
use codemap::CharPos;
use codemap::{CharPos, BytePos};

use fold::*;
use visit::*;
Expand All @@ -16,13 +16,13 @@ use io::*;
use codemap::span;

struct gather_item {
lo: CharPos,
hi: CharPos,
lo: BytePos,
hi: BytePos,
e: @ast::expr,
constr: ~str
}

type aq_ctxt = @{lo: CharPos, gather: DVec<gather_item>};
type aq_ctxt = @{lo: BytePos, gather: DVec<gather_item>};
enum fragment {
from_expr(@ast::expr),
from_ty(@ast::Ty)
Expand Down Expand Up @@ -115,7 +115,7 @@ impl @ast::pat: qq_helper {
fn get_fold_fn() -> ~str {~"fold_pat"}
}

fn gather_anti_quotes<N: qq_helper>(lo: CharPos, node: N) -> aq_ctxt
fn gather_anti_quotes<N: qq_helper>(lo: BytePos, node: N) -> aq_ctxt
{
let v = @{visit_expr: |node, &&cx, v| visit_aq(node, ~"from_expr", cx, v),
visit_ty: |node, &&cx, v| visit_aq(node, ~"from_ty", cx, v),
Expand Down Expand Up @@ -227,7 +227,7 @@ fn finish<T: qq_helper>
let mut str2 = ~"";
enum state {active, skip(uint), blank};
let mut state = active;
let mut i = CharPos(0u);
let mut i = BytePos(0u);
let mut j = 0u;
let g_len = cx.gather.len();
for str::chars_each(*str) |ch| {
Expand All @@ -244,7 +244,7 @@ fn finish<T: qq_helper>
blank if is_space(ch) => str::push_char(&mut str2, ch),
blank => str::push_char(&mut str2, ' ')
}
i += CharPos(1u);
i += BytePos(1u);
if (j < g_len && i == cx.gather[j].hi) {
assert ch == ')';
state = active;
Expand Down
6 changes: 3 additions & 3 deletions src/libsyntax/ext/tt/macro_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use dvec::DVec;
use ast::{matcher, match_tok, match_seq, match_nonterminal, ident};
use ast_util::mk_sp;
use std::map::HashMap;
use codemap::CharPos;
use codemap::BytePos;

/* This is an Earley-like parser, without support for in-grammar nonterminals,
only by calling out to the main rust parser for named nonterminals (which it
Expand Down Expand Up @@ -103,7 +103,7 @@ type matcher_pos = ~{
mut up: matcher_pos_up, // mutable for swapping only
matches: ~[DVec<@named_match>],
match_lo: uint, match_hi: uint,
sp_lo: CharPos,
sp_lo: BytePos,
};

fn copy_up(&& mpu: matcher_pos_up) -> matcher_pos {
Expand All @@ -123,7 +123,7 @@ fn count_names(ms: &[matcher]) -> uint {
}

#[allow(non_implicitly_copyable_typarams)]
fn initial_matcher_pos(ms: ~[matcher], sep: Option<Token>, lo: CharPos)
fn initial_matcher_pos(ms: ~[matcher], sep: Option<Token>, lo: BytePos)
-> matcher_pos {
let mut match_idx_hi = 0u;
for ms.each() |elt| {
Expand Down
Loading

0 comments on commit 81d2015

Please sign in to comment.