Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for non-decimal floating point literals. #12323

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 19 additions & 14 deletions src/doc/rust.md
Original file line number Diff line number Diff line change
Expand Up @@ -319,21 +319,23 @@ r##"foo #"# bar"##; // foo #"# bar
#### Number literals

~~~~ {.ebnf .gram}
num_lit : nonzero_dec [ dec_digit | '_' ] * num_suffix ?
| '0' [ [ dec_digit | '_' ] * num_suffix ?
| 'b' [ '1' | '0' | '_' ] + int_suffix ?
| 'o' [ oct_digit | '_' ] + int_suffix ?
| 'x' [ hex_digit | '_' ] + int_suffix ? ] ;
num_lit : radix_lit num_suffix ;

radix_lit : nonzero_dec [ dec_digit | '_' ] *
| '0' [ [ dec_digit | '_' ] *
| 'b' [ '1' | '0' | '_' ] +
| 'o' [ oct_digit | '_' ] +
| 'x' [ hex_digit | '_' ] + ] ;

num_suffix : int_suffix | float_suffix ;

int_suffix : 'u' int_suffix_size ?
| 'i' int_suffix_size ? ;
int_suffix_size : [ '8' | '1' '6' | '3' '2' | '6' '4' ] ;

float_suffix : [ exponent | '.' dec_lit exponent ? ] ? float_suffix_ty ? ;
float_suffix : [ exponent | '.' radix_lit exponent ? ] ? float_suffix_ty ? ;
float_suffix_ty : 'f' [ '3' '2' | '6' '4' ] ;
exponent : ['E' | 'e'] ['-' | '+' ] ? dec_lit ;
exponent : ['E' | 'e' | 'p' | 'P'] ['-' | '+' ] ? dec_lit ;
dec_lit : [ dec_digit | '_' ] + ;
~~~~

Expand All @@ -343,7 +345,7 @@ as they are differentiated by suffixes.

##### Integer literals

An _integer literal_ has one of four forms:
A _radix literal_ has one of four forms:

* A _decimal literal_ starts with a *decimal digit* and continues with any
mixture of *decimal digits* and _underscores_.
Expand All @@ -354,9 +356,9 @@ An _integer literal_ has one of four forms:
* A _binary literal_ starts with the character sequence `U+0030` `U+0062`
(`0b`) and continues as any mixture of binary digits and underscores.

An integer literal may be followed (immediately, without any spaces) by an
_integer suffix_, which changes the type of the literal. There are two kinds
of integer literal suffix:
An integer literal consists of a radix literal and may be followed
(immediately, without any spaces) by an _integer suffix_, which changes the
type of the literal. There are two kinds of integer literal suffix:

* The `i` and `u` suffixes give the literal type `int` or `uint`,
respectively.
Expand Down Expand Up @@ -389,10 +391,11 @@ Examples of integer literals of various forms:

A _floating-point literal_ has one of two forms:

* Two _decimal literals_ separated by a period
* Two _radix literals_ separated by a period
character `U+002E` (`.`), with an optional _exponent_ trailing after the
second decimal literal.
* A single _decimal literal_ followed by an _exponent_.
second radix literal. Both radix literals must have the same base.
* A single _radix literal_ followed by an _exponent_.
* If the float literal is hexadecimal, an _exponent_ must be supplied.

By default, a floating-point literal has a generic type, but will fall back to
`f64`. A floating-point literal may be followed (immediately, without any
Expand All @@ -406,6 +409,8 @@ Examples of floating-point literals of various forms:
123.0; // type f64
0.1; // type f64
0.1f32; // type f32
0x4.0x432p-4_f32; // type f32
0b1.0b10111011011000; // type f64
12E+99_f64; // type f64
~~~~

Expand Down
127 changes: 92 additions & 35 deletions src/libsyntax/parse/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,7 @@ fn scan_exponent(rdr: &StringReader, start_bpos: BytePos) -> Option<~str> {
// \x00 hits the `return None` case immediately, so this is fine.
let mut c = rdr.curr.get().unwrap_or('\x00');
let mut rslt = ~"";
if c == 'e' || c == 'E' {
rslt.push_char(c);
if c == 'e' || c == 'E' || c == 'p' || c == 'P' {
bump(rdr);
c = rdr.curr.get().unwrap_or('\x00');
if c == '-' || c == '+' {
Expand Down Expand Up @@ -476,40 +475,32 @@ fn scan_digits(rdr: &StringReader, radix: uint) -> ~str {
};
}

// Guard for float literals written in a non-decimal base.
//
// Calls `fatal_span` over `start_bpos..last_bpos` with a "not supported"
// message when `base` is 16, 8 or 2; any other base falls through to the
// `_` arm and nothing is reported.
fn check_float_base(rdr: &StringReader, start_bpos: BytePos, last_bpos: BytePos,
base: uint) {
match base {
16u => fatal_span(rdr, start_bpos, last_bpos,
~"hexadecimal float literal is not supported"),
8u => fatal_span(rdr, start_bpos, last_bpos,
~"octal float literal is not supported"),
2u => fatal_span(rdr, start_bpos, last_bpos,
~"binary float literal is not supported"),
_ => ()
}
}

fn scan_number(c: char, rdr: &StringReader) -> token::Token {
let mut num_str;
let mut base = 10u;
let mut c = c;
let mut n = nextch(rdr).unwrap_or('\x00');
let start_bpos = rdr.last_pos.get();
fn scan_radix(rdr: &StringReader) -> uint {
let c = rdr.curr.get().unwrap_or('\x00');
let n = nextch(rdr).unwrap_or('\x00');
if c == '0' && n == 'x' {
bump(rdr);
bump(rdr);
base = 16u;
return 16u;
} else if c == '0' && n == 'o' {
bump(rdr);
bump(rdr);
base = 8u;
return 8u;
} else if c == '0' && n == 'b' {
bump(rdr);
bump(rdr);
base = 2u;
return 2u;
}
return 10u;
}

fn scan_number(rdr: &StringReader) -> token::Token {
let mut num_str;
let start_bpos = rdr.last_pos.get();
let mut base = scan_radix(rdr);
num_str = scan_digits(rdr, base);
c = rdr.curr.get().unwrap_or('\x00');
let mut c = rdr.curr.get().unwrap_or('\x00');
let mut n:char;
nextch(rdr);
if c == 'u' || c == 'i' {
enum Result { Signed(ast::IntTy), Unsigned(ast::UintTy) }
Expand Down Expand Up @@ -558,19 +549,71 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
}
}
let mut is_float = false;
let mut dec_part = ~"";
if rdr.curr_is('.') && !(ident_start(nextch(rdr)) || nextch_is(rdr, '.')) {
is_float = true;
bump(rdr);
let dec_part = scan_digits(rdr, 10u);
num_str.push_char('.');
num_str.push_str(dec_part);
let mantissa_base = scan_radix(rdr);
if mantissa_base != base {
//The ability to switch base, while conceivably useful, is much more
//likely to be triggered by accident.
fatal_span(rdr, start_bpos, rdr.last_pos.get(),
~"float literals must have consistent base before and after decimal point");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If they have to be equal, why not just not require the second one? so 0xab.cdef and 0b101.0101 etc. would all be valid?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was mentioned in the issue. Because 10.method() is valid, there needs to be a way to disambiguate, ideally without requiring long lookahead. For the binary and octal cases it could be dropped though.

}
base = mantissa_base;
dec_part = scan_digits(rdr, mantissa_base);
}
let mut exp_part = ~"";
match scan_exponent(rdr, start_bpos) {
Some(ref s) => {
Some(s) => {
is_float = true;
num_str.push_str(*s);
exp_part = s;
}
None => ()
None => {
if is_float && base > 10 {
//otherwise we have ambiguity: 0x1.0xffff_f32 gets parsed as
//0x1.fffff32, which will create confusing results.
fatal_span(rdr, start_bpos, rdr.last_pos.get(),
~"hexadecimal float literals must contain exponent");
}
}
}
if is_float {
if base == 10 || base == 16 {
num_str.push_char('.');
num_str.push_str( if dec_part.len() > 0 {dec_part} else {~"0"} );
if exp_part.len() != 0 {
num_str.push_char(if base == 10 {'e'} else {'p'});
num_str.push_str(exp_part);
}
} else {
num_str = from_str_radix::<u64>(num_str, base).unwrap().to_str_radix(16);
let mut i = 0;
let len = dec_part.len();
let step = match base { 8 => 2, 2 => 4, _ => fail!("Impossible base for float")};
let mut dec_str = ~"";
while i < len {
let chunk = if i + step > len {
let mut chunk = dec_part.slice_from(i).to_str();
for _ in range(0, i + step - len) {
chunk.push_char('0');
}
chunk
} else {
dec_part.slice(i, i + step).to_str()
};
dec_str.push_str(from_str_radix::<u8>(chunk, base).unwrap_or(0).to_str());
i += step;
}
num_str.push_char('.');
num_str.push_str(dec_str);
num_str.push_char('p');
num_str.push_str(if exp_part.len() > 0 {exp_part} else {~"0"});
}
if base != 10 {
num_str.unshift_char('x');
num_str.unshift_char('0');
}
}

if rdr.curr_is('f') {
Expand All @@ -580,12 +623,10 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
if c == '3' && n == '2' {
bump(rdr);
bump(rdr);
check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF32);
} else if c == '6' && n == '4' {
bump(rdr);
bump(rdr);
check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
return token::LIT_FLOAT(str_to_ident(num_str), ast::TyF64);
/* FIXME (#2252): if this is out of range for either a
32-bit or 64-bit float, it won't be noticed till the
Expand All @@ -596,7 +637,6 @@ fn scan_number(c: char, rdr: &StringReader) -> token::Token {
}
}
if is_float {
check_float_base(rdr, start_bpos, rdr.last_pos.get(), base);
return token::LIT_FLOAT_UNSUFFIXED(str_to_ident(num_str));
} else {
if num_str.len() == 0u {
Expand Down Expand Up @@ -687,7 +727,7 @@ fn next_token_inner(rdr: &StringReader) -> token::Token {
})
}
if is_dec_digit(c) {
return scan_number(c.unwrap(), rdr);
return scan_number(rdr);
}
fn binop(rdr: &StringReader, op: token::BinOp) -> token::Token {
bump(rdr);
Expand Down Expand Up @@ -1005,6 +1045,7 @@ mod test {
use diagnostic;
use parse::token;
use parse::token::{str_to_ident};
use ast;

// represents a testing reader (incl. both reader and interner)
struct Env {
Expand Down Expand Up @@ -1139,4 +1180,20 @@ mod test {
assert_eq!(tok,token::LIT_CHAR('a' as u32));
}

// Lexes `0x1.0xffffffp100_f32` and expects an `f32` float token whose text
// is `0x1.ffffffp100`: the second `0x` (before the fraction) and the `_`
// before the type suffix do not appear in the expected token text.
#[test] fn hex_floats() {
let env = setup(~"0x1.0xffffffp100_f32");
let TokenAndSpan {tok, sp: _} =
env.string_reader.next_token();
let id = token::str_to_ident("0x1.ffffffp100");
assert_eq!(tok,token::LIT_FLOAT(id, ast::TyF32));
}

// Lexes a binary float literal and expects the token text in hexadecimal
// form. The fractional bits `0000 0001 0010 0011 1` correspond, four at a
// time, to the expected digits `0 1 2 3 8` (the final lone `1` counts as
// `1000`), and the exponent `p100` is carried over unchanged.
#[test] fn bin_floats() {
let env = setup(~"0b1.0b0000_0001_0010_0011_1p100_f32");
let TokenAndSpan {tok, sp: _} =
env.string_reader.next_token();
let id = token::str_to_ident("0x1.01238p100");
assert_eq!(tok,token::LIT_FLOAT(id, ast::TyF32));
}

}
13 changes: 0 additions & 13 deletions src/test/compile-fail/lex-bad-fp-base-1.rs

This file was deleted.

13 changes: 0 additions & 13 deletions src/test/compile-fail/lex-bad-fp-base-2.rs

This file was deleted.

13 changes: 0 additions & 13 deletions src/test/compile-fail/lex-bad-fp-base-3.rs

This file was deleted.

13 changes: 0 additions & 13 deletions src/test/compile-fail/lex-bad-fp-base-4.rs

This file was deleted.

13 changes: 0 additions & 13 deletions src/test/compile-fail/lex-bad-fp-base-5.rs

This file was deleted.

13 changes: 0 additions & 13 deletions src/test/compile-fail/lex-bad-fp-base-6.rs

This file was deleted.

13 changes: 0 additions & 13 deletions src/test/compile-fail/lex-bad-fp-base-7.rs

This file was deleted.

13 changes: 0 additions & 13 deletions src/test/compile-fail/lex-bad-fp-base-8.rs

This file was deleted.

Loading