From 9025fe7e99ae76c1426be68dc80f09e235631a80 Mon Sep 17 00:00:00 2001 From: Shunsuke Shibayama Date: Wed, 12 Jul 2023 16:31:39 +0900 Subject: [PATCH] feat: add bin/oct/hex literal --- crates/erg_compiler/context/eval.rs | 2 +- crates/erg_compiler/ty/value.rs | 29 ++++++++++++++---- crates/erg_parser/lex.rs | 47 ++++++++++++++++++++++++++++- crates/erg_parser/token.rs | 10 ++++-- tests/should_ok/decimal.er | 8 +++++ tests/test.rs | 5 +++ 6 files changed, 91 insertions(+), 10 deletions(-) create mode 100644 tests/should_ok/decimal.er diff --git a/crates/erg_compiler/context/eval.rs b/crates/erg_compiler/context/eval.rs index 443a2659f..5b11b862b 100644 --- a/crates/erg_compiler/context/eval.rs +++ b/crates/erg_compiler/context/eval.rs @@ -48,7 +48,7 @@ pub fn type_from_token_kind(kind: TokenKind) -> Type { use TokenKind::*; match kind { - NatLit => Type::Nat, + NatLit | BinLit | OctLit | HexLit => Type::Nat, IntLit => Type::Int, RatioLit => Type::Ratio, StrLit | DocComment => Type::Str, diff --git a/crates/erg_compiler/ty/value.rs b/crates/erg_compiler/ty/value.rs index 7dabf5cf8..e24da8650 100644 --- a/crates/erg_compiler/ty/value.rs +++ b/crates/erg_compiler/ty/value.rs @@ -920,12 +920,29 @@ impl ValueObj { pub fn from_str(t: Type, mut content: Str) -> Option<Self> { match t { Type::Int => content.replace('_', "").parse::().ok().map(Self::Int), - Type::Nat => content - .trim_start_matches('-') // -0 -> 0 - .replace('_', "") - .parse::<u64>() - .ok() - .map(Self::Nat), + Type::Nat => { + let content = content + .trim_start_matches('-') // -0 -> 0 + .replace('_', ""); + if content.len() <= 1 { + return content.parse::<u64>().ok().map(Self::Nat); + } + match &content[0..=1] { + pre @ ("0b" | "0B") => { + let content = content.trim_start_matches(pre); + u64::from_str_radix(content, 2).ok().map(Self::Nat) + } + pre @ ("0o" | "0O") => { + let content = content.trim_start_matches(pre); + u64::from_str_radix(content, 8).ok().map(Self::Nat) + } + pre @ ("0x" | "0X") => { + let content =
content.trim_start_matches(pre); + u64::from_str_radix(content, 16).ok().map(Self::Nat) + } + _ => content.parse::<u64>().ok().map(Self::Nat), + } + } Type::Float => content .replace('_', "") .parse::() diff --git a/crates/erg_parser/lex.rs b/crates/erg_parser/lex.rs index 8d2afcf16..31b74df6e 100644 --- a/crates/erg_parser/lex.rs +++ b/crates/erg_parser/lex.rs @@ -621,6 +621,18 @@ impl Lexer /*<'a>*/ { n if n.is_ascii_digit() || n == '_' => { num.push(self.consume().unwrap()); } + 'b' | 'B' => { + num.push(self.consume().unwrap()); + return self.lex_bin(num); + } + 'o' | 'O' => { + num.push(self.consume().unwrap()); + return self.lex_oct(num); + } + 'x' | 'X' => { + num.push(self.consume().unwrap()); + return self.lex_hex(num); + } c if Self::is_valid_continue_symbol_ch(c) => { // exponent (e.g. 10e+3) if c == 'e' @@ -682,6 +694,39 @@ impl Lexer /*<'a>*/ { } } + fn lex_bin(&mut self, mut num: String) -> LexResult { + while let Some(cur) = self.peek_cur_ch() { + if cur == '0' || cur == '1' || cur == '_' { + num.push(self.consume().unwrap()); + } else { + break; + } + } + Ok(self.emit_token(BinLit, &num)) + } + + fn lex_oct(&mut self, mut num: String) -> LexResult { + while let Some(cur) = self.peek_cur_ch() { + if matches!(cur, '0'..='7') || cur == '_' { + num.push(self.consume().unwrap()); + } else { + break; + } + } + Ok(self.emit_token(OctLit, &num)) + } + + fn lex_hex(&mut self, mut num: String) -> LexResult { + while let Some(cur) = self.peek_cur_ch() { + if cur.is_ascii_hexdigit() || cur == '_' { + num.push(self.consume().unwrap()); + } else { + break; + } + } + Ok(self.emit_token(HexLit, &num)) + } + /// int_part_and_point must be like `12.` fn lex_ratio(&mut self, intpart_and_point: String) -> LexResult { let mut num = intpart_and_point; @@ -1547,7 +1592,7 @@ impl Iterator for Lexer /*<'a>*/ { None, ))) } - // IntLit or RatioLit + // IntLit (or Bin/Oct/Hex) or RatioLit Some(n) if n.is_ascii_digit() => Some(self.lex_num(n)), // Symbol (includes '_') Some(c) if
Self::is_valid_start_symbol_ch(c) => Some(self.lex_symbol(c)), diff --git a/crates/erg_parser/token.rs b/crates/erg_parser/token.rs index 811835ed3..7ba76d3fe 100644 --- a/crates/erg_parser/token.rs +++ b/crates/erg_parser/token.rs @@ -24,6 +24,12 @@ pub enum TokenKind { NatLit, /// e.g. -1, -2 IntLit, + /// e.g. 0b101 + BinLit, + /// e.g. 0o777 + OctLit, + /// e.g. 0xdeadbeef + HexLit, RatioLit, BoolLit, StrLit, @@ -232,8 +238,8 @@ impl TokenKind { pub const fn category(&self) -> TokenCategory { match self { Symbol => TokenCategory::Symbol, - NatLit | IntLit | RatioLit | StrLit | BoolLit | NoneLit | EllipsisLit | InfLit - | DocComment => TokenCategory::Literal, + NatLit | BinLit | OctLit | HexLit | IntLit | RatioLit | StrLit | BoolLit | NoneLit + | EllipsisLit | InfLit | DocComment => TokenCategory::Literal, StrInterpLeft => TokenCategory::StrInterpLeft, StrInterpMid => TokenCategory::StrInterpMid, StrInterpRight => TokenCategory::StrInterpRight, diff --git a/tests/should_ok/decimal.er b/tests/should_ok/decimal.er new file mode 100644 index 000000000..3a6ae2ab3 --- /dev/null +++ b/tests/should_ok/decimal.er @@ -0,0 +1,8 @@ +b = 0b010010 +assert b == 18 + +o = 0o22 +assert o == 18 + +h = 0x12 +assert h == 18 diff --git a/tests/test.rs b/tests/test.rs index d10cb0f89..8ec0d6062 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -52,6 +52,11 @@ fn exec_control_expr() -> Result<(), ()> { expect_success("tests/should_ok/control_expr.er", 3) } +#[test] +fn exec_decimal() -> Result<(), ()> { + expect_success("tests/should_ok/decimal.er", 0) +} + #[test] fn exec_default_param() -> Result<(), ()> { expect_success("tests/should_ok/default_param.er", 0)