diff --git a/README.md b/README.md index 9e29f5235..fd9dc9a36 100644 --- a/README.md +++ b/README.md @@ -46,15 +46,15 @@ Candy blurs the line between those stages, for example, by replacing compile-tim ```candy foo a = # If you pass a = 0, needs (isInt a) - math.logarithm a # then this fails because logarithm only works on positive numbers. + math.logarithm a # then this panics: The `input` must be a positive number. efficientTextReverse text = needs (isText text) - needs (isPalindrome text) "efficientTextReverse only works on palindromes" + needs (isPalindrome text) "Only palindromes can be efficiently reversed." text greetBackwards name = # If you pass name = "Test", - "Hello, {efficientTextReverse name}" # then this fails because efficientTextReverse only works on palindromes. + "Hello, {efficientTextReverse name}" # then this panics: Only palindromes can be efficiently reversed. ``` To get a more in-depth introduction, read the [language document](language.md). @@ -135,16 +135,13 @@ We already have a language server that provides some tooling. ## Short-term TODOs -- fix fault attribution - new name? - add caching while compile-time evaluating code - tags - pattern matching -- add CI - add tests - add a more lightweight tracer that only tracks stack traces - text interpolation -- optimize: eliminate common subtrees - optimize: inline functions - minimize inputs found through fuzzing - fuzz parser @@ -155,6 +152,14 @@ We already have a language server that provides some tooling. - distinguish packages from normal modules - complain about comment lines with too much indentation - develop guidelines about how to format reasons +- disallow passing named closures as parameters? or auto-propagate caller's fault to called parameters? +- replace occurrences of `Id::complicated_responsibility()` +- fix usage of pipes in indented code such as this: + ```candy + foo + bar | baz + ## Currently, this is parsed as `baz (foo bar)`. 
+ ``` ## How to use Candy diff --git a/compiler/src/compiler/README.md b/compiler/src/compiler/README.md index a9e885b6d..e35af7b5c 100644 --- a/compiler/src/compiler/README.md +++ b/compiler/src/compiler/README.md @@ -8,8 +8,9 @@ These are the compiler stages: * RCST ("Raw Concrete Syntax Tree"): A tree that represents the syntax of the code, including every single character and whitespace. * CST ("Concrete Syntax Tree"): Similar to RCST, but tree nodes also have IDs and know what ranges in the source file they correspond to. * AST ("Abstract Syntax Tree"): A tree where unnecessary cruft is removed and some invariants are validated. -* HIR ("High-level Intermediate Representation"): The canonical representation of source code in single-static-assignment form (SSA). -* LIR ("Low-level Intermediate Representation"): An instruction code for a stack-based virtual machine. +* HIR ("High-Level Intermediate Representation"): The canonical representation of source code in single-static-assignment form (SSA). +* MIR ("Mid-Level Intermediate Representation"): A representation with desugaring and explicit tracking of responsibilities. Tailored for applying optimizations. +* LIR ("Low-Level Intermediate Representation"): An instruction code for a stack-based virtual machine. Note that if an error occurs in a compilation stage, we don't immediately abort but rather just try to contain the error in a subtree of the code and emit an error node. This means that even if you have a syntax error (missing parentheses, etc.), the tooling in other parts of the source still works – including auto-completion, edit-time evaluation, formatting, etc. 
diff --git a/compiler/src/compiler/ast_to_hir.rs b/compiler/src/compiler/ast_to_hir.rs index 0566e33c1..8c9299402 100644 --- a/compiler/src/compiler/ast_to_hir.rs +++ b/compiler/src/compiler/ast_to_hir.rs @@ -337,29 +337,29 @@ impl<'a> Context<'a> { })) if name == "needs" => { let expression = match &self.lower_call_arguments(&call.arguments[..])[..] { [condition, reason] => Expression::Needs { - condition: Box::new(condition.clone()), - reason: Box::new(reason.clone()), + condition: condition.clone(), + reason: reason.clone(), }, [condition] => Expression::Needs { - condition: Box::new(condition.clone()), - reason: Box::new(self.push( + condition: condition.clone(), + reason: self.push( None, Expression::Text( match self.db.ast_id_to_span(call.arguments[0].id.clone()) { Some(span) => format!( - "`{}` was not satisfied", - &self - .db - .get_module_content_as_string( - call.arguments[0].id.module.clone() - ) - .unwrap()[span], - ), + "`{}` was not satisfied", + &self + .db + .get_module_content_as_string( + call.arguments[0].id.module.clone() + ) + .unwrap()[span], + ), None => "the needs of a function were not met".to_string(), }, ), None, - )), + ), }, _ => { return self.push_error( @@ -459,7 +459,7 @@ impl<'a> Context<'a> { impl<'a> Context<'a> { fn generate_sparkles(&mut self) { - let mut sparkles_map = im::HashMap::new(); + let mut sparkles_map = HashMap::new(); for builtin_function in builtin_functions::VALUES.iter() { let symbol = self.push( @@ -517,7 +517,7 @@ impl<'a> Context<'a> { // HirId(~:test.candy:100): HirId(~:test.candy:101), // ] - let mut exports = im::HashMap::new(); + let mut exports = HashMap::new(); for (name, id) in self.public_identifiers.clone() { exports.insert( self.push( diff --git a/compiler/src/compiler/cst_to_ast.rs b/compiler/src/compiler/cst_to_ast.rs index 4963bed4f..c6083809a 100644 --- a/compiler/src/compiler/cst_to_ast.rs +++ b/compiler/src/compiler/cst_to_ast.rs @@ -58,7 +58,7 @@ fn ast(db: &dyn CstToAst, module: Module) -> 
Option { let cst = cst.unwrap_whitespace_and_comment(); context.lower_csts(&cst) } - Err(InvalidModuleError::DoesNotExist) => return None, + Err(InvalidModuleError::DoesNotExist | InvalidModuleError::IsToolingModule) => return None, Err(InvalidModuleError::InvalidUtf8) => { vec![Ast { id: context.create_next_id_without_mapping(), diff --git a/compiler/src/compiler/error.rs b/compiler/src/compiler/error.rs index 684599dfc..7ecb6e901 100644 --- a/compiler/src/compiler/error.rs +++ b/compiler/src/compiler/error.rs @@ -1,6 +1,6 @@ use super::{ast::AstError, hir::HirError, rcst::RcstError}; use crate::module::Module; -use std::ops::Range; +use std::{fmt::Display, ops::Range}; #[derive(Debug, PartialEq, Eq, Clone, Hash)] pub struct CompilerError { @@ -16,3 +16,84 @@ pub enum CompilerErrorPayload { Ast(AstError), Hir(HirError), } + +impl Display for CompilerErrorPayload { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let message = match self { + CompilerErrorPayload::InvalidUtf8 => "The module contains invalid UTF-8.".to_string(), + CompilerErrorPayload::Rcst(error) => match error { + RcstError::CurlyBraceNotClosed => "The curly brace is not closed.", + RcstError::IdentifierContainsNonAlphanumericAscii => { + "This identifier contains non-alphanumeric ASCII characters." + } + RcstError::IntContainsNonDigits => { + "This integer contains characters that are not digits." + } + RcstError::ListItemMissesValue => "This list item is missing a value.", + RcstError::ListNotClosed => "The list is not closed.", + RcstError::OpeningParenthesisWithoutExpression => { + "Here's an opening parenthesis without an expression after it." 
+ } + RcstError::ParenthesisNotClosed => "This parenthesis isn't closed.", + RcstError::PipeMissesCall => "There should be a call after this pipe.", + RcstError::StructFieldMissesColon => "This struct field misses a colon.", + RcstError::StructFieldMissesKey => "This struct field misses a key.", + RcstError::StructFieldMissesValue => "This struct field misses a value.", + RcstError::StructNotClosed => "This struct is not closed.", + RcstError::SymbolContainsNonAlphanumericAscii => { + "This symbol contains non-alphanumeric ASCII characters." + } + RcstError::TextNotClosed => "This text isn't closed.", + RcstError::TextNotSufficientlyIndented => "This text isn't sufficiently indented.", + RcstError::TooMuchWhitespace => "There is too much whitespace here.", + RcstError::UnexpectedCharacters => "This is an unexpected character.", + RcstError::UnparsedRest => "The parser couldn't parse this rest.", + RcstError::WeirdWhitespace => "This is weird whitespace.", + RcstError::WeirdWhitespaceInIndentation => { + "This is weird whitespace. Make sure to use indent using two spaces." + } + } + .to_string(), + CompilerErrorPayload::Ast(error) => match error { + AstError::ExpectedParameter => "A parameter should come here.", + AstError::LambdaWithoutClosingCurlyBrace => { + "This lambda doesn't have a closing curly brace." + } + AstError::ListItemWithoutComma => "This list item should be followed by a comma.", + AstError::ListWithNonListItem => "This is not a list item.", + AstError::ListWithoutClosingParenthesis => { + "This list doesn't have a closing parenthesis." + } + AstError::ParenthesizedWithoutClosingParenthesis => { + "This expression is parenthesized, but the closing parenthesis is missing." + } + AstError::StructKeyWithoutColon => "This struct key should be followed by a colon.", + AstError::StructValueWithoutComma => { + "This struct value should be followed by a comma." 
+ } + AstError::StructWithNonStructField => "Structs should only contain struct key.", + AstError::StructWithoutClosingBrace => { + "This struct doesn't have a closing bracket." + } + AstError::TextWithoutClosingQuote => "This text never ends.", + AstError::UnexpectedPunctuation => "This punctuation was unexpected.", + } + .to_string(), + CompilerErrorPayload::Hir(error) => match error { + HirError::NeedsWithWrongNumberOfArguments { num_args } => { + format!("`needs` accepts one or two arguments, but was called with {num_args} arguments. Its parameters are the `condition` and an optional `message`.") + } + HirError::PublicAssignmentInNotTopLevel => { + "Public assignments (:=) can only be used in top-level code.".to_string() + } + HirError::PublicAssignmentWithSameName { name } => { + format!("There already exists a public assignment (:=) named `{name}`.") + } + HirError::UnknownReference { name } => { + format!("Here, you reference `{name}`, but that name is not in scope.") + } + }, + }; + write!(f, "{message}") + } +} diff --git a/compiler/src/compiler/hir.rs b/compiler/src/compiler/hir.rs index 82e959f11..83de12f1c 100644 --- a/compiler/src/compiler/hir.rs +++ b/compiler/src/compiler/hir.rs @@ -1,12 +1,15 @@ use super::{ast_to_hir::AstToHir, error::CompilerError}; -use crate::{builtin_functions::BuiltinFunction, module::Module}; -use im::HashMap; +use crate::{ + builtin_functions::BuiltinFunction, + module::{Module, ModuleKind, Package}, +}; use itertools::Itertools; use linked_hash_map::LinkedHashMap; use num_bigint::BigUint; use std::{ - collections::HashSet, + collections::HashMap, fmt::{self, Display, Formatter}, + hash, sync::Arc, }; use tracing::info; @@ -86,8 +89,8 @@ impl Expression { } Expression::Builtin(_) => {} Expression::Needs { condition, reason } => { - ids.push(*condition.clone()); - ids.push(*reason.clone()); + ids.push(condition.clone()); + ids.push(reason.clone()); } Expression::Error { .. 
} => {} } @@ -112,6 +115,36 @@ impl Id { Self { module, keys } } + /// An ID that can be used to blame the tooling. For example, when calling + /// the `main` function, we want to be able to blame the platform for + /// passing a wrong environment. + fn tooling(name: String) -> Self { + Self { + module: Module { + package: Package::Tooling(name), + path: vec![], + kind: ModuleKind::Code, + }, + keys: vec![], + } + } + pub fn platform() -> Self { + Self::tooling("platform".to_string()) + } + pub fn fuzzer() -> Self { + Self::tooling("fuzzer".to_string()) + } + /// TODO: Currently, when a higher-order function calls a closure passed as + /// a parameter, that's registered as a normal call instruction, making the + /// callsite in the higher-order function responsible for the successful + /// fulfillment of the passed function's `needs`. We probably want to change + /// how that works so that the caller of the higher-order function is at + /// fault when passing a panicking function. After we did that, we should be + /// able to remove this ID. 
+ pub fn complicated_responsibility() -> Self { + Self::tooling("complicated-responsibility".to_string()) + } + pub fn is_root(&self) -> bool { self.keys.is_empty() } @@ -138,7 +171,7 @@ impl Display for Id { } } -#[derive(Clone, PartialEq, Eq, Hash, Debug)] +#[derive(Clone, PartialEq, Eq, Debug)] pub enum Expression { Int(BigUint), Text(String), @@ -157,8 +190,8 @@ pub enum Expression { relative_path: Id, }, Needs { - condition: Box, - reason: Box, + condition: Id, + reason: Id, }, Error { child: Option, @@ -170,6 +203,12 @@ impl Expression { Expression::Symbol("Nothing".to_string()) } } +#[allow(clippy::derive_hash_xor_eq)] +impl hash::Hash for Expression { + fn hash(&self, state: &mut H) { + core::mem::discriminant(self).hash(state); + } +} #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct Lambda { @@ -177,34 +216,16 @@ pub struct Lambda { pub body: Body, pub fuzzable: bool, } -impl Lambda { - pub fn captured_ids(&self, my_id: &Id) -> Vec { - let mut captured = vec![]; - self.body.collect_all_ids(&mut captured); - captured - .into_iter() - .filter(|potentially_captured_id| { - !my_id.is_same_module_and_any_parent_of(potentially_captured_id) - }) - .collect::>() - .into_iter() - .collect_vec() - } -} -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq)] pub struct Body { pub expressions: LinkedHashMap, pub identifiers: HashMap, } -impl Body { - #[allow(dead_code)] - pub fn return_value(&self) -> Id { - self.expressions - .keys() - .last() - .expect("no expressions") - .clone() +#[allow(clippy::derive_hash_xor_eq)] +impl hash::Hash for Body { + fn hash(&self, state: &mut H) { + self.expressions.hash(state); } } @@ -329,7 +350,7 @@ impl fmt::Display for Lambda { } impl fmt::Display for Body { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for (id, expression) in self.expressions.iter() { + for (id, expression) in &self.expressions { writeln!(f, "{id} = {expression}")?; } Ok(()) @@ -355,7 +376,7 @@ impl Expression 
{ } } impl Body { - fn find(&self, id: &Id) -> Option<&Expression> { + pub fn find(&self, id: &Id) -> Option<&Expression> { if let Some(expression) = self.expressions.get(id) { Some(expression) } else { diff --git a/compiler/src/compiler/hir_to_lir.rs b/compiler/src/compiler/hir_to_lir.rs deleted file mode 100644 index 6c77774a8..000000000 --- a/compiler/src/compiler/hir_to_lir.rs +++ /dev/null @@ -1,268 +0,0 @@ -use super::{ - ast_to_hir::AstToHir, - cst::CstDb, - error::CompilerError, - hir::{self, Body, Expression}, - lir::{Instruction, Lir, StackOffset}, -}; -use crate::{builtin_functions::BuiltinFunction, module::Module}; -use itertools::Itertools; -use num_bigint::BigUint; -use std::sync::Arc; -use tracing::{span, Level}; - -#[salsa::query_group(HirToLirStorage)] -pub trait HirToLir: CstDb + AstToHir { - fn lir(&self, module: Module) -> Option>; -} - -fn lir(db: &dyn HirToLir, module: Module) -> Option> { - let (hir, _) = db.hir(module)?; - let instructions = compile_lambda(&[], &[], &hir); - Some(Arc::new(Lir { instructions })) -} - -fn compile_lambda(captured: &[hir::Id], parameters: &[hir::Id], body: &Body) -> Vec { - let mut context = LoweringContext::default(); - for captured in captured { - context.stack.push(captured.clone()); - } - for parameter in parameters { - context.stack.push(parameter.clone()); - } - - for (id, expression) in &body.expressions { - context.compile_expression(id, expression); - } - - context.emit_pop_multiple_below_top(body.expressions.len() - 1); - context.emit_pop_multiple_below_top(parameters.len()); - context.emit_pop_multiple_below_top(captured.len()); - context.emit_return(); - - assert_eq!(context.stack.len(), 1); // The stack should only contain the return value. 
- - context.instructions -} - -#[derive(Default)] -struct LoweringContext { - stack: Vec, - instructions: Vec, -} -impl LoweringContext { - fn compile_expression(&mut self, id: &hir::Id, expression: &Expression) { - let span = span!(Level::TRACE, "Compiling expression", ?expression); - let _enter = span.enter(); - - match expression { - Expression::Int(int) => self.emit_create_int(id.clone(), int.clone()), - Expression::Text(text) => self.emit_create_text(id.clone(), text.clone()), - Expression::Reference(reference) => { - self.emit_push_from_stack(reference.clone()); - self.stack.replace_top_id(id.clone()); - } - Expression::Symbol(symbol) => self.emit_create_symbol(id.clone(), symbol.clone()), - Expression::List(items) => { - for item in items { - self.emit_push_from_stack(item.clone()); - } - self.emit_create_list(id.clone(), items.len()); - } - Expression::Struct(entries) => { - for (key, value) in entries { - self.emit_push_from_stack(key.clone()); - self.emit_push_from_stack(value.clone()); - } - self.emit_create_struct(id.clone(), entries.len()); - } - Expression::Lambda(lambda) => { - let captured = lambda.captured_ids(id); - let instructions = compile_lambda(&captured, &lambda.parameters, &lambda.body); - - self.emit_create_closure( - id.clone(), - captured - .iter() - .map(|id| self.stack.find_id(id)) - .collect_vec(), - lambda.parameters.len(), - instructions, - !lambda.fuzzable, - ); - if lambda.fuzzable { - self.emit_register_fuzzable_closure(id.clone()); - } - } - Expression::Call { - function, - arguments, - } => { - for argument in arguments { - self.emit_push_from_stack(argument.clone()); - } - - self.emit_push_from_stack(function.clone()); - self.emit_start_responsibility(id.clone()); - self.emit_trace_call_starts(id.clone(), arguments.len()); - self.emit_call(id.clone(), arguments.len()); - self.emit_trace_call_ends(); - self.emit_end_responsibility(); - } - Expression::Builtin(builtin) => { - self.emit_create_builtin(id.clone(), *builtin); - } - 
Expression::UseModule { - current_module, - relative_path, - } => { - self.emit_push_from_stack(relative_path.clone()); - self.emit_use_module(id.clone(), current_module.clone()); - } - Expression::Needs { condition, reason } => { - self.emit_push_from_stack(*condition.clone()); - self.emit_push_from_stack(*reason.clone()); - self.emit_trace_needs_starts(id.clone()); - self.emit_needs(id.clone()); - self.emit_trace_needs_ends(); - } - Expression::Error { errors, .. } => { - self.emit_errors(id.clone(), errors.clone()); - } - }; - self.emit_trace_value_evaluated(id.clone()); - } - - fn emit_create_int(&mut self, id: hir::Id, int: BigUint) { - self.emit(Instruction::CreateInt(int)); - self.stack.push(id); - } - fn emit_create_text(&mut self, id: hir::Id, text: String) { - self.emit(Instruction::CreateText(text)); - self.stack.push(id); - } - fn emit_create_symbol(&mut self, id: hir::Id, symbol: String) { - self.emit(Instruction::CreateSymbol(symbol)); - self.stack.push(id); - } - fn emit_create_list(&mut self, id: hir::Id, num_items: usize) { - self.emit(Instruction::CreateList { num_items }); - self.stack.pop_multiple(num_items); - self.stack.push(id); - } - fn emit_create_struct(&mut self, id: hir::Id, num_fields: usize) { - self.emit(Instruction::CreateStruct { num_fields }); - self.stack.pop_multiple(2 * num_fields); - self.stack.push(id); - } - fn emit_create_closure( - &mut self, - id: hir::Id, - captured: Vec, - num_args: usize, - instructions: Vec, - is_curly: bool, - ) { - self.emit(Instruction::CreateClosure { - id: id.clone(), - captured, - num_args, - body: instructions, - is_curly, - }); - self.stack.push(id); - } - fn emit_create_builtin(&mut self, id: hir::Id, builtin: BuiltinFunction) { - self.emit(Instruction::CreateBuiltin(builtin)); - self.stack.push(id); - } - fn emit_pop_multiple_below_top(&mut self, n: usize) { - self.emit(Instruction::PopMultipleBelowTop(n)); - let top = self.stack.pop().unwrap(); - self.stack.pop_multiple(n); - 
self.stack.push(top); - } - fn emit_push_from_stack(&mut self, id: hir::Id) { - let offset = self.stack.find_id(&id); - self.emit(Instruction::PushFromStack(offset)); - self.stack.push(id); - } - fn emit_call(&mut self, id: hir::Id, num_args: usize) { - self.emit(Instruction::Call { num_args }); - self.stack.pop(); // closure/builtin - self.stack.pop_multiple(num_args); - self.stack.push(id); - } - fn emit_return(&mut self) { - self.emit(Instruction::Return); - } - fn emit_use_module(&mut self, id: hir::Id, current_module: Module) { - self.stack.pop(); // relative path - self.emit(Instruction::UseModule { current_module }); - self.stack.push(id); // exported definitions - } - fn emit_start_responsibility(&mut self, responsible: hir::Id) { - self.emit(Instruction::StartResponsibility(responsible)); - } - fn emit_end_responsibility(&mut self) { - self.emit(Instruction::EndResponsibility); - } - fn emit_needs(&mut self, id: hir::Id) { - self.stack.pop(); // reason - self.stack.pop(); // condition - self.emit(Instruction::Needs); - self.stack.push(id); // Nothing - } - fn emit_register_fuzzable_closure(&mut self, id: hir::Id) { - self.emit(Instruction::RegisterFuzzableClosure(id)); - } - fn emit_trace_value_evaluated(&mut self, id: hir::Id) { - self.emit(Instruction::TraceValueEvaluated(id)); - } - fn emit_trace_call_starts(&mut self, id: hir::Id, num_args: usize) { - self.emit(Instruction::TraceCallStarts { id, num_args }); - } - fn emit_trace_call_ends(&mut self) { - self.emit(Instruction::TraceCallEnds); - } - fn emit_trace_needs_starts(&mut self, id: hir::Id) { - self.emit(Instruction::TraceNeedsStarts { id }); - } - fn emit_trace_needs_ends(&mut self) { - self.emit(Instruction::TraceNeedsEnds); - } - fn emit_errors(&mut self, id: hir::Id, errors: Vec) { - self.emit(Instruction::Error { - id: id.clone(), - errors, - }); - self.stack.push(id); - } - - fn emit(&mut self, instruction: Instruction) { - self.instructions.push(instruction); - } -} - -trait StackExt { - 
fn pop_multiple(&mut self, n: usize); - fn find_id(&self, id: &hir::Id) -> StackOffset; - fn replace_top_id(&mut self, id: hir::Id); -} -impl StackExt for Vec { - fn pop_multiple(&mut self, n: usize) { - for _ in 0..n { - self.pop(); - } - } - fn find_id(&self, id: &hir::Id) -> StackOffset { - self.iter() - .rev() - .position(|it| it == id) - .unwrap_or_else(|| panic!("Id {} not found in stack: {}", id, self.iter().join(" "))) - } - fn replace_top_id(&mut self, id: hir::Id) { - self.pop().unwrap(); - self.push(id); - } -} diff --git a/compiler/src/compiler/hir_to_mir.rs b/compiler/src/compiler/hir_to_mir.rs new file mode 100644 index 000000000..1d5e332ae --- /dev/null +++ b/compiler/src/compiler/hir_to_mir.rs @@ -0,0 +1,430 @@ +use super::{ + ast_to_hir::AstToHir, + cst::CstDb, + error::CompilerError, + hir, + mir::{Body, Expression, Id, Mir}, +}; +use crate::{ + builtin_functions::BuiltinFunction, + language_server::utils::LspPositionConversion, + module::{Module, ModuleKind, Package}, + utils::IdGenerator, +}; +use itertools::Itertools; +use std::{collections::HashMap, sync::Arc}; + +#[salsa::query_group(HirToMirStorage)] +pub trait HirToMir: CstDb + AstToHir + LspPositionConversion { + fn mir(&self, module: Module, config: TracingConfig) -> Option>; +} + +#[derive(PartialEq, Eq, Hash, Debug, Clone)] +pub struct TracingConfig { + pub register_fuzzables: bool, + pub trace_calls: bool, + pub trace_evaluated_expressions: bool, +} + +impl TracingConfig { + pub fn none() -> Self { + Self { + register_fuzzables: false, + trace_calls: false, + trace_evaluated_expressions: false, + } + } +} + +fn mir(db: &dyn HirToMir, module: Module, config: TracingConfig) -> Option> { + let (hir, _) = db.hir(module.clone())?; + let mir = compile_module(db, module, &hir, &config); + Some(Arc::new(mir)) +} + +fn compile_module( + db: &dyn HirToMir, + module: Module, + hir: &hir::Body, + config: &TracingConfig, +) -> Mir { + let mut id_generator = IdGenerator::start_at(0); + let mut body 
= Body::default(); + let mut mapping = HashMap::::new(); + + body.push_with_new_id( + &mut id_generator, + Expression::ModuleStarts { + module: module.clone(), + }, + ); + + let needs_function = generate_needs_function(&mut id_generator); + let needs_function = body.push_with_new_id(&mut id_generator, needs_function); + + let module_hir_id = body.push_with_new_id( + &mut id_generator, + Expression::HirId(hir::Id::new(module, vec![])), + ); + for (id, expression) in &hir.expressions { + compile_expression( + db, + &mut id_generator, + &mut body, + &mut mapping, + needs_function, + module_hir_id, + id, + expression, + config, + ); + } + + let return_value = body.return_value(); + body.push_with_new_id(&mut id_generator, Expression::ModuleEnds); + body.push_with_new_id(&mut id_generator, Expression::Reference(return_value)); + + Mir { id_generator, body } +} + +/// In the MIR, there's no longer the concept of needs. Instead, HIR IDs are +/// first-class expressions and there's a `panic` expression that takes a HIR +/// ID that's responsible. +/// +/// This function generates the `needs` function. Unlike regular functions, it +/// also expects a HIR ID as a normal parameter. +/// +/// Here's a high-level pseudocode of the generated `needs` function: +/// +/// ```pseudocode +/// needs = { condition reason responsibleForCondition (responsibleForCall) -> +/// isConditionBool = builtinIfElse +/// builtinEquals condition True +/// { True } +/// { builtinEquals condition False } +/// builtinIfElse isConditionBool { Nothing } { +/// panic "The condition must be either `True` or `False`." responsibleForCall +/// } +/// +/// builtinIfElse (builtinEquals (builtinTypeOf reason) Text) { Nothing} { +/// panic "The `reason` must be a text." 
responsibleForCall +/// } +/// +/// builtinIfElse condition { Nothing } { panic reason responsibleForCondition } +/// } +/// ``` +fn generate_needs_function(id_generator: &mut IdGenerator) -> Expression { + Expression::build_lambda(id_generator, |body, responsible_for_call| { + let condition = body.new_parameter(); + let reason = body.new_parameter(); + let responsible_for_condition = body.new_parameter(); + + // Common stuff. + let needs_code = body.push(Expression::HirId(hir::Id::new( + Module { + package: Package::Anonymous { + url: "$generated".to_string(), + }, + path: vec![], + kind: ModuleKind::Code, + }, + vec!["needs".to_string()], + ))); + let builtin_equals = body.push(Expression::Builtin(BuiltinFunction::Equals)); + let builtin_if_else = body.push(Expression::Builtin(BuiltinFunction::IfElse)); + let nothing_symbol = body.push(Expression::Symbol("Nothing".to_string())); + let lambda_returning_nothing = body.push_lambda(|body, _| { + body.push(Expression::Reference(nothing_symbol)); + }); + + // Make sure the condition is a bool. 
+ let true_symbol = body.push(Expression::Symbol("True".to_string())); + let false_symbol = body.push(Expression::Symbol("False".to_string())); + let is_condition_true = body.push(Expression::Call { + function: builtin_equals, + arguments: vec![condition, true_symbol], + responsible: needs_code, + }); + let is_condition_false = body.push(Expression::Call { + function: builtin_equals, + arguments: vec![condition, false_symbol], + responsible: needs_code, + }); + let lambda_returning_true = body.push_lambda(|body, _| { + body.push(Expression::Reference(true_symbol)); + }); + let lambda_returning_whether_condition_is_false = body.push_lambda(|body, _| { + body.push(Expression::Reference(is_condition_false)); + }); + let is_condition_bool = body.push(Expression::Call { + function: builtin_if_else, + arguments: vec![ + is_condition_true, + lambda_returning_true, + lambda_returning_whether_condition_is_false, + ], + responsible: needs_code, + }); + let on_invalid_condition = body.push_lambda(|body, _| { + let panic_reason = body.push(Expression::Text( + "The `condition` must be either `True` or `False`.".to_string(), + )); + body.push(Expression::Panic { + reason: panic_reason, + responsible: responsible_for_call, + }); + }); + body.push(Expression::Call { + function: builtin_if_else, + arguments: vec![ + is_condition_bool, + lambda_returning_nothing, + on_invalid_condition, + ], + responsible: needs_code, + }); + + // Make sure the reason is a text. 
+ let builtin_type_of = body.push(Expression::Builtin(BuiltinFunction::TypeOf)); + let type_of_reason = body.push(Expression::Call { + function: builtin_type_of, + arguments: vec![reason], + responsible: responsible_for_call, + }); + let text_symbol = body.push(Expression::Symbol("Text".to_string())); + let is_reason_text = body.push(Expression::Call { + function: builtin_equals, + arguments: vec![type_of_reason, text_symbol], + responsible: responsible_for_call, + }); + let on_invalid_reason = body.push_lambda(|body, _| { + let panic_reason = + body.push(Expression::Text("The `reason` must be a text.".to_string())); + body.push(Expression::Panic { + reason: panic_reason, + responsible: responsible_for_call, + }); + }); + body.push(Expression::Call { + function: builtin_if_else, + arguments: vec![is_reason_text, lambda_returning_nothing, on_invalid_reason], + responsible: needs_code, + }); + + // The core logic of the needs. + let panic_lambda = body.push_lambda(|body, _| { + body.push(Expression::Panic { + reason, + responsible: responsible_for_condition, + }); + }); + body.push(Expression::Call { + function: builtin_if_else, + arguments: vec![condition, lambda_returning_nothing, panic_lambda], + responsible: needs_code, + }); + }) +} + +// Nothing to see here. 
+#[allow(clippy::too_many_arguments)] +fn compile_expression( + db: &dyn HirToMir, + id_generator: &mut IdGenerator, + body: &mut Body, + mapping: &mut HashMap, + needs_function: Id, + responsible_for_needs: Id, + hir_id: &hir::Id, + expression: &hir::Expression, + config: &TracingConfig, +) { + let expression = match expression { + hir::Expression::Int(int) => Expression::Int(int.clone().into()), + hir::Expression::Text(text) => Expression::Text(text.clone()), + hir::Expression::Reference(reference) => Expression::Reference(mapping[reference]), + hir::Expression::Symbol(symbol) => Expression::Symbol(symbol.clone()), + hir::Expression::Builtin(builtin) => Expression::Builtin(*builtin), + hir::Expression::List(items) => { + Expression::List(items.iter().map(|item| mapping[item]).collect()) + } + hir::Expression::Struct(fields) => Expression::Struct( + fields + .iter() + .map(|(key, value)| (mapping[key], mapping[value])) + .collect(), + ), + hir::Expression::Lambda(hir::Lambda { + parameters: original_parameters, + body: original_body, + fuzzable, + }) => { + let mut parameters = vec![]; + let responsible_parameter: Id = id_generator.generate(); + let mut lambda_body = Body::default(); + + for original_parameter in original_parameters { + let parameter = id_generator.generate(); + parameters.push(parameter); + mapping.insert(original_parameter.clone(), parameter); + } + + let responsible = if *fuzzable { + responsible_parameter + } else { + // This is a lambda with curly braces, so whoever is responsible + // for `needs` in the current scope is also responsible for + // `needs` in the lambda. 
+ responsible_for_needs + }; + + for (id, expression) in &original_body.expressions { + compile_expression( + db, + id_generator, + &mut lambda_body, + mapping, + needs_function, + responsible, + id, + expression, + config, + ); + } + + let lambda = body.push_with_new_id( + id_generator, + Expression::Lambda { + parameters, + responsible_parameter, + body: lambda_body, + }, + ); + if config.register_fuzzables && *fuzzable { + let hir_definition = + body.push_with_new_id(id_generator, Expression::HirId(hir_id.clone())); + body.push_with_new_id( + id_generator, + Expression::TraceFoundFuzzableClosure { + hir_definition, + closure: lambda, + }, + ); + } + Expression::Reference(lambda) + } + hir::Expression::Call { + function, + arguments, + } => { + let responsible = + body.push_with_new_id(id_generator, Expression::HirId(hir_id.clone())); + let arguments = arguments + .iter() + .map(|argument| mapping[argument]) + .collect_vec(); + + if config.trace_calls { + let hir_call = + body.push_with_new_id(id_generator, Expression::HirId(hir_id.clone())); + body.push_with_new_id( + id_generator, + Expression::TraceCallStarts { + hir_call, + function: mapping[function], + arguments: arguments.clone(), + responsible, + }, + ); + } + let call = body.push_with_new_id( + id_generator, + Expression::Call { + function: mapping[function], + arguments, + responsible, + }, + ); + if config.trace_calls { + body.push_with_new_id( + id_generator, + Expression::TraceCallEnds { return_value: call }, + ); + } + Expression::Reference(call) + } + hir::Expression::UseModule { + current_module, + relative_path, + } => Expression::UseModule { + current_module: current_module.clone(), + relative_path: mapping[relative_path], + // The `UseModule` expression only exists in the generated `use` + // function. If a use fails, that's also the fault of the caller. + // Essentially, the `UseModule` expression works exactly like a + // `needs`. 
+ responsible: responsible_for_needs, + }, + hir::Expression::Needs { condition, reason } => { + let responsible = + body.push_with_new_id(id_generator, Expression::HirId(hir_id.clone())); + Expression::Call { + function: needs_function, + arguments: vec![mapping[condition], mapping[reason], responsible_for_needs], + responsible, + } + } + hir::Expression::Error { errors, .. } => { + let reason = body.push_with_new_id( + id_generator, + Expression::Text(if errors.len() == 1 { + format!( + "The code still contains an error: {}", + errors.iter().next().unwrap().format_nicely(db) + ) + } else { + format!( + "The code still contains errors:\n{}", + errors + .iter() + .map(|error| format!("- {}", error.format_nicely(db))) + .join("\n"), + ) + }), + ); + let responsible = + body.push_with_new_id(id_generator, Expression::HirId(hir_id.clone())); + Expression::Panic { + reason, + responsible, + } + } + }; + + let id = body.push_with_new_id(id_generator, expression); + mapping.insert(hir_id.clone(), id); + + if config.trace_evaluated_expressions { + let hir_expression = body.push_with_new_id(id_generator, Expression::HirId(hir_id.clone())); + body.push_with_new_id( + id_generator, + Expression::TraceExpressionEvaluated { + hir_expression, + value: id, + }, + ); + body.push_with_new_id(id_generator, Expression::Reference(id)); + } +} + +impl CompilerError { + fn format_nicely(&self, db: &dyn HirToMir) -> String { + let (start_line, start_col) = db.offset_to_lsp(self.module.clone(), self.span.start); + let (end_line, end_col) = db.offset_to_lsp(self.module.clone(), self.span.end); + + format!( + "{}:{}:{} – {}:{}: {}", + self.module, start_line, start_col, end_line, end_col, self.payload + ) + } +} diff --git a/compiler/src/compiler/lir.rs b/compiler/src/compiler/lir.rs index a51606024..576840982 100644 --- a/compiler/src/compiler/lir.rs +++ b/compiler/src/compiler/lir.rs @@ -1,7 +1,7 @@ -use super::{error::CompilerError, hir::Id}; +use super::mir::Id; use 
crate::{builtin_functions::BuiltinFunction, hir, module::Module}; use itertools::Itertools; -use num_bigint::BigUint; +use num_bigint::BigInt; use std::fmt::Display; #[derive(Clone, Debug, PartialEq, Eq)] @@ -14,7 +14,7 @@ pub type StackOffset = usize; // 0 is the last item, 1 the one before that, etc. #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Instruction { /// Pushes an int. - CreateInt(BigUint), + CreateInt(BigInt), /// Pushes a text. CreateText(String), @@ -22,6 +22,11 @@ pub enum Instruction { /// Pushes a symbol. CreateSymbol(String), + /// Pushes a builtin function. + /// + /// a -> a, builtin + CreateBuiltin(BuiltinFunction), + /// Pops num_items items, pushes a list. /// /// a, item, item, ..., item -> a, pointer to list @@ -36,50 +41,45 @@ pub enum Instruction { num_fields: usize, }, + /// Pushes a HIR ID. + CreateHirId(hir::Id), + /// Pushes a closure. /// /// a -> a, pointer to closure CreateClosure { - id: hir::Id, captured: Vec, - num_args: usize, + num_args: usize, // excluding responsible parameter body: Vec, - is_curly: bool, }, - /// Pushes a builtin function. - /// - /// a -> a, builtin - CreateBuiltin(BuiltinFunction), - /// Pushes an item from back in the stack on the stack again. PushFromStack(StackOffset), /// Leaves the top stack item untouched, but removes n below. PopMultipleBelowTop(usize), - /// Pops a closure and num_args arguments, pushes the current instruction - /// pointer, all captured variables, and arguments, and then changes the - /// instruction pointer to the first instruction of the closure. + /// Sets up the data stack for a closure execution and then changes the + /// instruction pointer to the first instruction. 
/// - /// a, arg1, arg2, ..., argN, closure -> a, caller, captured vars, arg1, arg2, ..., argN + /// a, closure, arg1, arg2, ..., argN, responsible -> a, caller, captured vars, arg1, arg2, ..., argN, responsible /// /// Later, when the closure returns (perhaps many instructions after this /// one), the stack will contain the result: /// - /// a, arg1, arg2, ..., argN, closure ~> a, return value from closure + /// a, closure, arg1, arg2, ..., argN, responsible ~> a, return value from closure Call { - num_args: usize, + num_args: usize, // excluding the responsible argument }, - /// Returns from the current closure to the original caller. - /// - /// a, caller, return value -> a, return value + /// Returns from the current closure to the original caller. Leaves the data + /// stack untouched, but pops a caller from the call stack and returns the + /// instruction pointer to continue where the current function was called. Return, - /// Pops a string path and then resolves the path relative to the current - /// module. Then does different things depending on whether this is a code - /// or asset module. + /// Pops a string path and responsible HIR ID and then resolves the path + /// relative to the current module. Then does different things depending on + /// whether this is a code or asset module. /// /// - Code module: /// @@ -87,52 +87,135 @@ pub enum Instruction { /// when the module returns, the stack will contain the struct of the /// exported definitions: /// - /// a, path ~> a, structOfModuleExports + /// a, path, responsible ~> a, structOfModuleExports /// /// - Asset module: /// /// Loads the file and pushes its content onto the stack: /// - /// a, path -> a, listOfContentBytes + /// a, path, responsible -> a, listOfContentBytes UseModule { current_module: Module, }, - /// Contrary to other languages, in Candy it's always clear who's fault it - /// is when a program panics. 
Each fiber maintains a responsibility stack - /// which notes which call-site is responsible for needs to be fulfilled. - StartResponsibility(Id), - EndResponsibility, - - /// Pops a boolean condition and a reason. If the condition is true, it - /// just pushes Nothing. If the condition is false, it panics with the - /// reason. + /// Panics. Because the panic instruction only occurs inside the generated + /// needs function, the reason is already guaranteed to be a text. /// - /// a, condition, reason -> a, Nothing - Needs, + /// a, reason, responsible -> 💥 + Panic, - /// Indicates that a fuzzable closure sits at the top of the stack. - RegisterFuzzableClosure(hir::Id), + ModuleStarts { + module: Module, + }, + ModuleEnds, - TraceValueEvaluated(hir::Id), + /// a, HIR ID, function, arg1, arg2, ..., argN, responsible -> a TraceCallStarts { - id: hir::Id, num_args: usize, }, + + /// a, return value -> a TraceCallEnds, - TraceNeedsStarts { - id: hir::Id, - }, - TraceNeedsEnds, - TraceModuleStarts { - module: Module, - }, - TraceModuleEnds, - Error { - id: hir::Id, - errors: Vec, - }, + /// a, HIR ID, value -> a + TraceExpressionEvaluated, + + /// a, HIR ID, closure -> a + TraceFoundFuzzableClosure, +} + +impl Instruction { + /// Applies the instruction's effect on the stack. After calling it, the + /// stack will be in the same state as when the control flow continues after + /// this instruction. + pub fn apply_to_stack(&self, stack: &mut Vec, result: Id) { + match self { + Instruction::CreateInt(_) => { + stack.push(result); + } + Instruction::CreateText(_) => { + stack.push(result); + } + Instruction::CreateSymbol(_) => { + stack.push(result); + } + Instruction::CreateBuiltin(_) => { + stack.push(result); + } + Instruction::CreateList { num_items } => { + stack.pop_multiple(*num_items); + stack.push(result); + } + Instruction::CreateStruct { num_fields } => { + stack.pop_multiple(2 * num_fields); // fields + stack.push(result); + } + Instruction::CreateHirId { .. 
} => { + stack.push(result); + } + Instruction::CreateClosure { .. } => { + stack.push(result); + } + Instruction::PushFromStack(_) => { + stack.push(result); + } + Instruction::PopMultipleBelowTop(n) => { + let top = stack.pop().unwrap(); + stack.pop_multiple(*n); + stack.push(top); + } + Instruction::Call { num_args } => { + stack.pop(); // responsible + stack.pop_multiple(*num_args); + stack.pop(); // closure/builtin + stack.push(result); // return value + } + Instruction::Return => { + // Only modifies the call stack and the instruction pointer. + // Leaves the return value untouched on the stack. + } + Instruction::UseModule { .. } => { + stack.pop(); // responsible + stack.pop(); // module path + stack.push(result); // exported members or bytes of file + } + Instruction::Panic => { + stack.pop(); // responsible + stack.pop(); // reason + stack.push(result); + } + Instruction::ModuleStarts { .. } => {} + Instruction::ModuleEnds => {} + Instruction::TraceCallStarts { num_args } => { + stack.pop(); // HIR ID + stack.pop(); // responsible + stack.pop_multiple(*num_args); + stack.pop(); // callee + } + Instruction::TraceCallEnds => { + stack.pop(); // return value + } + Instruction::TraceExpressionEvaluated => { + stack.pop(); // HIR ID + stack.pop(); // value + } + Instruction::TraceFoundFuzzableClosure => { + stack.pop(); // HIR ID + stack.pop(); // value + } + } + } +} + +trait StackExt { + fn pop_multiple(&mut self, n: usize); +} +impl StackExt for Vec { + fn pop_multiple(&mut self, n: usize) { + for _ in 0..n { + self.pop(); + } + } } impl Display for Instruction { @@ -147,16 +230,15 @@ impl Display for Instruction { Instruction::CreateStruct { num_fields } => { write!(f, "createStruct {num_fields}") } + Instruction::CreateHirId(id) => write!(f, "createHirId {id}"), Instruction::CreateClosure { - id, captured, num_args, body: instructions, - is_curly, } => { write!( f, - "createClosure {id} with {num_args} {} capturing {} {}", + "createClosure with {num_args} 
{} capturing {}", if *num_args == 1 { "argument" } else { @@ -167,11 +249,6 @@ impl Display for Instruction { } else { captured.iter().join(", ") }, - if *is_curly { - "(is curly)" - } else { - "(is not curly)" - }, )?; for instruction in instructions { let indented = format!("{instruction}") @@ -196,38 +273,18 @@ impl Display for Instruction { Instruction::UseModule { current_module } => { write!(f, "useModule (currently in {})", current_module) } - Instruction::StartResponsibility(responsible) => { - write!(f, "responsibility of {responsible} starts") + Instruction::Panic => write!(f, "panic"), + Instruction::ModuleStarts { module } => write!(f, "moduleStarts {module}"), + Instruction::ModuleEnds => write!(f, "moduleEnds"), + Instruction::TraceCallStarts { num_args } => { + write!(f, "trace: callStarts ({num_args} args)") } - Instruction::EndResponsibility => write!(f, "responsibility ends"), - Instruction::Needs => write!(f, "needs"), - Instruction::RegisterFuzzableClosure(hir_id) => { - write!(f, "registerFuzzableClosure {hir_id}") + Instruction::TraceCallEnds => write!(f, "trace: callEnds"), + Instruction::TraceExpressionEvaluated => { + write!(f, "trace: expressionEvaluated") } - Instruction::TraceValueEvaluated(hir_id) => { - write!(f, "traceValueEvaluated {hir_id}") - } - Instruction::TraceCallStarts { id, num_args } => { - write!(f, "traceCallStarts {id} ({num_args} args)") - } - Instruction::TraceCallEnds => write!(f, "traceCallEnds"), - Instruction::TraceNeedsStarts { id } => { - write!(f, "traceNeedsStarts {id}") - } - Instruction::TraceNeedsEnds => write!(f, "traceNeedsEnds"), - Instruction::TraceModuleStarts { module } => write!(f, "traceModuleStarts {module}"), - Instruction::TraceModuleEnds => write!(f, "traceModuleEnds"), - Instruction::Error { id, errors } => { - write!( - f, - "{} at {id}:", - if errors.len() == 1 { "error" } else { "errors" } - )?; - write!(f, "error(s) at {id}")?; - for error in errors { - write!(f, "\n {error:?}")?; - } - 
Ok(()) + Instruction::TraceFoundFuzzableClosure => { + write!(f, "trace: foundFuzzableClosure") } } } diff --git a/compiler/src/compiler/mir.rs b/compiler/src/compiler/mir.rs new file mode 100644 index 000000000..33324f303 --- /dev/null +++ b/compiler/src/compiler/mir.rs @@ -0,0 +1,670 @@ +use super::hir; +use crate::{ + builtin_functions::BuiltinFunction, + module::Module, + utils::{CountableId, IdGenerator}, +}; +use itertools::Itertools; +use num_bigint::BigInt; +use std::{cmp::Ordering, collections::HashMap, fmt, hash, mem}; + +#[derive(Clone, PartialEq, Eq)] +pub struct Mir { + pub id_generator: IdGenerator, + pub body: Body, +} + +#[derive(Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct Id(usize); + +#[derive(Clone, PartialEq, Eq, Hash, Default)] +pub struct Body { + expressions: Vec<(Id, Expression)>, +} + +#[derive(Clone, PartialEq, Eq)] +pub enum Expression { + Int(BigInt), + Text(String), + Symbol(String), + Builtin(BuiltinFunction), + List(Vec), + Struct(Vec<(Id, Id)>), + Reference(Id), + /// A HIR ID that can be used to refer to code in the HIR. + HirId(hir::Id), + /// In the MIR, responsibilities are explicitly tracked. All lambdas take a + /// responsible HIR ID as an extra parameter. Based on whether the function + /// is fuzzable or not, this parameter may be used to dynamically determine + /// who's at fault if some `needs` is not fulfilled. + Lambda { + parameters: Vec, + responsible_parameter: Id, + body: Body, + }, + /// This expression is never contained in an actual MIR body, but when + /// dealing with expressions, it's easier to not special-case IDs referring + /// to parameters. + Parameter, + Call { + function: Id, + arguments: Vec, + responsible: Id, + }, + UseModule { + current_module: Module, + relative_path: Id, + responsible: Id, + }, + /// This expression indicates that the code will panic. It's created if the + /// compiler can statically determine that some expression will always + /// panic. 
+ Panic { + reason: Id, + responsible: Id, + }, + + /// For convenience when writing optimization passes, this expression allows + /// storing multiple inner expressions in a single expression. The expansion + /// back into multiple expressions happens in the [multiple flattening] + /// optimization. + /// + /// [multiple flattening]: super::optimize::multiple_flattening + Multiple(Body), + + /// Indicates that a module started. + /// + /// Unlike the trace instructions below, this expression is not optional – + /// it needs to always be compiled into the MIR because the `ModuleStarts` + /// and `ModuleEnds` instructions directly influence the import stack of the + /// VM and thereby the behavior of the program. Depending on the order of + /// instructions being executed, an import may succeed, or panic because of + /// a circular import. + /// + /// If there's no `use` between the `ModuleStarts` and `ModuleEnds` + /// expressions, they can be optimized away. + ModuleStarts { + module: Module, + }, + ModuleEnds, + + TraceCallStarts { + hir_call: Id, + function: Id, + arguments: Vec, + responsible: Id, + }, + TraceCallEnds { + return_value: Id, + }, + TraceExpressionEvaluated { + hir_expression: Id, + value: Id, + }, + TraceFoundFuzzableClosure { + hir_definition: Id, + closure: Id, + }, +} + +impl CountableId for Id { + fn from_usize(id: usize) -> Self { + Self(id) + } + fn to_usize(&self) -> usize { + self.0 + } +} + +impl Expression { + pub fn nothing() -> Self { + Expression::Symbol("Nothing".to_string()) + } +} +impl From for Expression { + fn from(value: bool) -> Self { + Expression::Symbol(if value { "True" } else { "False" }.to_string()) + } +} +impl TryInto for &Expression { + type Error = (); + + fn try_into(self) -> Result { + let Expression::Symbol(symbol) = self else { return Err(()); }; + match symbol.as_str() { + "True" => Ok(true), + "False" => Ok(false), + _ => Err(()), + } + } +} + +impl Body { + pub fn iter(&self) -> impl DoubleEndedIterator { + 
self.expressions + .iter() + .map(|(id, expression)| (*id, expression)) + } + pub fn iter_mut(&mut self) -> impl DoubleEndedIterator { + self.expressions + .iter_mut() + .map(|(id, expression)| (*id, expression)) + } + pub fn into_iter(self) -> impl DoubleEndedIterator { + self.expressions.into_iter() + } + pub fn return_value(&mut self) -> Id { + let (id, _) = self.expressions.last().unwrap(); + *id + } + + pub fn push(&mut self, id: Id, expression: Expression) { + self.expressions.push((id, expression)); + } + pub fn push_with_new_id( + &mut self, + id_generator: &mut IdGenerator, + expression: Expression, + ) -> Id { + let id = id_generator.generate(); + self.push(id, expression); + id + } + pub fn insert_at_front(&mut self, expressions: Vec<(Id, Expression)>) { + let old_expressions = mem::take(&mut self.expressions); + self.expressions.extend(expressions); + self.expressions.extend(old_expressions); + } + pub fn remove_all(&mut self, mut predicate: F) + where + F: FnMut(Id, &Expression) -> bool, + { + self.expressions + .retain(|(id, expression)| !predicate(*id, expression)); + } + pub fn sort_by(&mut self, predicate: F) + where + F: FnMut(&(Id, Expression), &(Id, Expression)) -> Ordering, + { + self.expressions.sort_by(predicate); + } + + /// Flattens all `Expression::Multiple`. + pub fn flatten_multiples(&mut self) { + let old_expressions = mem::take(&mut self.expressions); + + for (id, mut expression) in old_expressions.into_iter() { + if let Expression::Multiple(mut inner_body) = expression { + inner_body.flatten_multiples(); + let returned_by_inner = inner_body.return_value(); + for (id, expression) in inner_body.expressions { + self.expressions.push((id, expression)); + } + self.expressions + .push((id, Expression::Reference(returned_by_inner))); + } else { + if let Expression::Lambda { body, .. 
} = &mut expression { + body.flatten_multiples(); + } + self.expressions.push((id, expression)); + } + } + } + + pub fn visit(&mut self, visitor: &mut dyn FnMut(Id, &mut Expression, bool)) { + let length = self.expressions.len(); + for i in 0..length { + let (id, expression) = self.expressions.get_mut(i).unwrap(); + Self::visit_expression(*id, expression, i == length - 1, visitor); + } + } + fn visit_expression( + id: Id, + expression: &mut Expression, + is_returned: bool, + visitor: &mut dyn FnMut(Id, &mut Expression, bool), + ) { + if let Expression::Lambda { body, .. } | Expression::Multiple(body) = expression { + body.visit(visitor); + } + visitor(id, expression, is_returned); + } + + /// Calls the visitor for each contained expression, even expressions in + /// lambdas or multiples. + /// + /// The visitor is called in inside-out order, so if the body contains a + /// lambda, the visitor is first called for its body expressions and only + /// then for the lambda expression itself. + /// + /// The visitor takes the ID of the current expression as well as the + /// expression itself. It also takes `VisibleExpressions`, which allows it + /// to inspect all expressions currently in scope. Finally, the visitor also + /// receives whether the current expression is returned from the surrounding + /// body. 
+ pub fn visit_with_visible( + &mut self, + visitor: &mut dyn FnMut(Id, &mut Expression, &VisibleExpressions, bool), + ) { + self.visit_with_visible_rec(&mut VisibleExpressions::none_visible(), visitor); + } + fn visit_with_visible_rec( + &mut self, + visible: &mut VisibleExpressions, + visitor: &mut dyn FnMut(Id, &mut Expression, &VisibleExpressions, bool), + ) { + let expressions_in_this_body = self.expressions.iter().map(|(id, _)| *id).collect_vec(); + let length = expressions_in_this_body.len(); + + for index in 0..length { + let (id, mut expression) = mem::replace( + self.expressions.get_mut(index).unwrap(), + (Id::from_usize(0), Expression::Parameter), + ); + let is_returned = index == length - 1; + Self::visit_expression_with_visible(id, &mut expression, visible, is_returned, visitor); + visible.insert(id, expression); + } + + for (index, id) in expressions_in_this_body.iter().enumerate() { + *self.expressions.get_mut(index).unwrap() = + (*id, visible.expressions.remove(id).unwrap()); + } + } + fn visit_expression_with_visible( + id: Id, + expression: &mut Expression, + visible: &mut VisibleExpressions, + is_returned: bool, + visitor: &mut dyn FnMut(Id, &mut Expression, &VisibleExpressions, bool), + ) { + if let Expression::Lambda { + parameters, + responsible_parameter, + body, + .. 
+ } = expression + { + for parameter in parameters.iter() { + visible.insert(*parameter, Expression::Parameter); + } + visible.insert(*responsible_parameter, Expression::Parameter); + body.visit_with_visible_rec(visible, visitor); + for parameter in parameters.iter() { + visible.expressions.remove(parameter); + } + visible.expressions.remove(responsible_parameter); + } + if let Expression::Multiple(body) = expression { + body.visit_with_visible_rec(visible, visitor); + } + + visitor(id, expression, visible, is_returned); + } + + pub fn visit_bodies(&mut self, visitor: &mut dyn FnMut(&mut Body)) { + for (_, expression) in self.iter_mut() { + expression.visit_bodies(visitor); + } + visitor(self); + } +} +impl Expression { + pub fn visit_bodies(&mut self, visitor: &mut dyn FnMut(&mut Body)) { + match self { + Expression::Lambda { body, .. } => body.visit_bodies(visitor), + Expression::Multiple(body) => body.visit_bodies(visitor), + _ => {} + } + } +} + +#[derive(Clone)] +pub struct VisibleExpressions { + expressions: HashMap, +} +impl VisibleExpressions { + pub fn none_visible() -> Self { + Self { + expressions: HashMap::new(), + } + } + pub fn insert(&mut self, id: Id, expression: Expression) { + self.expressions.insert(id, expression); + } + pub fn get(&self, id: Id) -> &Expression { + self.expressions.get(&id).unwrap() + } + pub fn contains(&self, id: Id) -> bool { + self.expressions.contains_key(&id) + } +} + +#[test] +fn test_multiple_flattening() { + use crate::{builtin_functions::BuiltinFunction, compiler::mir::Expression}; + + // $0 = + // $1 = builtinEquals + // + // # becomes: + // $0 = builtinEquals + // $1 = $0 + let mut mir = Mir::build(|body| { + body.push_multiple(|body| { + body.push(Expression::Builtin(BuiltinFunction::Equals)); + }); + }); + mir.flatten_multiples(); + mir.normalize_ids(); + assert_eq!( + mir, + Mir::build(|body| { + let inlined = body.push(Expression::Builtin(BuiltinFunction::Equals)); + body.push(Expression::Reference(inlined)); + 
}), + ); +} + +#[allow(clippy::derive_hash_xor_eq)] +impl hash::Hash for Expression { + fn hash(&self, state: &mut H) { + core::mem::discriminant(self).hash(state); + match self { + Expression::Int(int) => int.hash(state), + Expression::Text(text) => text.hash(state), + Expression::Symbol(symbol) => symbol.hash(state), + Expression::Builtin(builtin) => builtin.hash(state), + Expression::List(items) => items.hash(state), + Expression::Struct(fields) => fields.len().hash(state), + Expression::Reference(id) => id.hash(state), + Expression::HirId(id) => id.hash(state), + Expression::Lambda { + parameters, + responsible_parameter, + body, + } => { + parameters.hash(state); + responsible_parameter.hash(state); + body.hash(state); + } + Expression::Parameter => {} + Expression::Call { + function, + arguments, + responsible, + } => { + function.hash(state); + arguments.hash(state); + responsible.hash(state); + } + Expression::UseModule { + current_module, + relative_path, + responsible, + } => { + current_module.hash(state); + relative_path.hash(state); + responsible.hash(state); + } + Expression::Panic { + reason, + responsible, + } => { + reason.hash(state); + responsible.hash(state); + } + Expression::Multiple(body) => body.hash(state), + Expression::ModuleStarts { module } => module.hash(state), + Expression::ModuleEnds => {} + Expression::TraceCallStarts { + hir_call, + function, + arguments, + responsible, + } => { + hir_call.hash(state); + function.hash(state); + arguments.hash(state); + responsible.hash(state); + } + Expression::TraceCallEnds { return_value } => return_value.hash(state), + Expression::TraceExpressionEvaluated { + hir_expression, + value, + } => { + hir_expression.hash(state); + value.hash(state); + } + Expression::TraceFoundFuzzableClosure { + hir_definition, + closure, + } => { + hir_definition.hash(state); + closure.hash(state); + } + } + } +} + +impl Mir { + // For now, this is only used in tests. 
+ #[cfg(test)] + pub fn build(function: F) -> Self { + let mut id_generator = IdGenerator::start_at(0); + let mut builder = MirBodyBuilder::with_generator(&mut id_generator); + function(&mut builder); + assert!(builder.parameters.is_empty()); + let body = builder.body; + + Mir { id_generator, body } + } +} +impl Expression { + // The builder function takes the builder and the responsible parameter. + pub fn build_lambda( + id_generator: &mut IdGenerator, + function: F, + ) -> Self { + let responsible_parameter = id_generator.generate(); + let mut builder = MirBodyBuilder::with_generator(id_generator); + function(&mut builder, responsible_parameter); + + Expression::Lambda { + parameters: builder.parameters, + responsible_parameter, + body: builder.body, + } + } +} +pub struct MirBodyBuilder<'a> { + id_generator: &'a mut IdGenerator, + parameters: Vec, + body: Body, +} +impl<'a> MirBodyBuilder<'a> { + fn with_generator(id_generator: &'a mut IdGenerator) -> Self { + MirBodyBuilder { + id_generator, + parameters: vec![], + body: Body::default(), + } + } + pub fn new_parameter(&mut self) -> Id { + let id = self.id_generator.generate(); + self.parameters.push(id); + id + } + pub fn push(&mut self, expression: Expression) -> Id { + self.body.push_with_new_id(self.id_generator, expression) + } + pub fn push_lambda(&mut self, function: F) -> Id { + let lambda = Expression::build_lambda(self.id_generator, function); + self.push(lambda) + } + #[cfg(test)] + pub fn push_multiple(&mut self, function: F) -> Id { + let mut builder = MirBodyBuilder::with_generator(self.id_generator); + function(&mut builder); + assert!(builder.parameters.is_empty()); + let body = builder.body; + self.push(Expression::Multiple(body)) + } +} + +impl fmt::Display for Mir { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.body) + } +} +impl fmt::Debug for Mir { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.body) + } +} +impl 
fmt::Display for Body { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (id, expression) in &self.expressions { + writeln!(f, "{id} = {expression:?}")?; + } + Ok(()) + } +} +impl fmt::Debug for Id { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "${}", self.0) + } +} +impl fmt::Display for Id { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "${}", self.0) + } +} +impl fmt::Debug for Expression { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Expression::Int(int) => write!(f, "{int}"), + Expression::Text(text) => write!(f, "{text:?}"), + Expression::Symbol(symbol) => write!(f, "{symbol}"), + Expression::Builtin(builtin) => write!(f, "builtin{builtin:?}"), + Expression::List(items) => write!( + f, + "({})", + if items.is_empty() { + ",".to_string() + } else { + items.iter().map(|item| format!("{item}")).join(", ") + }, + ), + Expression::Struct(fields) => write!( + f, + "[{}]", + fields + .iter() + .map(|(key, value)| format!("{key}: {value}")) + .join(", "), + ), + Expression::Reference(id) => write!(f, "{id}"), + Expression::HirId(id) => write!(f, "{id}"), + Expression::Lambda { + parameters, + responsible_parameter, + body, + } => write!( + f, + "{{ {} ->\n{}\n}}", + if parameters.is_empty() { + format!("(responsible {responsible_parameter})") + } else { + format!( + "{} (+ responsible {responsible_parameter})", + parameters + .iter() + .map(|parameter| format!("{parameter}")) + .join(" "), + ) + }, + format!("{body}") + .lines() + .map(|line| format!(" {line}")) + .join("\n"), + ), + Expression::Parameter => write!(f, "parameter"), + Expression::Call { + function, + arguments, + responsible, + } => write!( + f, + "call {function} with {} ({responsible} is responsible)", + if arguments.is_empty() { + "no arguments".to_string() + } else { + arguments.iter().map(|arg| format!("{arg}")).join(" ") + }, + ), + Expression::UseModule { + current_module, + relative_path, 
+ responsible, + } => write!( + f, + "use {relative_path} (relative to {current_module}; {responsible} is responsible)", + ), + Expression::Panic { + reason, + responsible, + } => write!(f, "panicking because {reason} ({responsible} is at fault)"), + Expression::Multiple(body) => write!( + f, + "\n{}", + format!("{body}") + .lines() + .map(|line| format!(" {line}")) + .join("\n"), + ), + Expression::ModuleStarts { module } => write!(f, "module {module} starts"), + Expression::ModuleEnds => write!(f, "module ends"), + Expression::TraceCallStarts { + hir_call, + function, + arguments, + responsible, + } => write!(f, + "trace: start of call of {function} with {} ({responsible} is responsible, code is at {hir_call})", + arguments.iter().map(|arg| format!("{arg}")).join(" "), + ), + Expression::TraceCallEnds { return_value } => { + write!(f, "trace: end of call with return value {return_value}") + } + Expression::TraceExpressionEvaluated { + hir_expression, + value, + } => { + write!(f, "trace: expression {hir_expression} evaluated to {value}") + } + Expression::TraceFoundFuzzableClosure { + hir_definition, + closure, + } => { + write!( + f, + "trace: found fuzzable closure {closure}, defined at {hir_definition}", + ) + } + } + } +} +impl fmt::Debug for VisibleExpressions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}", + self.expressions + .keys() + .sorted() + .map(|id| format!("{id}")) + .join(", ") + ) + } +} diff --git a/compiler/src/compiler/mir_optimize/cleanup.rs b/compiler/src/compiler/mir_optimize/cleanup.rs new file mode 100644 index 000000000..094d48d1d --- /dev/null +++ b/compiler/src/compiler/mir_optimize/cleanup.rs @@ -0,0 +1,82 @@ +//! Cleanup makes the MIR more regular. Thus, it's easier to read for humans and +//! salsa should have an easier time caching optimized MIRs. +//! +//! Here's a before-and-after example: +//! +//! ```mir +//! $4 = "Banana" | $0 = Foo +//! $8 = Foo | $1 = "Apple" +//! 
$2 = "Apple" | $2 = "Banana" +//! ... | ... +//! ``` + +use crate::{ + compiler::mir::{Body, Expression, Id, Mir}, + utils::IdGenerator, +}; +use std::{collections::HashMap, mem}; + +impl Mir { + pub fn cleanup(&mut self) { + self.sort_leading_constants(); + self.normalize_ids(); + } + + /// Sorts the leading constants in the body. This wouldn't be super useful + /// when applied to an unoptimized MIR, but because we optimize it using + /// [constant lifting], we can assume that all constants are at the beginning + /// of the body. + /// + /// [constant lifting]: super::constant_lifting + fn sort_leading_constants(&mut self) { + let mut still_constants = true; + let old_body = mem::take(&mut self.body); + for (id, expression) in old_body.into_iter() { + if still_constants && !expression.is_pure() { + still_constants = false; + Self::sort_constants(&mut self.body); + } + self.body.push(id, expression); + } + if still_constants { + Self::sort_constants(&mut self.body); + } + } + /// Assumes that the given body contains only constants. 
+ fn sort_constants(body: &mut Body) { + body.sort_by(|(_, a), (_, b)| { + fn order_score(expr: &Expression) -> u8 { + match expr { + Expression::HirId(_) => 0, + Expression::Builtin(_) => 1, + Expression::Symbol(_) => 2, + Expression::Int(_) => 3, + Expression::Text(_) => 4, + _ => 5, + } + } + match (a, b) { + (Expression::HirId(a), Expression::HirId(b)) => format!("{a}").cmp(&format!("{b}")), + (Expression::Builtin(a), Expression::Builtin(b)) => { + format!("{a:?}").cmp(&format!("{b:?}")) + } + (Expression::Symbol(a), Expression::Symbol(b)) => a.cmp(b), + (Expression::Int(a), Expression::Int(b)) => a.cmp(b), + (Expression::Text(a), Expression::Text(b)) => a.cmp(b), + _ => order_score(a).cmp(&order_score(b)), + } + }); + } + + pub fn normalize_ids(&mut self) { + let mut generator = IdGenerator::start_at(0); + let mapping: HashMap = self + .body + .defined_ids() + .into_iter() + .map(|id| (id, generator.generate())) + .collect(); + + self.body.replace_ids(&mut |id| *id = mapping[id]) + } +} diff --git a/compiler/src/compiler/mir_optimize/common_subtree_elimination.rs b/compiler/src/compiler/mir_optimize/common_subtree_elimination.rs new file mode 100644 index 000000000..4a9d3601c --- /dev/null +++ b/compiler/src/compiler/mir_optimize/common_subtree_elimination.rs @@ -0,0 +1,72 @@ +//! Common subtree elimination deduplicates pure expressions that yield the same +//! value. +//! +//! Here's a before-and-after example: +//! +//! ```mir +//! $0 = builtinIntAdd | $0 = builtinIntAdd +//! $1 = 2 | $1 = 2 +//! $2 = 2 | $2 = $1 +//! $3 = call $0 with $1 $2 | $3 = call $0 with $1 $2 +//! ``` +//! +//! This is especially effective after [constant lifting] because lots of +//! constants are in the same scope. This optimization is also a necessity to +//! avoid exponential code blowup when importing modules – after +//! [module folding], a lot of duplicate functions exist. +//! +//! [constant lifting]: super::constant_lifting +//! 
[module folding]: super::module_folding
+
+use crate::{
+    compiler::mir::{Expression, Id, Mir},
+    utils::{CountableId, IdGenerator},
+};
+use std::collections::HashMap;
+
+impl Mir {
+    pub fn eliminate_common_subtrees(&mut self) {
+        let mut pure_expressions: HashMap<Expression, Id> = HashMap::new();
+
+        self.body.visit_with_visible(&mut |id, expression, visible, _| {
+            if !expression.is_pure() {
+                return;
+            }
+
+            let mut normalized = expression.clone();
+            normalized.normalize();
+
+            if let Some(id_of_same_expression) = pure_expressions.get(&normalized) && visible.contains(*id_of_same_expression) {
+                *expression = Expression::Reference(*id_of_same_expression);
+            } else {
+                pure_expressions.insert(normalized, id);
+            }
+        });
+    }
+}
+
+impl Expression {
+    /// Two lambdas where local expressions have different IDs are usually not
+    /// considered equal. This method normalizes expressions by replacing all
+    /// locally defined IDs.
+    fn normalize(&mut self) {
+        let mut generator = IdGenerator::start_at(
+            self.captured_ids()
+                .into_iter()
+                .max()
+                .map(|id| id.to_usize() + 1)
+                .unwrap_or(0),
+        );
+        let mapping: HashMap<Id, Id> = self
+            .defined_ids()
+            .into_iter()
+            .map(|id| (id, generator.generate()))
+            .collect();
+
+        self.replace_ids(&mut |id| {
+            if let Some(replacement) = mapping.get(id) {
+                *id = *replacement;
+            }
+        })
+    }
+}
diff --git a/compiler/src/compiler/mir_optimize/complexity.rs b/compiler/src/compiler/mir_optimize/complexity.rs
new file mode 100644
index 000000000..8f97fe97b
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/complexity.rs
@@ -0,0 +1,88 @@
+use crate::compiler::mir::{Body, Expression, Mir};
+use core::fmt;
+use std::{cmp::Ordering, ops::Add};
+
+pub struct Complexity {
+    pub is_self_contained: bool,
+    pub expressions: usize,
+}
+
+impl Complexity {
+    fn none() -> Self {
+        Self {
+            is_self_contained: true,
+            expressions: 0,
+        }
+    }
+    fn single_expression() -> Self {
+        Self {
+            is_self_contained: true,
+            expressions: 1,
+        }
+    }
+}
+impl Add for Complexity {
+    type Output = Complexity;
+
+    fn add(self, other: Self) -> Self::Output {
+        Complexity {
+            is_self_contained: self.is_self_contained && other.is_self_contained,
+            expressions: self.expressions + other.expressions,
+        }
+    }
+}
+impl PartialOrd for Complexity {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        match (self.is_self_contained, other.is_self_contained) {
+            (false, false) => None,
+            (false, true) => Some(Ordering::Greater),
+            (true, false) => Some(Ordering::Less),
+            (true, true) => self.expressions.partial_cmp(&other.expressions),
+        }
+    }
+}
+impl PartialEq for Complexity {
+    fn eq(&self, other: &Self) -> bool {
+        self.partial_cmp(other) == Some(Ordering::Equal)
+    }
+}
+impl fmt::Display for Complexity {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}, {} expressions",
+            if self.is_self_contained {
+                "self-contained"
+            } else {
+                "still contains `use`"
+            },
+            self.expressions,
+        )
+    }
+}
+
+impl Mir {
+    pub fn complexity(&self) -> Complexity {
+        self.body.complexity()
+    }
+}
+impl Body {
+    pub fn complexity(&self) -> Complexity {
+        self.iter()
+            .fold(Complexity::none(), |complexity, (_, expression)| {
+                complexity + expression.complexity()
+            })
+    }
+}
+impl Expression {
+    fn complexity(&self) -> Complexity {
+        match self {
+            Expression::Lambda { body, .. } => Complexity::single_expression() + body.complexity(),
+            Expression::UseModule { .. } => Complexity {
+                is_self_contained: false,
+                expressions: 1,
+            },
+            _ => Complexity::single_expression(),
+        }
+    }
+}
diff --git a/compiler/src/compiler/mir_optimize/constant_folding.rs b/compiler/src/compiler/mir_optimize/constant_folding.rs
new file mode 100644
index 000000000..632124aee
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/constant_folding.rs
@@ -0,0 +1,240 @@
+//! Constant folding is just a fancy term for executing instructions at
+//! compile-time when their result is known.
+//!
+//! Here's a before-and-after example:
+//!
+//! ```mir
+//! 
$0 = builtinIntAdd       | $0 = builtinIntAdd
+//! $1 = 2                   | $1 = 2
+//! $2 = call $0 with $1 $1  | $2 = 4
+//! ```
+//!
+//! Afterwards, [tree shaking] can remove unneeded arguments. In the example
+//! above, only `$2` would remain.
+//!
+//! Not all arguments need to be compile-time known. For example, even this code
+//! could be simplified:
+//!
+//! ```mir
+//! $0 = Foo                 | $0 = Foo
+//! $1 = struct [$0: $a]     | $1 = struct [$0: $a]
+//! $2 = builtinStructGet    | $2 = builtinStructGet
+//! $3 = call $2 with $1 $0  | $3 = $a
+//! ```
+//!
+//! Not only builtins can be compile-time evaluated: Needs and compile-time
+//! errors from previous compilation stages can possibly also be executed at
+//! compile-time.
+//!
+//! [tree shaking]: super::tree_shaking
+
+use crate::{
+    builtin_functions::BuiltinFunction,
+    compiler::mir::{Body, Expression, Id, Mir, VisibleExpressions},
+};
+
+impl Mir {
+    pub fn fold_constants(&mut self) {
+        self.body
+            .visit_with_visible(&mut |_, expression, visible, _| {
+                let Expression::Call {
+                    function,
+                    arguments,
+                    responsible,
+                } = expression else { return; };
+                let Expression::Builtin(builtin) = visible.get(*function) else { return; };
+                let Some(result) = Self::run_builtin(*builtin, arguments, *responsible, visible) else {
+                    return;
+                };
+                let evaluated_call = match result {
+                    Ok(return_value) => return_value,
+                    Err(panic_reason) => {
+                        let mut body = Body::default();
+                        let reason = body.push_with_new_id(
+                            &mut self.id_generator,
+                            Expression::Text(panic_reason),
+                        );
+                        body.push_with_new_id(
+                            &mut self.id_generator,
+                            Expression::Panic {
+                                reason,
+                                responsible: *responsible,
+                            },
+                        );
+                        Expression::Multiple(body)
+                    }
+                };
+                *expression = evaluated_call;
+            });
+    }
+
+    /// This function tries to run a builtin, requiring a minimal amount of
+    /// static knowledge. For example, it can find out that the result of
+    /// `builtinEquals $3 $3` is `True`, even if the value of `$3` is not known
+    /// at compile-time.
+    ///
+    /// Returns `None` if the call couldn't be evaluated statically. Returns
+    /// `Some(Ok(expression))` if the call successfully completed with a return
+    /// value. Returns `Some(Err(reason))` if the call panics.
+    fn run_builtin(
+        builtin: BuiltinFunction,
+        arguments: &[Id],
+        responsible: Id,
+        visible: &VisibleExpressions,
+    ) -> Option<Result<Expression, String>> {
+        let return_value = match builtin {
+            BuiltinFunction::Equals => {
+                if arguments.len() != 2 {
+                    return Some(Err("wrong number of arguments".to_string()));
+                }
+
+                let a = arguments[0];
+                let b = arguments[1];
+
+                let are_equal = a.semantically_equals(b, visible)?;
+                Expression::Symbol(if are_equal { "True" } else { "False" }.to_string())
+            }
+            BuiltinFunction::FunctionRun => {
+                if arguments.len() != 1 {
+                    return Some(Err("wrong number of arguments".to_string()));
+                }
+                Expression::Call {
+                    function: arguments[0],
+                    arguments: vec![],
+                    responsible,
+                }
+            }
+            BuiltinFunction::IfElse => {
+                if arguments.len() != 3 {
+                    return Some(Err("wrong number of arguments".to_string()));
+                }
+
+                let condition = arguments[0];
+                let then_body = arguments[1];
+                let else_body = arguments[2];
+
+                let Ok(condition) = visible.get(condition).try_into() else {
+                    return None;
+                };
+                Expression::Call {
+                    function: if condition { then_body } else { else_body },
+                    arguments: vec![],
+                    responsible,
+                }
+            }
+            BuiltinFunction::StructGet => {
+                if arguments.len() != 2 {
+                    return Some(Err("wrong number of arguments".to_string()));
+                }
+
+                let struct_id = arguments[0];
+                let key_id = arguments[1];
+
+                // TODO: Also catch this being called on a non-struct and
+                // statically panic in that case.
+                let Expression::Struct(fields) = visible.get(struct_id) else {
+                    return None;
+                };
+
+                // TODO: Relax this requirement. Even if not all keys are
+                // constant, we may still conclude the result of the builtin:
+                // If one key `semantically_equals` the requested one and all
+                // others are definitely not, then we can still resolve that.
+                if !visible.get(key_id).is_constant(visible) {
+                    return None;
+                }
+                if fields
+                    .iter()
+                    .any(|(key, _)| !visible.get(*key).is_constant(visible))
+                {
+                    return None;
+                }
+
+                let value = fields
+                    .iter()
+                    .rev()
+                    .find(|(k, _)| k.semantically_equals(key_id, visible).unwrap_or(false))
+                    .map(|(_, value)| *value);
+                if let Some(value) = value {
+                    Expression::Reference(value)
+                } else {
+                    return Some(Err(format!(
+                        "Struct access will panic because key {:?} isn't in there.",
+                        visible.get(key_id),
+                    )));
+                }
+            }
+            BuiltinFunction::TypeOf => {
+                if arguments.len() != 1 {
+                    return Some(Err("wrong number of arguments".to_string()));
+                }
+
+                match visible.get(arguments[0]) {
+                    Expression::Int(_) => Expression::Symbol("Int".to_string()),
+                    Expression::Text(_) => Expression::Symbol("Text".to_string()),
+                    Expression::Symbol(_) => Expression::Symbol("Symbol".to_string()),
+                    Expression::Builtin(_) => Expression::Symbol("Function".to_string()),
+                    Expression::List(_) => Expression::Symbol("List".to_string()),
+                    Expression::Struct(_) => Expression::Symbol("Struct".to_string()),
+                    Expression::Reference(_) => return None,
+                    Expression::HirId(_) => unreachable!(),
+                    Expression::Lambda { .. } => Expression::Symbol("Function".to_string()),
+                    Expression::Parameter => return None,
+                    Expression::Call { function, .. } => {
+                        let callee = visible.get(*function);
+                        let Expression::Builtin(builtin) = callee else {
+                            return None;
+                        };
+                        let return_type = match builtin {
+                            BuiltinFunction::Equals => "Symbol",
+                            BuiltinFunction::GetArgumentCount => "Int",
+                            BuiltinFunction::IntAdd => "Int",
+                            BuiltinFunction::IntBitLength => "Int",
+                            BuiltinFunction::IntBitwiseAnd => "Int",
+                            BuiltinFunction::IntBitwiseOr => "Int",
+                            BuiltinFunction::IntBitwiseXor => "Int",
+                            BuiltinFunction::IntCompareTo => "Symbol",
+                            BuiltinFunction::IntDivideTruncating => "Int",
+                            BuiltinFunction::IntModulo => "Int",
+                            BuiltinFunction::IntMultiply => "Int",
+                            BuiltinFunction::IntRemainder => "Int",
+                            BuiltinFunction::IntShiftLeft => "Int",
+                            BuiltinFunction::IntShiftRight => "Int",
+                            BuiltinFunction::IntSubtract => "Int",
+                            BuiltinFunction::ListFilled => "List",
+                            BuiltinFunction::ListInsert => "List",
+                            BuiltinFunction::ListLength => "Int",
+                            BuiltinFunction::ListRemoveAt => "List",
+                            BuiltinFunction::ListReplace => "List",
+                            BuiltinFunction::StructHasKey => "Symbol",
+                            BuiltinFunction::TextCharacters => "List",
+                            BuiltinFunction::TextConcatenate => "Text",
+                            BuiltinFunction::TextContains => "Symbol",
+                            BuiltinFunction::TextEndsWith => "Symbol",
+                            BuiltinFunction::TextGetRange => "Text",
+                            BuiltinFunction::TextIsEmpty => "Symbol",
+                            BuiltinFunction::TextLength => "Int",
+                            BuiltinFunction::TextStartsWith => "Symbol",
+                            BuiltinFunction::TextTrimEnd => "Text",
+                            BuiltinFunction::TextTrimStart => "Text",
+                            BuiltinFunction::TypeOf => "Symbol",
+                            _ => return None,
+                        };
+                        Expression::Symbol(return_type.to_string())
+                    }
+                    Expression::UseModule { .. } => return None,
+                    Expression::Panic { .. } => return None,
+                    Expression::Multiple(_) => return None,
+                    Expression::ModuleStarts { .. }
+                    | Expression::ModuleEnds
+                    | Expression::TraceCallStarts { .. }
+                    | Expression::TraceCallEnds { .. }
+                    | Expression::TraceExpressionEvaluated { .. }
+                    | Expression::TraceFoundFuzzableClosure { .. } => unreachable!(),
+                }
+            }
+            _ => return None,
+        };
+        Some(Ok(return_value))
+    }
+}
diff --git a/compiler/src/compiler/mir_optimize/constant_lifting.rs b/compiler/src/compiler/mir_optimize/constant_lifting.rs
new file mode 100644
index 000000000..10dd40e1e
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/constant_lifting.rs
@@ -0,0 +1,102 @@
+//! Constant lifting refers to lifting constants from lambdas into surrounding
+//! scopes.
+//!
+//! Here's a before-and-after example:
+//!
+//! ```mir
+//!                             | $2 = Foo
+//!                             | $5 = Foo
+//! $0 = { ($1 responsible) ->  | $0 = { ($1 responsible) ->
+//!   $2 = Foo                  |
+//!   ...                       |   ...
+//! }                           | }
+//! $3 = { ($4 responsible) ->  | $3 = { ($4 responsible) ->
+//!   $5 = Foo                  |
+//!   ...                       |   ...
+//! }                           | }
+//! ```
+//!
+//! This enables more effective [common subtree elimination] and is especially
+//! important for avoiding an exponential code blowup when importing modules.
+//!
+//! When the lifted constant is the last in a body (aka it's the body's return
+//! value), a reference expression is inserted in its place.
+//!
+//! ```mir
+//!                             | $2 = Foo
+//! $0 = { ($1 responsible) ->  | $0 = { ($1 responsible) ->
+//!   $2 = Foo                  |   $3 = $2
+//! }                           | }
+//! ```
+//!
+//! TODO: Have a separate constant heap directly in the LIR, so that
+//! instructions such as `Instruction::CreateInt` are never actually executed at
+//! runtime.
+//!
+//! [common subtree elimination]: super::common_subtree_elimination
+
+use crate::{
+    compiler::mir::{Body, Expression, Id, Mir},
+    utils::IdGenerator,
+};
+use std::collections::HashSet;
+
+impl Mir {
+    pub fn lift_constants(&mut self) {
+        // Expressions in the top level should not be lifted as that would just
+        // mean moving some constants and then creating references to them in
+        // the original places.
+        let top_level_ids = self.body.iter().map(|(id, _)| id).collect::<HashSet<_>>();
+
+        let mut constants = vec![];
+        let mut constant_ids = HashSet::new();
+
+        self.body.visit(&mut |id, expression, is_return_value| {
+            if top_level_ids.contains(&id) {
+                return;
+            }
+            let is_constant = expression.is_pure()
+                && expression
+                    .captured_ids()
+                    .iter()
+                    .all(|captured| constant_ids.contains(captured));
+            if !is_constant {
+                return;
+            }
+            if is_return_value && let Expression::Reference(_) = expression {
+                // Returned references shouldn't be lifted. If we would lift
+                // one, we'd have to add a reference anyway.
+                return;
+            }
+            constants.push((id, expression.clone()));
+            constant_ids.insert(id);
+        });
+
+        self.body.visit_bodies(&mut |body| {
+            Self::remove_constants(body, &constant_ids, &mut self.id_generator)
+        });
+        Self::remove_constants(&mut self.body, &constant_ids, &mut self.id_generator);
+        for (_, expression) in &mut constants {
+            expression.visit_bodies(&mut |body| {
+                Self::remove_constants(body, &constant_ids, &mut self.id_generator);
+            })
+        }
+
+        self.body.insert_at_front(constants);
+    }
+
+    fn remove_constants(
+        body: &mut Body,
+        constant_ids: &HashSet<Id>,
+        id_generator: &mut IdGenerator,
+    ) {
+        let return_value = body.return_value();
+        body.remove_all(|id, _| constant_ids.contains(&id));
+
+        if constant_ids.contains(&return_value) {
+            // The return value was removed. Add a reference to the lifted
+            // constant.
+            body.push(id_generator.generate(), Expression::Reference(return_value));
+        }
+    }
+}
diff --git a/compiler/src/compiler/mir_optimize/inlining.rs b/compiler/src/compiler/mir_optimize/inlining.rs
new file mode 100644
index 000000000..d8af49266
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/inlining.rs
@@ -0,0 +1,146 @@
+//! Inlining means inserting a lambda's code at the caller site.
+//!
+//! Here's a before-and-after example of a `use "Core"` call being inlined:
+//!
+//! ```mir
+//! # before:
+//! $0 = { $1 ($2 responsible) ->
+//! 
$3 = use $1 relative to here, $2 responsible
+//! }
+//! $4 = "Core"
+//! $5 = HirId(the `use "Core"` expression)
+//! $6 = call $0 with $4 ($5 is responsible)
+//!
+//! # after:
+//! $0 = { $1 ($2 responsible) ->
+//!   $3 = use $1 relative to here, $2 responsible
+//! }
+//! $4 = "Core"
+//! $5 = HirId(the `use "Core"` expression)
+//! $6 =
+//!   $7 = use $4 relative to here, $5 responsible
+//! ```
+//!
+//! Inlining makes lots of other optimizations more effective, in particular
+//! [tree shaking] of lambdas that were inlined into all call sites. Because at
+//! the call sites, more information about arguments exist, [constant folding]
+//! and [module folding] can be more effective.
+//!
+//! TODO: When we have a metric for judging performance vs. code size, also
+//! speculatively inline more call sites, such as smallish functions and
+//! functions only used once.
+//!
+//! [constant folding]: super::constant_folding
+//! [module folding]: super::module_folding
+//! [tree shaking]: super::tree_shaking
+
+use crate::{
+    compiler::mir::{Expression, Id, Mir, VisibleExpressions},
+    utils::IdGenerator,
+};
+use std::collections::{HashMap, HashSet};
+
+use super::complexity::Complexity;
+
+impl Expression {
+    pub fn inline_call(
+        &mut self,
+        visible: &VisibleExpressions,
+        id_generator: &mut IdGenerator,
+    ) -> Result<(), &'static str> {
+        let Expression::Call {
+            function,
+            arguments,
+            responsible: responsible_argument,
+        } = self else {
+            return Err("Tried to inline, but the expression is not a call.");
+        };
+        let Expression::Lambda {
+            parameters,
+            responsible_parameter,
+            body,
+        } = visible.get(*function) else {
+            return Err("Tried to inline, but the callee is not a lambda.");
+        };
+        if arguments.len() != parameters.len() {
+            return Err("Tried to inline, but the number of arguments doesn't match the expected parameter count.");
+        }
+
+        let id_mapping: HashMap<Id, Id> = parameters
+            .iter()
+            .zip(arguments.iter())
+            .map(|(parameter, argument)| (*parameter, *argument))
+            .chain([(*responsible_parameter, *responsible_argument)])
+            .chain(
+                body.defined_ids()
+                    .into_iter()
+                    .map(|id| (id, id_generator.generate())),
+            )
+            .collect();
+        let mut inlined_body = body.clone();
+        inlined_body.replace_ids(&mut |id| {
+            if let Some(replacement) = id_mapping.get(id) {
+                *id = *replacement;
+            }
+        });
+
+        *self = Expression::Multiple(inlined_body);
+
+        Ok(())
+    }
+}
+
+impl Mir {
+    pub fn inline_functions_containing_use(&mut self) {
+        let mut functions_with_use = HashSet::new();
+        for (id, expression) in self.body.iter() {
+            if let Expression::Lambda { body, .. } = expression &&
+                body.iter().any(|(_, expr)| matches!(expr, Expression::UseModule { .. })) {
+                functions_with_use.insert(id);
+            }
+        }
+
+        self.body.visit_with_visible(&mut |_, expression, visible, _| {
+            if let Expression::Call { function, .. } = expression && functions_with_use.contains(function) {
+                // If inlining fails with an `Err`, there's nothing we can do
+                // except apply other optimizations first and then try again
+                // later.
+                let _ = expression.inline_call(visible, &mut self.id_generator);
+            }
+        });
+    }
+
+    pub fn inline_functions_of_maximum_complexity(&mut self, complexity: Complexity) {
+        let mut small_functions = HashSet::new();
+        for (id, expression) in self.body.iter() {
+            if let Expression::Lambda { body, .. } = expression && body.complexity() <= complexity {
+                small_functions.insert(id);
+            }
+        }
+
+        self.body.visit_with_visible(&mut |_, expression, visible, _| {
+            if let Expression::Call { function, .. } = expression && small_functions.contains(function) {
+                let _ = expression.inline_call(visible, &mut self.id_generator);
+            }
+        });
+    }
+
+    pub fn inline_tiny_functions(&mut self) {
+        self.inline_functions_of_maximum_complexity(Complexity {
+            is_self_contained: true,
+            expressions: 1,
+        });
+    }
+
+    pub fn inline_functions_only_called_once(&mut self) {
+        let mut reference_counts: HashMap<Id, usize> = HashMap::new();
+        self.body.replace_id_references(&mut |id| {
+            *reference_counts.entry(*id).or_default() += 1;
+        });
+        self.body.visit_with_visible(&mut |_, expression, visible, _| {
+            if let Expression::Call { function, .. } = expression && reference_counts[function] == 1 {
+                let _ = expression.inline_call(visible, &mut self.id_generator);
+            }
+        });
+    }
+}
diff --git a/compiler/src/compiler/mir_optimize/mod.rs b/compiler/src/compiler/mir_optimize/mod.rs
new file mode 100644
index 000000000..211333d52
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/mod.rs
@@ -0,0 +1,171 @@
+//! Optimizations are a necessity for Candy code to run reasonably fast. For
+//! example, without optimizations, if two modules import a third module using
+//! `use "..foo"`, then the `foo` module is instantiated twice completely
+//! separately. Because this module can in turn depend on other modules, this
+//! approach would lead to exponential code blowup.
+//!
+//! When optimizing code in general, there are two main objectives:
+//!
+//! - Making the code fast.
+//! - Making the code small.
+//!
+//! Some optimizations benefit both of these objectives. For example, removing
+//! ignored computations from the program makes it smaller, but also means
+//! there's less code to be executed. Other optimizations further one objective,
+//! but harm the other. For example, inlining functions (basically copying their
+//! code to where they're used), can make the code bigger, but also potentially
+//! faster because there are fewer function calls to be performed.
+//!
+//! 
Depending on the use case, the tradeoff between both objectives changes. To
+//! put you in the right mindset, here are just two use cases:
+//!
+//! - Programming for a microcontroller with 1 MB of ROM available for the
+//!   program. In this case, you want your code to be as fast as possible while
+//!   still fitting in 1 MB. Interestingly, the importance of code size is a
+//!   step function: There's no benefit in only using 0.5 MB, but 1.1 MB makes
+//!   the program completely unusable.
+//!
+//! - Programming for a WASM module to be downloaded. In this case, you might
+//!   have some concrete measurements on how performance and download size
+//!   affect user retention.
+//!
+//! It should be noted that we can't judge performance statically. Although some
+//! optimizations such as inlining typically improve performance, there are rare
+//! cases where they don't. For example, inlining a function that's used in
+//! multiple places means the CPU's branch predictor can't benefit from the
+//! knowledge gained by previous function executions. Inlining might also make
+//! your program bigger, causing more cache misses. Thankfully, Candy is not yet
+//! optimized enough for us to care about such details.
+//!
+//! This module contains several optimizations. All of them operate on the MIR.
+//! Some are called "obvious". Those are optimizations that typically improve
+//! both performance and code size. Whenever they can be applied, they should be
+//! applied.
+
+mod cleanup;
+mod common_subtree_elimination;
+mod complexity;
+mod constant_folding;
+mod constant_lifting;
+mod inlining;
+mod module_folding;
+mod module_stack_cancelling;
+mod multiple_flattening;
+mod reference_following;
+mod tree_shaking;
+mod utils;
+
+use super::{
+    hir,
+    hir_to_mir::{HirToMir, TracingConfig},
+    mir::{Body, Expression, Mir},
+};
+use crate::{module::Module, utils::IdGenerator};
+use std::sync::Arc;
+use tracing::debug;
+
+#[salsa::query_group(OptimizeMirStorage)]
+pub trait OptimizeMir: HirToMir {
+    #[salsa::cycle(recover_from_cycle)]
+    fn mir_with_obvious_optimized(&self, module: Module, config: TracingConfig)
+        -> Option<Arc<Mir>>;
+}
+
+fn mir_with_obvious_optimized(
+    db: &dyn OptimizeMir,
+    module: Module,
+    config: TracingConfig,
+) -> Option<Arc<Mir>> {
+    debug!("{module}: Compiling.");
+    let mir = db.mir(module.clone(), config.clone())?;
+    let mut mir = (*mir).clone();
+
+    let complexity_before = mir.complexity();
+    mir.optimize_obvious(db, &config);
+    let complexity_after = mir.complexity();
+
+    debug!("{module}: Done. Optimized from {complexity_before} to {complexity_after}");
+    Some(Arc::new(mir))
+}
+
+impl Mir {
+    /// Performs optimizations that improve both performance and code size.
+    pub fn optimize_obvious(&mut self, db: &dyn OptimizeMir, config: &TracingConfig) {
+        loop {
+            let before = self.clone();
+
+            self.optimize_obvious_self_contained();
+            self.fold_modules(db, config);
+
+            if *self == before {
+                break;
+            }
+        }
+        self.optimize_obvious_self_contained();
+        self.cleanup();
+    }
+
+    /// Performs optimizations that improve both performance and code size and
+    /// that work without looking at other modules.
+    pub fn optimize_obvious_self_contained(&mut self) {
+        // TODO: This optimization may make the code more inefficient for very
+        // long functions containing a `use`. Remove this optimization as soon
+        // as we support general speculative inlining.
+        self.checked_optimization(|mir| mir.inline_functions_containing_use());
+
+        loop {
+            let before = self.clone();
+
+            self.checked_optimization(|mir| mir.follow_references());
+            self.checked_optimization(|mir| mir.remove_redundant_return_references());
+            self.checked_optimization(|mir| mir.tree_shake());
+            self.checked_optimization(|mir| mir.fold_constants());
+            self.checked_optimization(|mir| mir.inline_functions_only_called_once());
+            self.checked_optimization(|mir| mir.inline_tiny_functions());
+            self.checked_optimization(|mir| mir.lift_constants());
+            self.checked_optimization(|mir| mir.eliminate_common_subtrees());
+            self.checked_optimization(|mir| mir.flatten_multiples());
+            self.checked_optimization(|mir| mir.cancel_out_module_expressions());
+
+            if *self == before {
+                return;
+            }
+        }
+    }
+
+    fn checked_optimization(&mut self, optimization: fn(&mut Mir) -> ()) {
+        optimization(self);
+        if cfg!(debug_assertions) {
+            self.validate();
+        }
+    }
+}
+
+fn recover_from_cycle(
+    _db: &dyn OptimizeMir,
+    cycle: &[String],
+    module: &Module,
+    _config: &TracingConfig,
+) -> Option<Arc<Mir>> {
+    let mut id_generator = IdGenerator::start_at(0);
+    let mut body = Body::default();
+    let reason = body.push_with_new_id(
+        &mut id_generator,
+        Expression::Text(format!(
+            "There's a cycle in the used modules: {}",
+            cycle.join(" → "),
+        )),
+    );
+    let responsible = body.push_with_new_id(
+        &mut id_generator,
+        Expression::HirId(hir::Id::new(module.clone(), vec![])),
+    );
+    body.push_with_new_id(
+        &mut id_generator,
+        Expression::Panic {
+            reason,
+            responsible,
+        },
+    );
+    Some(Arc::new(Mir { id_generator, body }))
+}
diff --git a/compiler/src/compiler/mir_optimize/module_folding.rs b/compiler/src/compiler/mir_optimize/module_folding.rs
new file mode 100644
index 000000000..feccd38df
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/module_folding.rs
@@ -0,0 +1,82 @@
+//! Module folding evaluates imports with a known argument at compile-time.
+//!
+//! 
This is similar to [constant folding], but for the `builtinUseModule`
+//! builtin. This is also similar to [inlining], but for entire module contents.
+//!
+//! Here's a before-and-after example of an import of Core being folded:
+//!
+//! ```mir
+//! # before:
+//! $0 = "Core"
+//! $1 = HirId(the `use "Core"` expression)
+//! $2 = use $0 relative to here, $1 responsible
+//!
+//! # after:
+//! $0 = "Core"
+//! $1 = HirId(the `use "Core"` expression)
+//! $2 =
+//!   (code of Core)
+//! ```
+//!
+//! Like [inlining], module folding enables many other optimizations, but across
+//! module boundaries. If all imports can be resolved at compile-time, that also
+//! means that the VM never needs to interrupt the program execution for parsing
+//! and compiling other modules. Module folding is a necessity for building
+//! binaries that don't include the Candy compiler itself.
+//!
+//! [constant folding]: super::constant_folding
+//! [inlining]: super::inlining
+
+use crate::{
+    compiler::{
+        hir_to_mir::TracingConfig,
+        mir::{Expression, Id, Mir},
+        mir_optimize::OptimizeMir,
+    },
+    module::UsePath,
+};
+use std::collections::HashMap;
+use tracing::warn;
+
+impl Mir {
+    pub fn fold_modules(&mut self, db: &dyn OptimizeMir, config: &TracingConfig) {
+        self.body
+            .visit_with_visible(&mut |_, expression, visible, _| {
+                let Expression::UseModule {
+                    current_module,
+                    relative_path,
+                    responsible: _,
+                } = expression else { return; };
+
+                let Expression::Text(path) = visible.get(*relative_path) else {
+                    return; // TODO: Replace with a panic.
+                };
+                let Ok(path) = UsePath::parse(path) else {
+                    warn!("`use` called with an invalid path.");
+                    return; // TODO: Replace with a panic.
+                };
+                let Ok(module_to_import) = path.resolve_relative_to(current_module.clone()) else {
+                    warn!("`use` called with an invalid path.");
+                    return; // TODO: Replace with a panic.
+                };
+
+                let mir = db.mir_with_obvious_optimized(module_to_import, config.clone());
+                let Some(mir) = mir else {
+                    warn!("Module not found.");
+                    return; // TODO: Replace with a panic.
+                };
+                let mir = (*mir).clone();
+
+                let mapping: HashMap<Id, Id> = mir
+                    .body
+                    .all_ids()
+                    .into_iter()
+                    .map(|id| (id, self.id_generator.generate()))
+                    .collect();
+                let mut body_to_insert = mir.body;
+                body_to_insert.replace_ids(&mut |id| *id = mapping[id]);
+
+                *expression = Expression::Multiple(body_to_insert);
+            });
+    }
+}
diff --git a/compiler/src/compiler/mir_optimize/module_stack_cancelling.rs b/compiler/src/compiler/mir_optimize/module_stack_cancelling.rs
new file mode 100644
index 000000000..068b1c997
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/module_stack_cancelling.rs
@@ -0,0 +1,41 @@
+//! Module stack cancelling removes `ModuleStarts` and `ModuleEnds` expressions
+//! without a `Use` in between. Those are guaranteed not to cause cycles: Nested
+//! expressions for the same module can only be created by the same salsa query,
+//! so the import cycle would have been detected right there.
+//!
+//! Here's a before-and-after example of module expressions cancelling out:
+//!
+//! ```mir
+//! # before:
+//! $0 = moduleStarts "some module"
+//! $1 = moduleEnds
+//!
+//! # after:
+//! ```
+
+use crate::compiler::mir::{Expression, Mir};
+use itertools::Itertools;
+use std::collections::HashSet;
+
+impl Mir {
+    pub fn cancel_out_module_expressions(&mut self) {
+        self.body.visit_bodies(&mut |body| {
+            let mut indices_of_expressions_to_eliminate = HashSet::new();
+
+            for ((a_index, (_, a)), (b_index, (_, b))) in body.iter().enumerate().tuple_windows() {
+                if matches!(a, Expression::ModuleStarts { .. })
+                    && matches!(b, Expression::ModuleEnds)
+                {
+                    indices_of_expressions_to_eliminate.insert(a_index);
+                    indices_of_expressions_to_eliminate.insert(b_index);
+                }
+            }
+
+            for (index, (_, expr)) in body.iter_mut().enumerate() {
+                if indices_of_expressions_to_eliminate.contains(&index) {
+                    *expr = Expression::nothing();
+                }
+            }
+        });
+    }
+}
diff --git a/compiler/src/compiler/mir_optimize/multiple_flattening.rs b/compiler/src/compiler/mir_optimize/multiple_flattening.rs
new file mode 100644
index 000000000..11ae4bef5
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/multiple_flattening.rs
@@ -0,0 +1,27 @@
+//! Multiple flattening lifts `Expression::Multiple` into the parent body.
+//!
+//! For convenience reasons, other optimizations such as [module folding] and
+//! [inlining] may insert `Expression::Multiple`s in the code. This optimization
+//! removes those expressions by inlining their content into the parent body.
+//!
+//! Here's a before-and-after example:
+//!
+//! ```mir
+//! $0 =         |
+//!   $1 = ...   | $1 = ...
+//!   $2 = ...   | $2 = ...
+//!              | $0 = $2
+//! ```
+//!
+//! [module folding]: super::module_folding
+//! [inlining]: super::inlining
+
+use crate::compiler::mir::Mir;
+
+impl Mir {
+    pub fn flatten_multiples(&mut self) {
+        // For efficiency reasons, flattening multiples operates directly on the
+        // body's internal state and is thus defined directly in the MIR module.
+        self.body.flatten_multiples();
+    }
+}
diff --git a/compiler/src/compiler/mir_optimize/reference_following.rs b/compiler/src/compiler/mir_optimize/reference_following.rs
new file mode 100644
index 000000000..4ecf25682
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/reference_following.rs
@@ -0,0 +1,57 @@
+//! Reference following avoids reference expressions by replacing their usages
+//! with the original referenced value.
+//!
+//! Here's a before-and-after example:
+//!
+//! ```mir
+//! $0 = Foo               | $0 = Foo
+//! $1 = $0                | $1 = $0
+//! $2 = call ... with $1  | $2 = call ... with $0
+//! 
```
+//!
+//! This is useful for [constant folding], which tests for specific expression
+//! types. For example, to constant-fold a `builtinIntAdd`, it tests whether
+//! both arguments are an `Expression::Int`. An `Expression::Reference` prevents
+//! that optimization.
+//!
+//! [constant folding]: super::constant_folding
+
+use crate::compiler::mir::{Expression, Id, Mir};
+use std::collections::HashMap;
+
+impl Mir {
+    pub fn follow_references(&mut self) {
+        let mut replacements = HashMap::<Id, Id>::new();
+
+        self.body.visit(&mut |id, expression, _| {
+            if let Expression::Reference(reference) = &expression {
+                let replacement = *replacements.get(reference).unwrap_or(reference);
+                replacements.insert(id, replacement);
+            }
+        });
+        self.body.visit(&mut |_, expression, _| {
+            expression.replace_id_references(&mut |id| {
+                if let Some(&replacement) = replacements.get(id) {
+                    *id = replacement;
+                }
+            });
+        });
+    }
+
+    pub fn remove_redundant_return_references(&mut self) {
+        self.body.visit_bodies(&mut |body| {
+            loop {
+                let mut from_back = body.iter_mut().rev();
+                let (last_id, last_expression) = from_back.next().unwrap();
+                let Some((before_last_id, _)) = from_back.next() else { return; };
+
+                if let Expression::Reference(referenced) = last_expression && before_last_id == *referenced {
+                    drop(from_back);
+                    body.remove_all(|id, _| last_id == id);
+                } else {
+                    break;
+                }
+            }
+        });
+    }
+}
diff --git a/compiler/src/compiler/mir_optimize/tree_shaking.rs b/compiler/src/compiler/mir_optimize/tree_shaking.rs
new file mode 100644
index 000000000..20fc2b31e
--- /dev/null
+++ b/compiler/src/compiler/mir_optimize/tree_shaking.rs
@@ -0,0 +1,49 @@
+//! Tree shaking removes unused pure expressions.
+//!
+//! Here's a before-and-after example:
+//!
+//! ```mir
+//! $0 = 4                 | $0 = 4
+//! $1 = Foo               |
+//! $2 = $0                |
+//! $3 = call ... with $0  | $3 = call ... with $0
+//! ```
+//!
+//! This is useful because other optimization passes such as [constant folding]
+//! 
cause some expressions to be no longer needed. +//! +//! [constant folding]: super::constant_folding + +use crate::compiler::mir::{Body, Expression, Id, Mir}; +use itertools::Itertools; +use std::collections::HashSet; + +impl Mir { + pub fn tree_shake(&mut self) { + self.body.tree_shake(&mut HashSet::new()); + } +} +impl Body { + fn tree_shake(&mut self, keep: &mut HashSet) { + let body = self.iter_mut().collect_vec(); + let mut ids_to_remove = vec![]; + + let return_value_id = body.last().unwrap().0; + keep.insert(return_value_id); + + for (id, expression) in body.into_iter().rev() { + if !expression.is_pure() || keep.contains(&id) { + keep.insert(id); + keep.extend(expression.referenced_ids()); + + if let Expression::Lambda { body, .. } = expression { + body.tree_shake(keep); + } + } else { + ids_to_remove.push(id); + } + } + + self.remove_all(|id, _| ids_to_remove.contains(&id)); + } +} diff --git a/compiler/src/compiler/mir_optimize/utils.rs b/compiler/src/compiler/mir_optimize/utils.rs new file mode 100644 index 000000000..395f5ca89 --- /dev/null +++ b/compiler/src/compiler/mir_optimize/utils.rs @@ -0,0 +1,417 @@ +use crate::compiler::mir::{Body, Expression, Id, Mir, VisibleExpressions}; +use std::{collections::HashSet, mem}; +use tracing::error; + +impl Expression { + /// All IDs defined inside this expression. For all expressions except + /// lambdas, this returns an empty vector. The IDs are returned in the order + /// that they are defined in. + pub fn defined_ids(&self) -> Vec { + let mut defined = vec![]; + self.collect_defined_ids(&mut defined); + defined + } + fn collect_defined_ids(&self, defined: &mut Vec) { + match self { + Expression::Lambda { + parameters, + responsible_parameter, + body, + .. 
+ } => { + defined.extend(parameters); + defined.push(*responsible_parameter); + body.collect_defined_ids(defined); + } + Expression::Multiple(body) => body.collect_defined_ids(defined), + _ => {} + } + } +} +impl Body { + pub fn defined_ids(&self) -> Vec { + let mut defined = vec![]; + self.collect_defined_ids(&mut defined); + defined + } + fn collect_defined_ids(&self, defined: &mut Vec) { + for (id, expression) in self.iter() { + defined.push(id); + expression.collect_defined_ids(defined); + } + } +} + +impl Expression { + /// All IDs referenced inside this expression. If this is a lambda, this + /// also includes references to locally defined IDs. The IDs are returned + /// as a set, so each ID appears at most once and the iteration order is + /// unspecified. + pub fn referenced_ids(&self) -> HashSet { + let mut referenced = HashSet::new(); + self.collect_referenced_ids(&mut referenced); + referenced + } + fn collect_referenced_ids(&self, referenced: &mut HashSet) { + match self { + Expression::Int(_) + | Expression::Text(_) + | Expression::Symbol(_) + | Expression::Builtin(_) + | Expression::HirId(_) => {} + Expression::List(items) => { + referenced.extend(items); + } + Expression::Struct(fields) => { + for (key, value) in fields { + referenced.insert(*key); + referenced.insert(*value); + } + } + Expression::Reference(reference) => { + referenced.insert(*reference); + } + Expression::Lambda { body, ..
} => body.collect_referenced_ids(referenced), + Expression::Parameter => {} + Expression::Call { + function, + arguments, + responsible, + } => { + referenced.insert(*function); + referenced.extend(arguments); + referenced.insert(*responsible); + } + Expression::UseModule { + current_module: _, + relative_path, + responsible, + } => { + referenced.insert(*relative_path); + referenced.insert(*responsible); + } + Expression::Panic { + reason, + responsible, + } => { + referenced.insert(*reason); + referenced.insert(*responsible); + } + Expression::Multiple(body) => body.collect_referenced_ids(referenced), + Expression::ModuleStarts { .. } => {} + Expression::ModuleEnds => {} + Expression::TraceCallStarts { + hir_call, + function, + arguments, + responsible, + } => { + referenced.insert(*hir_call); + referenced.insert(*function); + referenced.extend(arguments); + referenced.insert(*responsible); + } + Expression::TraceCallEnds { return_value } => { + referenced.insert(*return_value); + } + Expression::TraceExpressionEvaluated { + hir_expression, + value, + } => { + referenced.insert(*hir_expression); + referenced.insert(*value); + } + Expression::TraceFoundFuzzableClosure { + hir_definition, + closure, + } => { + referenced.insert(*hir_definition); + referenced.insert(*closure); + } + } + } +} +impl Body { + fn collect_referenced_ids(&self, referenced: &mut HashSet) { + for (_, expression) in self.iter() { + expression.collect_referenced_ids(referenced); + } + } +} + +impl Expression { + pub fn captured_ids(&self) -> Vec { + let mut ids = self.referenced_ids().into_iter().collect::>(); + for id in self.defined_ids() { + ids.remove(&id); + } + ids.into_iter().collect() + } +} + +impl Body { + pub fn all_ids(&self) -> HashSet { + let mut ids = self.defined_ids().into_iter().collect::>(); + self.collect_referenced_ids(&mut ids); + ids + } +} + +impl Expression { + pub fn is_pure(&self) -> bool { + match self { + Expression::Int(_) => true, + Expression::Text(_) => true, 
+ Expression::Reference(_) => true, + Expression::Symbol(_) => true, + Expression::List(_) => true, + Expression::Struct(_) => true, + Expression::Lambda { .. } => true, + Expression::Parameter => false, + Expression::Builtin(_) => true, + Expression::HirId(_) => true, + Expression::Call { .. } => false, + Expression::UseModule { .. } => false, + Expression::Panic { .. } => false, + Expression::Multiple(body) => body.iter().all(|(_, expr)| expr.is_pure()), + Expression::ModuleStarts { .. } => false, + Expression::ModuleEnds => false, + Expression::TraceCallStarts { .. } + | Expression::TraceCallEnds { .. } + | Expression::TraceExpressionEvaluated { .. } + | Expression::TraceFoundFuzzableClosure { .. } => false, + } + } + + /// Whether the value of this expression is pure and known at compile-time. + /// This is useful for moving expressions around without changing the + /// semantics. + pub fn is_constant(&self, visible: &VisibleExpressions) -> bool { + self.is_pure() + && self + .captured_ids() + .iter() + .all(|captured| visible.get(*captured).is_constant(visible)) + } +} + +impl Id { + pub fn semantically_equals(self, other: Id, visible: &VisibleExpressions) -> Option { + if self == other { + return Some(true); + } + + let self_expr = visible.get(self); + let other_expr = visible.get(other); + + if let Expression::Reference(reference) = self_expr { + return reference.semantically_equals(other, visible); + } + if let Expression::Reference(reference) = other_expr { + return self.semantically_equals(*reference, visible); + } + + if matches!(self_expr, Expression::Parameter) || matches!(other_expr, Expression::Parameter) + { + return None; + } + + if self_expr == other_expr { + return Some(true); + } + + if !self_expr.is_constant(visible) || !other_expr.is_constant(visible) { + return None; + } + + Some(false) + } +} + +impl Expression { + /// Replaces all referenced IDs. Does *not* replace IDs that are defined in + /// this expression. 
+ pub fn replace_id_references(&mut self, replacer: &mut F) { + match self { + Expression::Int(_) + | Expression::Text(_) + | Expression::Symbol(_) + | Expression::Builtin(_) + | Expression::HirId(_) => {} + Expression::List(items) => { + for item in items { + replacer(item); + } + } + Expression::Struct(fields) => { + for (key, value) in fields { + replacer(key); + replacer(value); + } + } + Expression::Reference(reference) => replacer(reference), + Expression::Lambda { + parameters, + responsible_parameter, + body, + } => { + for parameter in parameters { + replacer(parameter); + } + replacer(responsible_parameter); + body.replace_id_references(replacer); + } + Expression::Parameter => {} + Expression::Call { + function, + arguments, + responsible, + } => { + replacer(function); + for argument in arguments { + replacer(argument); + } + replacer(responsible); + } + Expression::UseModule { + current_module: _, + relative_path, + responsible, + } => { + replacer(relative_path); + replacer(responsible); + } + Expression::Panic { + reason, + responsible, + } => { + replacer(reason); + replacer(responsible); + } + Expression::Multiple(body) => body.replace_id_references(replacer), + Expression::TraceCallStarts { + hir_call, + function, + arguments, + responsible, + } => { + replacer(hir_call); + replacer(function); + for argument in arguments { + replacer(argument); + } + replacer(responsible); + } + Expression::ModuleStarts { module: _ } => {} + Expression::ModuleEnds => {} + Expression::TraceCallEnds { return_value } => { + replacer(return_value); + } + Expression::TraceExpressionEvaluated { + hir_expression, + value, + } => { + replacer(hir_expression); + replacer(value); + } + Expression::TraceFoundFuzzableClosure { + hir_definition, + closure, + } => { + replacer(hir_definition); + replacer(closure); + } + } + } +} +impl Body { + pub fn replace_id_references(&mut self, replacer: &mut F) { + for (_, expression) in self.iter_mut() { + 
expression.replace_id_references(replacer); + } + } +} + +impl Expression { + /// Replaces all IDs in this expression using the replacer, including + /// definitions. + pub fn replace_ids(&mut self, replacer: &mut F) { + match self { + Expression::Lambda { + parameters, + responsible_parameter, + body, + } => { + for parameter in parameters { + replacer(parameter); + } + replacer(responsible_parameter); + body.replace_ids(replacer); + } + Expression::Multiple(body) => body.replace_ids(replacer), + // All other expressions don't define IDs and instead only contain + // references. Thus, the function above does the job. + _ => self.replace_id_references(replacer), + } + } +} +impl Body { + pub fn replace_ids(&mut self, replacer: &mut F) { + let body = mem::take(self); + for (mut id, mut expression) in body.into_iter() { + replacer(&mut id); + expression.replace_ids(replacer); + self.push(id, expression); + } + } +} + +impl Mir { + pub fn validate(&self) { + self.validate_body(&self.body, &mut HashSet::new(), im::HashSet::new()); + } + fn validate_body( + &self, + body: &Body, + defined_ids: &mut HashSet, + mut visible: im::HashSet, + ) { + if body.iter().next().is_none() { + error!("A body of a lambda is empty! Lambdas should have at least a return value."); + error!("This is the MIR:\n{self}"); + panic!("MIR is invalid!"); + } + for (id, expression) in body.iter() { + for captured in expression.captured_ids() { + if !visible.contains(&captured) { + error!("MIR is invalid! 
{id} captures {captured}, but that's not visible."); + error!("This is the MIR:\n{self}"); + panic!("MIR is invalid!"); + } + } + if let Expression::Lambda { + parameters, + responsible_parameter, + body, + } = expression + { + let mut inner_visible = visible.clone(); + inner_visible.extend(parameters.iter().copied()); + inner_visible.insert(*responsible_parameter); + self.validate_body(body, defined_ids, inner_visible); + } + if let Expression::Multiple(body) = expression { + self.validate_body(body, defined_ids, visible.clone()); + } + + if defined_ids.contains(&id) { + error!("ID {id} exists twice."); + error!("This is the MIR:\n{self}"); + panic!("MIR is invalid!"); + } + defined_ids.insert(id); + + visible.insert(id); + } + } +} diff --git a/compiler/src/compiler/mir_to_lir.rs b/compiler/src/compiler/mir_to_lir.rs new file mode 100644 index 000000000..9927f3fb2 --- /dev/null +++ b/compiler/src/compiler/mir_to_lir.rs @@ -0,0 +1,245 @@ +use super::{ + cst::CstDb, + hir_to_mir::TracingConfig, + lir::{Instruction, Lir, StackOffset}, + mir::{Body, Expression, Id}, + mir_optimize::OptimizeMir, +}; +use crate::{module::Module, utils::CountableId}; +use itertools::Itertools; +use std::sync::Arc; + +#[salsa::query_group(MirToLirStorage)] +pub trait MirToLir: CstDb + OptimizeMir { + fn lir(&self, module: Module, config: TracingConfig) -> Option>; +} + +fn lir(db: &dyn MirToLir, module: Module, config: TracingConfig) -> Option> { + let mir = db.mir_with_obvious_optimized(module, config)?; + let instructions = compile_lambda(&[], &[], Id::from_usize(0), &mir.body); + Some(Arc::new(Lir { instructions })) +} + +fn compile_lambda( + captured: &[Id], + parameters: &[Id], + responsible_parameter: Id, + body: &Body, +) -> Vec { + let mut context = LoweringContext::default(); + for captured in captured { + context.stack.push(*captured); + } + for parameter in parameters { + context.stack.push(*parameter); + } + context.stack.push(responsible_parameter); + + for (id, expression) 
in body.iter() { + context.compile_expression(id, expression); + } + + // The stack should only contain the return value. + let dummy_id = Id::from_usize(0); + context.emit( + dummy_id, + Instruction::PopMultipleBelowTop(context.stack.len() - 1), + ); + context.emit(dummy_id, Instruction::Return); + + context.instructions +} + +#[derive(Default)] +struct LoweringContext { + stack: Vec, + instructions: Vec, +} +impl LoweringContext { + fn compile_expression(&mut self, id: Id, expression: &Expression) { + match expression { + Expression::Int(int) => self.emit(id, Instruction::CreateInt(int.clone())), + Expression::Text(text) => self.emit(id, Instruction::CreateText(text.clone())), + Expression::Reference(reference) => { + self.emit_push_from_stack(*reference); + self.stack.replace_top_id(id); + } + Expression::Symbol(symbol) => self.emit(id, Instruction::CreateSymbol(symbol.clone())), + Expression::Builtin(builtin) => { + self.emit(id, Instruction::CreateBuiltin(*builtin)); + } + Expression::List(items) => { + for item in items { + self.emit_push_from_stack(*item); + } + self.emit( + id, + Instruction::CreateList { + num_items: items.len(), + }, + ); + } + Expression::Struct(fields) => { + for (key, value) in fields { + self.emit_push_from_stack(*key); + self.emit_push_from_stack(*value); + } + self.emit( + id, + Instruction::CreateStruct { + num_fields: fields.len(), + }, + ); + } + Expression::HirId(hir_id) => { + self.emit(id, Instruction::CreateHirId(hir_id.clone())); + } + Expression::Lambda { + parameters, + responsible_parameter, + body, + } => { + let captured = expression.captured_ids(); + let instructions = + compile_lambda(&captured, parameters, *responsible_parameter, body); + + self.emit( + id, + Instruction::CreateClosure { + captured: captured + .iter() + .map(|id| self.stack.find_id(*id)) + .collect_vec(), + num_args: parameters.len(), + body: instructions, + }, + ); + } + Expression::Parameter => { + panic!("The MIR should not contain any parameter 
expressions.") + } + Expression::Call { + function, + arguments, + responsible, + } => { + self.emit_push_from_stack(*function); + for argument in arguments { + self.emit_push_from_stack(*argument); + } + self.emit_push_from_stack(*responsible); + self.emit( + id, + Instruction::Call { + num_args: arguments.len(), + }, + ); + } + Expression::UseModule { + current_module, + relative_path, + responsible, + } => { + self.emit_push_from_stack(*relative_path); + self.emit_push_from_stack(*responsible); + self.emit( + id, + Instruction::UseModule { + current_module: current_module.clone(), + }, + ); + } + Expression::Panic { + reason, + responsible, + } => { + self.emit_push_from_stack(*reason); + self.emit_push_from_stack(*responsible); + self.emit(id, Instruction::Panic); + } + Expression::Multiple(_) => { + panic!("The MIR shouldn't contain multiple expressions anymore."); + } + Expression::ModuleStarts { module } => { + self.emit( + id, + Instruction::ModuleStarts { + module: module.clone(), + }, + ); + } + Expression::ModuleEnds => self.emit(id, Instruction::ModuleEnds), + Expression::TraceCallStarts { + hir_call, + function, + arguments, + responsible, + } => { + self.emit_push_from_stack(*hir_call); + self.emit_push_from_stack(*function); + for argument in arguments { + self.emit_push_from_stack(*argument); + } + self.emit_push_from_stack(*responsible); + self.emit( + id, + Instruction::TraceCallStarts { + num_args: arguments.len(), + }, + ); + } + Expression::TraceCallEnds { return_value } => { + self.emit_push_from_stack(*return_value); + self.emit(id, Instruction::TraceCallEnds); + } + Expression::TraceExpressionEvaluated { + hir_expression, + value, + } => { + self.emit_push_from_stack(*hir_expression); + self.emit_push_from_stack(*value); + self.emit(id, Instruction::TraceExpressionEvaluated); + } + Expression::TraceFoundFuzzableClosure { + hir_definition, + closure, + } => { + self.emit_push_from_stack(*hir_definition); + self.emit_push_from_stack(*closure); 
+ self.emit(id, Instruction::TraceFoundFuzzableClosure); + } + } + } + + fn emit_push_from_stack(&mut self, id: Id) { + let offset = self.stack.find_id(id); + self.emit(id, Instruction::PushFromStack(offset)); + } + fn emit(&mut self, id: Id, instruction: Instruction) { + instruction.apply_to_stack(&mut self.stack, id); + self.instructions.push(instruction); + } +} + +trait StackExt { + fn pop_multiple(&mut self, n: usize); + fn find_id(&self, id: Id) -> StackOffset; + fn replace_top_id(&mut self, id: Id); +} +impl StackExt for Vec { + fn pop_multiple(&mut self, n: usize) { + for _ in 0..n { + self.pop(); + } + } + fn find_id(&self, id: Id) -> StackOffset { + self.iter() + .rev() + .position(|it| *it == id) + .unwrap_or_else(|| panic!("Id {} not found in stack: {}", id, self.iter().join(" "))) + } + fn replace_top_id(&mut self, id: Id) { + self.pop().unwrap(); + self.push(id); + } +} diff --git a/compiler/src/compiler/mod.rs b/compiler/src/compiler/mod.rs index 99069e1f2..323780b11 100644 --- a/compiler/src/compiler/mod.rs +++ b/compiler/src/compiler/mod.rs @@ -5,8 +5,11 @@ pub mod cst; pub mod cst_to_ast; pub mod error; pub mod hir; -pub mod hir_to_lir; +pub mod hir_to_mir; pub mod lir; +pub mod mir; +pub mod mir_optimize; +pub mod mir_to_lir; pub mod rcst; pub mod rcst_to_cst; pub mod string_to_rcst; diff --git a/compiler/src/compiler/string_to_rcst.rs b/compiler/src/compiler/string_to_rcst.rs index 446a4ef72..6b0506513 100644 --- a/compiler/src/compiler/string_to_rcst.rs +++ b/compiler/src/compiler/string_to_rcst.rs @@ -1,5 +1,5 @@ use super::rcst::{Rcst, RcstError}; -use crate::module::{Module, ModuleDb}; +use crate::module::{Module, ModuleDb, Package}; use std::sync::Arc; #[salsa::query_group(StringToRcstStorage)] @@ -8,6 +8,9 @@ pub trait StringToRcst: ModuleDb { } fn rcst(db: &dyn StringToRcst, module: Module) -> Result>, InvalidModuleError> { + if let Package::Tooling(_) = &module.package { + return Err(InvalidModuleError::IsToolingModule); + } let source = 
db .get_module_content(module) .ok_or(InvalidModuleError::DoesNotExist)?; @@ -31,6 +34,7 @@ fn rcst(db: &dyn StringToRcst, module: Module) -> Result>, Invalid pub enum InvalidModuleError { DoesNotExist, InvalidUtf8, + IsToolingModule, } impl Rcst { diff --git a/compiler/src/database.rs b/compiler/src/database.rs index 6da2b9b46..7e9ef4457 100644 --- a/compiler/src/database.rs +++ b/compiler/src/database.rs @@ -1,7 +1,8 @@ use crate::{ compiler::{ ast_to_hir::AstToHirStorage, cst::CstDbStorage, cst_to_ast::CstToAstStorage, - hir::HirDbStorage, hir_to_lir::HirToLirStorage, rcst_to_cst::RcstToCstStorage, + hir::HirDbStorage, hir_to_mir::HirToMirStorage, mir_optimize::OptimizeMirStorage, + mir_to_lir::MirToLirStorage, rcst_to_cst::RcstToCstStorage, string_to_rcst::StringToRcstStorage, }, language_server::{ @@ -19,9 +20,11 @@ use tracing::warn; CstToAstStorage, FoldingRangeDbStorage, HirDbStorage, - HirToLirStorage, - ModuleDbStorage, + HirToMirStorage, LspPositionConversionStorage, + MirToLirStorage, + ModuleDbStorage, + OptimizeMirStorage, RcstToCstStorage, ReferencesDbStorage, SemanticTokenDbStorage, diff --git a/compiler/src/fuzzer/fuzzer.rs b/compiler/src/fuzzer/fuzzer.rs index e901020f8..40a353f34 100644 --- a/compiler/src/fuzzer/fuzzer.rs +++ b/compiler/src/fuzzer/fuzzer.rs @@ -1,6 +1,6 @@ use super::generator::generate_n_values; use crate::{ - compiler::hir, + compiler::hir::{self, Id}, vm::{ self, context::{ExecutionController, UseProvider}, @@ -32,6 +32,7 @@ pub enum Status { PanickedForArguments { arguments: Vec, reason: String, + responsible: hir::Id, tracer: FullTracer, }, } @@ -52,7 +53,7 @@ impl Status { .collect_vec(); let mut vm = Vm::new(); - vm.set_up_for_running_closure(vm_heap, closure, &argument_addresses); + vm.set_up_for_running_closure(vm_heap, closure, &argument_addresses, Id::fuzzer()); Status::StillFuzzing { vm, @@ -109,19 +110,15 @@ impl Fuzzer { Status::StillFuzzing { vm, arguments, tracer } } vm::Status::WaitingForOperations => 
panic!("Fuzzing should not have to wait on channel operations because arguments were not channels."), - // The VM finished running without panicking. vm::Status::Done => Status::new_fuzzing_attempt(&self.closure_heap, self.closure), vm::Status::Panicked { reason, responsible } => { - // If a `needs` directly inside the tested closure was not - // satisfied, then the panic is not closure's fault, but our - // fault. - let is_our_fault = responsible.is_none(); - if is_our_fault { + if responsible == Id::fuzzer() { Status::new_fuzzing_attempt(&self.closure_heap, self.closure) } else { Status::PanickedForArguments { arguments, reason, + responsible, tracer, } } @@ -132,10 +129,12 @@ impl Fuzzer { Status::PanickedForArguments { arguments, reason, + responsible, tracer, } => Status::PanickedForArguments { arguments, reason, + responsible, tracer, }, } diff --git a/compiler/src/fuzzer/mod.rs b/compiler/src/fuzzer/mod.rs index fd50e4cda..4cae9f49d 100644 --- a/compiler/src/fuzzer/mod.rs +++ b/compiler/src/fuzzer/mod.rs @@ -7,7 +7,7 @@ pub use self::{ utils::FuzzablesFinder, }; use crate::{ - compiler::hir::Id, + compiler::{hir::Id, hir_to_mir::TracingConfig}, database::Database, module::Module, vm::{ @@ -17,14 +17,31 @@ use crate::{ }, }; use itertools::Itertools; +use std::collections::HashMap; use tracing::{error, info}; pub async fn fuzz(db: &Database, module: Module) -> Vec { - let (fuzzables_heap, fuzzables): (Heap, Vec<(Id, Pointer)>) = { + let config = TracingConfig { + register_fuzzables: true, + trace_calls: false, + trace_evaluated_expressions: false, + }; + + let (fuzzables_heap, fuzzables): (Heap, HashMap) = { let mut tracer = FuzzablesFinder::default(); let mut vm = Vm::new(); - vm.set_up_for_running_module_closure(Closure::of_module(db, module).unwrap()); - vm.run(&DbUseProvider { db }, &mut RunForever, &mut tracer); + vm.set_up_for_running_module_closure( + module.clone(), + Closure::of_module(db, module, config.clone()).unwrap(), + ); + vm.run( + 
&DbUseProvider { + db, + config: config.clone(), + }, + &mut RunForever, + &mut tracer, + ); (tracer.heap, tracer.fuzzables) }; @@ -39,14 +56,18 @@ pub async fn fuzz(db: &Database, module: Module) -> Vec { info!("Fuzzing {id}."); let mut fuzzer = Fuzzer::new(&fuzzables_heap, closure, id.clone()); fuzzer.run( - &mut DbUseProvider { db }, - &mut RunLimitedNumberOfInstructions::new(1000), + &mut DbUseProvider { + db, + config: config.clone(), + }, + &mut RunLimitedNumberOfInstructions::new(100000), ); match fuzzer.into_status() { Status::StillFuzzing { .. } => {} Status::PanickedForArguments { arguments, reason, + responsible, tracer, } => { error!("The fuzzer discovered an input that crashes {id}:"); @@ -54,6 +75,7 @@ pub async fn fuzz(db: &Database, module: Module) -> Vec { closure: id, arguments, reason, + responsible, tracer, }; case.dump(db); @@ -69,13 +91,14 @@ pub struct FailingFuzzCase { closure: Id, arguments: Vec, reason: String, + responsible: Id, tracer: FullTracer, } impl FailingFuzzCase { pub fn dump(&self, db: &Database) { error!( - "Calling `{} {}` doesn't work because {}.", + "Calling `{} {}` panics: {}", self.closure, self.arguments .iter() @@ -83,6 +106,7 @@ impl FailingFuzzCase { .join(" "), self.reason, ); + error!("{} is responsible.", self.responsible,); error!( "This is the stack trace:\n{}", self.tracer.format_panic_stack_trace_to_root_fiber(db) diff --git a/compiler/src/fuzzer/utils.rs b/compiler/src/fuzzer/utils.rs index 0e4bfefdd..b67406577 100644 --- a/compiler/src/fuzzer/utils.rs +++ b/compiler/src/fuzzer/utils.rs @@ -9,24 +9,25 @@ use std::collections::HashMap; #[derive(Default)] pub struct FuzzablesFinder { - pub fuzzables: Vec<(Id, Pointer)>, + pub fuzzables: HashMap, pub heap: Heap, transferred_objects: HashMap>, } impl Tracer for FuzzablesFinder { fn add(&mut self, event: VmEvent) { let VmEvent::InFiber { fiber, event } = event else { return; }; - let FiberEvent::FoundFuzzableClosure { id, closure, heap } = event else { return; }; + 
let FiberEvent::FoundFuzzableClosure { definition, closure, heap } = event else { return; }; + let definition = heap.get_hir_id(definition); let address_map = self .transferred_objects .entry(fiber) .or_insert_with(HashMap::new); - let address = heap.clone_single_to_other_heap_with_existing_mapping( + let closure = heap.clone_single_to_other_heap_with_existing_mapping( &mut self.heap, closure, address_map, ); - self.fuzzables.push((id, address)); + self.fuzzables.insert(definition, closure); } } diff --git a/compiler/src/language_server/hints/constant_evaluator.rs b/compiler/src/language_server/hints/constant_evaluator.rs index d5084a405..39f47de4c 100644 --- a/compiler/src/language_server/hints/constant_evaluator.rs +++ b/compiler/src/language_server/hints/constant_evaluator.rs @@ -5,6 +5,7 @@ use crate::{ ast_to_hir::AstToHir, cst_to_ast::CstToAst, hir::Id, + hir_to_mir::TracingConfig, }, database::Database, language_server::hints::{utils::id_to_end_of_line, HintKind}, @@ -14,7 +15,7 @@ use crate::{ context::{DbUseProvider, RunLimitedNumberOfInstructions}, tracer::{ full::{FullTracer, StoredFiberEvent, StoredVmEvent, TimedEvent}, - stack_trace::StackEntry, + stack_trace::Call, }, Closure, FiberId, Heap, Pointer, Vm, }, @@ -22,7 +23,7 @@ use crate::{ use itertools::Itertools; use rand::{prelude::SliceRandom, thread_rng}; use std::collections::HashMap; -use tracing::{span, trace, Level}; +use tracing::{span, Level}; #[derive(Default)] pub struct ConstantEvaluator { @@ -35,9 +36,17 @@ struct Evaluator { impl ConstantEvaluator { pub fn update_module(&mut self, db: &Database, module: Module) { + let config = TracingConfig { + register_fuzzables: true, + trace_calls: false, + trace_evaluated_expressions: true, + }; let tracer = FullTracer::default(); let mut vm = Vm::new(); - vm.set_up_for_running_module_closure(Closure::of_module(db, module.clone()).unwrap()); + vm.set_up_for_running_module_closure( + module.clone(), + Closure::of_module(db, module.clone(), 
config).unwrap(), + ); self.evaluators.insert(module, Evaluator { tracer, vm }); } @@ -46,28 +55,22 @@ impl ConstantEvaluator { } pub fn run(&mut self, db: &Database) -> Option { - let num_evaluators = self.evaluators.len(); let mut running_evaluators = self .evaluators .iter_mut() .filter(|(_, evaluator)| matches!(evaluator.vm.status(), vm::Status::CanRun)) .collect_vec(); - trace!( - "Constant evaluator running. {} running VMs, {} in total.", - running_evaluators.len(), - num_evaluators, + let (module, evaluator) = running_evaluators.choose_mut(&mut thread_rng())?; + + evaluator.vm.run( + &DbUseProvider { + db, + config: TracingConfig::none(), + }, + &mut RunLimitedNumberOfInstructions::new(500), + &mut evaluator.tracer, ); - - if let Some((module, evaluator)) = running_evaluators.choose_mut(&mut thread_rng()) { - evaluator.vm.run( - &DbUseProvider { db }, - &mut RunLimitedNumberOfInstructions::new(500), - &mut evaluator.tracer, - ); - Some(module.clone()) - } else { - None - } + Some(module.clone()) } pub fn get_fuzzable_closures(&self, module: &Module) -> (Heap, Vec<(Id, Pointer)>) { @@ -78,9 +81,13 @@ impl ConstantEvaluator { .iter() .filter_map(|event| match &event.event { StoredVmEvent::InFiber { - event: StoredFiberEvent::FoundFuzzableClosure { id, closure }, + event: + StoredFiberEvent::FoundFuzzableClosure { + definition: id, + closure, + }, .. - } => Some((id.clone(), *closure)), + } => Some((evaluator.tracer.heap.get_hir_id(*id), *closure)), _ => None, }) .collect(); @@ -103,7 +110,8 @@ impl ConstantEvaluator { for TimedEvent { event, .. } in &evaluator.tracer.events { let StoredVmEvent::InFiber { event, .. 
} = event else { continue; }; - let StoredFiberEvent::ValueEvaluated { id, value } = event else { continue; }; + let StoredFiberEvent::ValueEvaluated { expression, value } = event else { continue; }; + let id = evaluator.tracer.heap.get_hir_id(*expression); if &id.module != module { continue; @@ -152,46 +160,31 @@ fn panic_hint( return None; } - let last_call_in_this_module = stack.iter().find(|entry| { - let id = match entry { - StackEntry::Call { id, .. } => id, - StackEntry::Needs { id, .. } => id, - _ => return false, - }; + let last_call_in_this_module = stack.iter().find(|call| { + let call_site = evaluator.tracer.heap.get_hir_id(call.call_site); // Make sure the entry comes from the same file and is not generated // code. - id.module == module && db.hir_to_cst_id(id.clone()).is_some() + call_site.module == module && db.hir_to_cst_id(call_site).is_some() })?; - let (id, call_info) = match last_call_in_this_module { - StackEntry::Call { id, closure, args } => ( - id, - format!( - "{} {}", - closure.format(&evaluator.tracer.heap), - args.iter() - .map(|arg| arg.format(&evaluator.tracer.heap)) - .join(" ") - ), - ), - StackEntry::Needs { - id, - condition, - reason, - } => ( - id, - format!( - "needs {} {}", - condition.format(&evaluator.tracer.heap), - reason.format(&evaluator.tracer.heap) - ), - ), - _ => unreachable!(), - }; + let Call { + call_site, + callee, + arguments: args, + .. 
+ } = last_call_in_this_module; + let call_site = evaluator.tracer.heap.get_hir_id(*call_site); + let call_info = format!( + "{} {}", + callee.format(&evaluator.tracer.heap), + args.iter() + .map(|arg| arg.format(&evaluator.tracer.heap)) + .join(" "), + ); Some(Hint { kind: HintKind::Panic, - text: format!("Calling `{call_info}` panics because {reason}."), - position: id_to_end_of_line(db, id.clone())?, + text: format!("Calling `{call_info}` panics: {reason}"), + position: id_to_end_of_line(db, call_site)?, }) } diff --git a/compiler/src/language_server/hints/fuzzer.rs b/compiler/src/language_server/hints/fuzzer.rs index 6121cb132..e4db378d4 100644 --- a/compiler/src/language_server/hints/fuzzer.rs +++ b/compiler/src/language_server/hints/fuzzer.rs @@ -3,20 +3,20 @@ use crate::{ compiler::{ ast_to_hir::AstToHir, hir::{Expression, HirDb, Id, Lambda}, + hir_to_mir::TracingConfig, }, database::Database, fuzzer::{Fuzzer, Status}, module::Module, vm::{ context::{DbUseProvider, RunLimitedNumberOfInstructions}, - tracer::full::{StoredFiberEvent, StoredVmEvent}, Heap, Pointer, }, }; use itertools::Itertools; use rand::{prelude::SliceRandom, thread_rng}; use std::collections::HashMap; -use tracing::{error, trace}; +use tracing::{debug, error}; #[derive(Default)] pub struct FuzzerManager { @@ -48,14 +48,13 @@ impl FuzzerManager { .flat_map(|fuzzers| fuzzers.values_mut()) .filter(|fuzzer| matches!(fuzzer.status(), Status::StillFuzzing { .. })) .collect_vec(); - trace!( - "Fuzzer running. 
{} fuzzers for relevant closures are running.", - running_fuzzers.len(), - ); let fuzzer = running_fuzzers.choose_mut(&mut thread_rng())?; fuzzer.run( - &mut DbUseProvider { db }, + &mut DbUseProvider { + db, + config: TracingConfig::none(), + }, &mut RunLimitedNumberOfInstructions::new(100), ); @@ -68,96 +67,72 @@ impl FuzzerManager { pub fn get_hints(&self, db: &Database, module: &Module) -> Vec> { let mut hints = vec![]; + debug!("There are {} fuzzers.", self.fuzzers.len()); + for fuzzer in self.fuzzers[module].values() { - if let Status::PanickedForArguments { + let Status::PanickedForArguments { arguments, reason, - tracer, - } = fuzzer.status() - { - let id = fuzzer.closure_id.clone(); - let first_hint = { - let parameter_names = match db.find_expression(id.clone()) { - Some(Expression::Lambda(Lambda { parameters, .. })) => parameters - .into_iter() - .map(|parameter| parameter.keys.last().unwrap().to_string()) - .collect_vec(), - Some(_) => panic!("Looks like we fuzzed a non-closure. That's weird."), - None => { - error!("Using fuzzing, we found an error in a generated closure."); - continue; - } - }; - Hint { - kind: HintKind::Fuzz, - text: format!( - "If this is called with {},", - parameter_names - .iter() - .zip(arguments.iter()) - .map(|(name, argument)| format!("`{name} = {argument:?}`")) - .collect_vec() - .join_with_commas_and_and(), - ), - position: id_to_end_of_line(db, id.clone()).unwrap(), - } - }; + responsible, + .. + } = fuzzer.status() else { continue; }; - let second_hint = { - let panicking_inner_call = tracer - .events - .iter() - .rev() - // Find the innermost panicking call that is in the - // function. - .filter_map(|event| match &event.event { - StoredVmEvent::InFiber { event, .. } => Some(event), - _ => None, - }) - .find(|event| { - let innermost_panicking_call_id = match &event { - StoredFiberEvent::CallStarted { id, .. } => id, - StoredFiberEvent::NeedsStarted { id, .. 
} => id, - _ => return false, - }; - id.is_same_module_and_any_parent_of(innermost_panicking_call_id) - && db.hir_to_cst_id(id.clone()).is_some() - }); - let panicking_inner_call = match panicking_inner_call { - Some(panicking_inner_call) => panicking_inner_call, - None => { - // We found a panicking function without an inner - // panicking needs. This indicates an error during - // compilation within a function body. - continue; - } - }; - let (call_id, name, arguments) = match &panicking_inner_call { - StoredFiberEvent::CallStarted { id, closure, args } => { - (id.clone(), closure.format(&tracer.heap), args.clone()) - } - StoredFiberEvent::NeedsStarted { - id, - condition, - reason, - } => (id.clone(), "needs".to_string(), vec![*condition, *reason]), - _ => unreachable!(), - }; - Hint { - kind: HintKind::Fuzz, - text: format!( - "then `{name} {}` panics because {reason}.", - arguments - .iter() - .map(|arg| arg.format(&tracer.heap)) - .join(" "), - ), - position: id_to_end_of_line(db, call_id).unwrap(), + let id = fuzzer.closure_id.clone(); + let first_hint = { + let parameter_names = match db.find_expression(id.clone()) { + Some(Expression::Lambda(Lambda { parameters, .. })) => parameters + .into_iter() + .map(|parameter| parameter.keys.last().unwrap().to_string()) + .collect_vec(), + Some(_) => panic!("Looks like we fuzzed a non-closure. That's weird."), + None => { + error!("Using fuzzing, we found an error in a generated closure."); + continue; } }; + Hint { + kind: HintKind::Fuzz, + text: format!( + "If this is called with {},", + parameter_names + .iter() + .zip(arguments.iter()) + .map(|(name, argument)| format!("`{name} = {argument:?}`")) + .collect_vec() + .join_with_commas_and_and(), + ), + position: id_to_end_of_line(db, id.clone()).unwrap(), + } + }; + + let second_hint = { + if &responsible.module != module { + // The function panics internally for an input, but it's the + // fault of an inner function that's in another module. 
+ // TODO: The fuzz case should instead be highlighted in the + // used function directly. We don't do that right now + // because we assume the fuzzer will find the panic when + // fuzzing the faulty function, but we should save the + // panicking case (or something like that) in the future. + continue; + } + if db.hir_to_cst_id(id.clone()).is_none() { + panic!( + "It looks like the generated code {responsible} is at fault for a panic." + ); + } + + // TODO: In the future, re-run only the failing case with + // tracing enabled and also show the arguments to the failing + // function in the hint. + Hint { + kind: HintKind::Fuzz, + text: format!("then {responsible} panics: {reason}"), + position: id_to_end_of_line(db, responsible.clone()).unwrap(), + } + }; - hints.push(vec![first_hint, second_hint]); - } + hints.push(vec![first_hint, second_hint]); } hints diff --git a/compiler/src/language_server/hints/mod.rs b/compiler/src/language_server/hints/mod.rs index 20e3d1058..44e7eea44 100644 --- a/compiler/src/language_server/hints/mod.rs +++ b/compiler/src/language_server/hints/mod.rs @@ -23,7 +23,7 @@ use tokio::{ sync::mpsc::{error::TryRecvError, Receiver, Sender}, time::sleep, }; -use tracing::{trace, warn}; +use tracing::{debug, warn}; pub enum Event { UpdateModule(Module, Vec), @@ -66,7 +66,6 @@ pub async fn run_server( let mut outgoing_hints = OutgoingHints::new(outgoing_hints); 'server_loop: loop { - trace!("Hints server is running."); sleep(Duration::from_millis(100)).await; loop { @@ -97,11 +96,15 @@ pub async fn run_server( // priority. When constant evaluation is done, we try fuzzing the // functions we found. let module_with_new_insight = 'new_insight: { + debug!("Constant evaluating…"); if let Some(module) = constant_evaluator.run(&db) { let (heap, closures) = constant_evaluator.get_fuzzable_closures(&module); fuzzer.update_module(module.clone(), &heap, &closures); break 'new_insight Some(module); } + // For fuzzing, we're a bit more resource-conscious. 
+ sleep(Duration::from_millis(200)).await; + debug!("Fuzzing…"); if let Some(module) = fuzzer.run(&db) { warn!("Fuzzer found a problem!"); break 'new_insight Some(module); @@ -162,9 +165,12 @@ impl OutgoingHints { } async fn report_hints(&mut self, module: Module, hints: Vec) { + debug!("Reporting hints for {module}:\n{hints:?}"); if self.last_sent.get(&module) != Some(&hints) { self.last_sent.insert(module.clone(), hints.clone()); self.sender.send((module, hints)).await.unwrap(); + } else { + debug!("Not sending hints to the main thread because they're the same as last time."); } } } diff --git a/compiler/src/language_server/mod.rs b/compiler/src/language_server/mod.rs index c88dd1bc2..a8e1ea22c 100644 --- a/compiler/src/language_server/mod.rs +++ b/compiler/src/language_server/mod.rs @@ -85,17 +85,17 @@ impl LanguageServer for CandyLanguageServer { _ => panic!("Workspace folder must be a file URI."), }; - let (events_sender, events_receiver) = tokio::sync::mpsc::channel(16); - let (hints_sender, mut hints_receiver) = tokio::sync::mpsc::channel(8); + let (events_sender, events_receiver) = tokio::sync::mpsc::channel(1024); + let (hints_sender, mut hints_receiver) = tokio::sync::mpsc::channel(1024); tokio::spawn(hints::run_server(events_receiver, hints_sender)); *self.hints_server_sink.lock().await = Some(events_sender); let client = self.client.clone(); let hint_reporter = async move || { while let Some((module, hints)) = hints_receiver.recv().await { - debug!("Reporting hints for {module}: {hints:?}"); + let url: Option = module.into(); client .send_notification::(HintsNotification { - uri: Url::from(module).to_string(), + uri: url.unwrap().to_string(), hints, }) .await; @@ -327,8 +327,9 @@ impl CandyLanguageServer { .map(|it| it.into_diagnostic(&db, module.clone())) .collect() }; + let url: Option = module.clone().into(); self.client - .publish_diagnostics(module.clone().into(), diagnostics, None) + .publish_diagnostics(url.unwrap(), diagnostics, None) .await; } 
} diff --git a/compiler/src/language_server/utils.rs b/compiler/src/language_server/utils.rs index 8a400bed8..9526f781d 100644 --- a/compiler/src/language_server/utils.rs +++ b/compiler/src/language_server/utils.rs @@ -1,8 +1,5 @@ use crate::{ - compiler::{ - error::{CompilerError, CompilerErrorPayload}, - hir::HirError, - }, + compiler::error::CompilerError, database::Database, module::{Module, ModuleDb, Package}, }; @@ -23,12 +20,7 @@ impl CompilerError { code: None, code_description: None, source: Some("🍭 Candy".to_owned()), - message: match self.payload { - CompilerErrorPayload::InvalidUtf8 => "Invalid UTF-8".to_string(), - CompilerErrorPayload::Rcst(rcst) => format!("RCST: {rcst:?}"), - CompilerErrorPayload::Ast(ast) => format!("AST: {ast:?}"), - CompilerErrorPayload::Hir(hir) => hir.format_message(), - }, + message: self.payload.to_string(), related_information: None, tags: None, data: None, @@ -36,32 +28,22 @@ impl CompilerError { } } -impl HirError { - fn format_message(&self) -> String { - match self { - HirError::UnknownReference { name } => format!("Unknown reference β€œ{name}”."), - HirError::PublicAssignmentInNotTopLevel => { - "Public assignments (:=) can only be used in top-level code.".to_string() - } - HirError::PublicAssignmentWithSameName { .. } => "A public assignment with the same name already exists.".to_string(), - HirError::NeedsWithWrongNumberOfArguments { num_args } => format!("`needs` accepts one or two arguments, but was called with {num_args} arguments. 
Its parameters are the `condition` and an optional `message`."), - } - } -} - -impl From for Url { - fn from(module: Module) -> Url { +impl From for Option { + fn from(module: Module) -> Option { match module.package { - Package::User(_) | Package::External(_) => Url::from_file_path( - module - .to_possible_paths() - .unwrap() - .into_iter() - .find_or_first(|path| path.exists()) - .unwrap(), - ) - .unwrap(), - Package::Anonymous { url } => Url::parse(&format!("untitled:{url}",)).unwrap(), + Package::User(_) | Package::External(_) => Some( + Url::from_file_path( + module + .to_possible_paths() + .unwrap() + .into_iter() + .find_or_first(|path| path.exists()) + .unwrap(), + ) + .unwrap(), + ), + Package::Anonymous { url } => Some(Url::parse(&format!("untitled:{url}",)).unwrap()), + Package::Tooling(_) => None, } } } diff --git a/compiler/src/main.rs b/compiler/src/main.rs index 5a83a4062..4376b3bce 100644 --- a/compiler/src/main.rs +++ b/compiler/src/main.rs @@ -11,6 +11,7 @@ mod database; mod fuzzer; mod language_server; mod module; +mod utils; mod vm; use crate::{ @@ -19,7 +20,7 @@ use crate::{ cst_to_ast::CstToAst, error::CompilerError, hir::{self, CollectErrors, Id}, - hir_to_lir::HirToLir, + mir_to_lir::MirToLir, rcst_to_cst::RcstToCst, string_to_rcst::StringToRcst, }, @@ -28,11 +29,15 @@ use crate::{ module::{Module, ModuleKind}, vm::{ context::{DbUseProvider, RunForever}, - tracer::{full::FullTracer, Tracer}, - Closure, ExecutionResult, FiberId, Status, Struct, Vm, + tracer::{dummy::DummyTracer, full::FullTracer, Tracer}, + Closure, Data, ExecutionResult, FiberId, Heap, Packet, SendPort, Status, Struct, Vm, }, }; -use compiler::lir::Lir; +use compiler::{ + hir_to_mir::{HirToMir, TracingConfig}, + lir::Lir, + mir_optimize::OptimizeMir, +}; use itertools::Itertools; use language_server::CandyLanguageServer; use notify::{watcher, RecursiveMode, Watcher}; @@ -40,6 +45,7 @@ use std::{ collections::HashMap, convert::TryInto, env::current_dir, + io::{self, BufRead, 
Write}, path::PathBuf, sync::{mpsc::channel, Arc}, time::Duration, @@ -71,6 +77,9 @@ struct CandyBuildOptions { #[structopt(long)] watch: bool, + #[structopt(long)] + tracing: bool, + #[structopt(parse(from_os_str))] file: PathBuf, } @@ -80,12 +89,18 @@ struct CandyRunOptions { #[structopt(long)] debug: bool, + #[structopt(long)] + tracing: bool, + #[structopt(parse(from_os_str))] file: PathBuf, } #[derive(StructOpt, Debug)] struct CandyFuzzOptions { + #[structopt(long)] + debug: bool, + #[structopt(parse(from_os_str))] file: PathBuf, } @@ -110,12 +125,18 @@ enum Exit { fn build(options: CandyBuildOptions) -> ProgramResult { init_logger(true); + let db = Database::default(); let module = Module::from_package_root_and_file( current_dir().unwrap(), options.file.clone(), ModuleKind::Code, ); - let result = raw_build(module.clone(), options.debug); + let config = TracingConfig { + register_fuzzables: false, + trace_calls: options.tracing, + trace_evaluated_expressions: false, + }; + let result = raw_build(&db, module.clone(), &config, options.debug); if !options.watch { result.ok_or(Exit::FileNotFound).map(|_| ()) @@ -128,108 +149,124 @@ fn build(options: CandyBuildOptions) -> ProgramResult { loop { match rx.recv() { Ok(_) => { - raw_build(module.clone(), options.debug); + raw_build(&db, module.clone(), &config, options.debug); } Err(e) => error!("watch error: {e:#?}"), } } } } -fn raw_build(module: Module, debug: bool) -> Option> { - let db = Database::default(); +fn raw_build( + db: &Database, + module: Module, + config: &TracingConfig, + debug: bool, +) -> Option> { + let rcst = db + .rcst(module.clone()) + .unwrap_or_else(|err| panic!("Error parsing file `{}`: {:?}", module, err)); + if debug { + module.dump_associated_debug_file("rcst", &format!("{:#?}\n", rcst)); + } - tracing::span!(Level::DEBUG, "Parsing string to RCST").in_scope(|| { - let rcst = db - .rcst(module.clone()) - .unwrap_or_else(|err| panic!("Error parsing file `{}`: {:?}", module, err)); - if 
debug { - module.dump_associated_debug_file("rcst", &format!("{:#?}\n", rcst)); - } - }); + let cst = db.cst(module.clone()).unwrap(); + if debug { + module.dump_associated_debug_file("cst", &format!("{:#?}\n", cst)); + } - tracing::span!(Level::DEBUG, "Turning RCST to CST").in_scope(|| { - let cst = db.cst(module.clone()).unwrap(); - if debug { - module.dump_associated_debug_file("cst", &format!("{:#?}\n", cst)); - } - }); - - tracing::span!(Level::DEBUG, "Abstracting CST to AST").in_scope(|| { - let (asts, ast_cst_id_map) = db.ast(module.clone()).unwrap(); - if debug { - module.dump_associated_debug_file( - "ast", - &format!("{}\n", asts.iter().map(|ast| format!("{}", ast)).join("\n")), - ); - module.dump_associated_debug_file( - "ast_to_cst_ids", - &ast_cst_id_map - .keys() - .into_iter() - .sorted_by_key(|it| it.local) - .map(|key| format!("{key} -> {}\n", ast_cst_id_map[key].0)) - .join(""), - ); - } - }); - - tracing::span!(Level::DEBUG, "Turning AST to HIR").in_scope(|| { - let (hir, hir_ast_id_map) = db.hir(module.clone()).unwrap(); - if debug { - module.dump_associated_debug_file("hir", &format!("{}", hir)); - module.dump_associated_debug_file( - "hir_to_ast_ids", - &hir_ast_id_map - .keys() - .into_iter() - .map(|key| format!("{key} -> {}\n", hir_ast_id_map[key])) - .join(""), - ); - } - let mut errors = vec![]; - hir.collect_errors(&mut errors); - for CompilerError { span, payload, .. 
} in errors { - let (start_line, start_col) = db.offset_to_lsp(module.clone(), span.start); - let (end_line, end_col) = db.offset_to_lsp(module.clone(), span.end); - warn!("{start_line}:{start_col} – {end_line}:{end_col}: {payload:?}"); - } - }); + let (asts, ast_cst_id_map) = db.ast(module.clone()).unwrap(); + if debug { + module.dump_associated_debug_file( + "ast", + &format!("{}\n", asts.iter().map(|ast| format!("{}", ast)).join("\n")), + ); + module.dump_associated_debug_file( + "ast_to_cst_ids", + &ast_cst_id_map + .keys() + .into_iter() + .sorted_by_key(|it| it.local) + .map(|key| format!("{key} -> {}\n", ast_cst_id_map[key].0)) + .join(""), + ); + } - let lir = tracing::span!(Level::DEBUG, "Lowering HIR to LIR").in_scope(|| { - let lir = db.lir(module.clone()).unwrap(); - if debug { - module.dump_associated_debug_file("lir", &format!("{lir}")); - } - lir - }); + let (hir, hir_ast_id_map) = db.hir(module.clone()).unwrap(); + if debug { + module.dump_associated_debug_file("hir", &format!("{}", hir)); + module.dump_associated_debug_file( + "hir_to_ast_ids", + &hir_ast_id_map + .keys() + .into_iter() + .map(|key| format!("{key} -> {}\n", hir_ast_id_map[key])) + .join(""), + ); + } + + let mut errors = vec![]; + hir.collect_errors(&mut errors); + for CompilerError { span, payload, .. 
} in errors { + let (start_line, start_col) = db.offset_to_lsp(module.clone(), span.start); + let (end_line, end_col) = db.offset_to_lsp(module.clone(), span.end); + warn!("{start_line}:{start_col} – {end_line}:{end_col}: {payload:?}"); + } + + let mir = db.mir(module.clone(), config.clone()).unwrap(); + if debug { + module.dump_associated_debug_file("mir", &format!("{mir}")); + } + + let optimized_mir = db + .mir_with_obvious_optimized(module.clone(), config.clone()) + .unwrap(); + if debug { + module.dump_associated_debug_file("optimized_mir", &format!("{optimized_mir}")); + } + + let lir = db.lir(module.clone(), config.clone()).unwrap(); + if debug { + module.dump_associated_debug_file("lir", &format!("{lir}")); + } Some(lir) } fn run(options: CandyRunOptions) -> ProgramResult { init_logger(true); + let db = Database::default(); let module = Module::from_package_root_and_file( current_dir().unwrap(), options.file.clone(), ModuleKind::Code, ); - let db = Database::default(); - if raw_build(module.clone(), false).is_none() { + let config = TracingConfig { + register_fuzzables: false, + trace_calls: options.tracing, + trace_evaluated_expressions: false, + }; + if raw_build(&db, module.clone(), &config, options.debug).is_none() { warn!("File not found."); return Err(Exit::FileNotFound); }; - // TODO: Optimize the code before running. 
let path_string = options.file.to_string_lossy(); debug!("Running `{path_string}`."); - let module_closure = Closure::of_module(&db, module.clone()).unwrap(); + let module_closure = Closure::of_module(&db, module.clone(), config.clone()).unwrap(); let mut tracer = FullTracer::default(); let mut vm = Vm::new(); - vm.set_up_for_running_module_closure(module_closure); - vm.run(&DbUseProvider { db: &db }, &mut RunForever, &mut tracer); + vm.set_up_for_running_module_closure(module.clone(), module_closure); + vm.run( + &DbUseProvider { + db: &db, + config: config.clone(), + }, + &mut RunForever, + &mut tracer, + ); if let Status::WaitingForOperations = vm.status() { error!("The module waits on channel operations. Perhaps, the code tried to read from a channel without sending a packet into it."); // TODO: Show stack traces of all fibers? @@ -256,12 +293,10 @@ fn run(options: CandyRunOptions) -> ProgramResult { reason, responsible, } => { - error!("The module panicked because {reason}."); - if let Some(responsible) = responsible { - error!("{responsible} is responsible."); - } else { - error!("Some top-level code panics."); - } + error!("The module panicked: {reason}"); + error!("{responsible} is responsible."); + let span = db.hir_id_to_span(responsible).unwrap(); + error!("Responsible is at {span:?}."); error!( "This is the stack trace:\n{}", tracer.format_panic_stack_trace_to_root_fiber(&db) @@ -283,34 +318,50 @@ fn run(options: CandyRunOptions) -> ProgramResult { // TODO: Add more environment stuff. 
let mut vm = Vm::new(); let mut stdout = StdoutService::new(&mut vm); + let mut stdin = StdinService::new(&mut vm); let environment = { let stdout_symbol = heap.create_symbol("Stdout".to_string()); let stdout_port = heap.create_send_port(stdout.channel); - heap.create_struct(HashMap::from([(stdout_symbol, stdout_port)])) + let stdin_symbol = heap.create_symbol("Stdin".to_string()); + let stdin_port = heap.create_send_port(stdin.channel); + heap.create_struct(HashMap::from([ + (stdout_symbol, stdout_port), + (stdin_symbol, stdin_port), + ])) }; + let platform = heap.create_hir_id(Id::platform()); tracer.for_fiber(FiberId::root()).call_started( - Id::new(module, vec!["main".to_string()]), + platform, main, vec![environment], + platform, &heap, ); - vm.set_up_for_running_closure(heap, main, &[environment]); + vm.set_up_for_running_closure(heap, main, &[environment], Id::platform()); loop { match vm.status() { Status::CanRun => { - debug!("VM still running."); - vm.run(&DbUseProvider { db: &db }, &mut RunForever, &mut tracer); - } - Status::WaitingForOperations => { - todo!("VM can't proceed until some operations complete."); + vm.run( + &DbUseProvider { + db: &db, + config: config.clone(), + }, + &mut RunForever, + &mut tracer, + ); } + Status::WaitingForOperations => {} _ => break, } stdout.run(&mut vm); + stdin.run(&mut vm); for channel in vm.unreferenced_channels.iter().copied().collect_vec() { vm.free_channel(channel); } } + if options.debug { + module.dump_associated_debug_file("trace", &format!("{tracer:?}")); + } match vm.tear_down() { ExecutionResult::Finished(return_value) => { tracer @@ -323,12 +374,8 @@ fn run(options: CandyRunOptions) -> ProgramResult { reason, responsible, } => { - error!("The main function panicked because {reason}."); - if let Some(responsible) = responsible { - error!("{responsible} is responsible."); - } else { - error!("A needs directly in the main function panicks. 
Perhaps the main functions expects more in the environment."); - } + error!("The main function panicked: {reason}"); + error!("{responsible} is responsible."); error!( "This is the stack trace:\n{}", tracer.format_panic_stack_trace_to_root_fiber(&db) @@ -346,7 +393,32 @@ struct StdoutService { } impl StdoutService { fn new(vm: &mut Vm) -> Self { - let channel = vm.create_channel(1); + let channel = vm.create_channel(0); + let current_receive = vm.receive(channel); + Self { + channel, + current_receive, + } + } + fn run(&mut self, vm: &mut Vm) { + while let Some(CompletedOperation::Received { packet }) = + vm.completed_operations.remove(&self.current_receive) + { + match &packet.heap.get(packet.address).data { + Data::Text(text) => println!("{}", text.value), + _ => info!("Non-text value sent to stdout: {packet:?}"), + } + self.current_receive = vm.receive(self.channel); + } + } +} +struct StdinService { + channel: ChannelId, + current_receive: OperationId, +} +impl StdinService { + fn new(vm: &mut Vm) -> Self { + let channel = vm.create_channel(0); let current_receive = vm.receive(channel); Self { channel, @@ -354,10 +426,30 @@ impl StdoutService { } } fn run(&mut self, vm: &mut Vm) { - if let Some(CompletedOperation::Received { packet }) = + while let Some(CompletedOperation::Received { packet }) = vm.completed_operations.remove(&self.current_receive) { - info!("Sent to stdout: {packet:?}"); + let request: SendPort = packet + .heap + .get(packet.address) + .data + .clone() + .try_into() + .expect("expected a send port"); + print!(">> "); + io::stdout().flush().unwrap(); + let input = { + let stdin = io::stdin(); + stdin.lock().lines().next().unwrap().unwrap() + }; + let packet = { + let mut heap = Heap::default(); + let address = heap.create_text(input); + Packet { heap, address } + }; + vm.send(&mut DummyTracer, request.channel, packet); + + // Receive the next request self.current_receive = vm.receive(self.channel); } } @@ -365,19 +457,24 @@ impl StdoutService { 
async fn fuzz(options: CandyFuzzOptions) -> ProgramResult { init_logger(true); + let db = Database::default(); let module = Module::from_package_root_and_file( current_dir().unwrap(), options.file.clone(), ModuleKind::Code, ); + let config = TracingConfig { + register_fuzzables: true, + trace_calls: false, + trace_evaluated_expressions: false, + }; - if raw_build(module.clone(), false).is_none() { + if raw_build(&db, module.clone(), &config, options.debug).is_none() { warn!("File not found."); return Err(Exit::FileNotFound); } debug!("Fuzzing `{module}`."); - let db = Database::default(); let failing_cases = fuzzer::fuzz(&db, module).await; if failing_cases.is_empty() { @@ -423,11 +520,22 @@ fn init_logger(use_stdout: bool) { .unwrap_or_default() .starts_with("candy") })) - .with_filter(filter::filter_fn(level_for("candy::compiler", Level::WARN))) .with_filter(filter::filter_fn(level_for( - "candy::language_server", + "candy::compiler::optimize", Level::DEBUG, ))) + .with_filter(filter::filter_fn(level_for( + "candy::compiler::string_to_rcst", + Level::WARN, + ))) + .with_filter(filter::filter_fn(level_for( + "candy::compiler", + Level::DEBUG, + ))) + .with_filter(filter::filter_fn(level_for( + "candy::language_server", + Level::TRACE, + ))) .with_filter(filter::filter_fn(level_for("candy::vm", Level::DEBUG))) .with_filter(filter::filter_fn(level_for( "candy::vm::heap", diff --git a/compiler/src/module.rs b/compiler/src/module.rs index 1f521fb6b..45110a671 100644 --- a/compiler/src/module.rs +++ b/compiler/src/module.rs @@ -46,6 +46,10 @@ pub enum Package { /// not yet persisted to disk (such as when opening a new VSCode tab and /// typing some code). Anonymous { url: String }, + + /// This package can make the tooling responsible for calls. For example, + /// the fuzzer and constant evaluator use this. 
+ Tooling(String), } impl Module { @@ -127,6 +131,7 @@ impl Package { Package::User(path) => Some(path.clone()), Package::External(path) => Some(path.clone()), Package::Anonymous { .. } => None, + Package::Tooling(_) => None, } } } @@ -171,6 +176,7 @@ impl Display for Package { Package::User(path) => write!(f, "user:{path:?}"), Package::External(path) => write!(f, "extern:{path:?}"), Package::Anonymous { url } => write!(f, "anonymous:{url}"), + Package::Tooling(tooling) => write!(f, "tooling:{tooling}"), } } } @@ -239,6 +245,60 @@ pub trait ModuleWatcher { fn get_open_module_raw(&self, module: &Module) -> Option>; } +pub struct UsePath { + parent_navigations: usize, + path: String, +} +impl UsePath { + const PARENT_NAVIGATION_CHAR: char = '.'; + + pub fn parse(mut path: &str) -> Result { + let parent_navigations = { + let mut navigations = 0; + while path.starts_with(UsePath::PARENT_NAVIGATION_CHAR) { + navigations += 1; + path = &path[UsePath::PARENT_NAVIGATION_CHAR.len_utf8()..]; + } + match navigations { + 0 => return Err("the target must start with at least one dot".to_string()), + i => i - 1, // two dots means one parent navigation + } + }; + let path = { + if !path.chars().all(|c| c.is_ascii_alphanumeric() || c == '.') { + return Err("the target name can only contain letters and dots".to_string()); + } + path.to_string() + }; + Ok(UsePath { + parent_navigations, + path, + }) + } + + pub fn resolve_relative_to(&self, current_module: Module) -> Result { + let kind = if self.path.contains('.') { + ModuleKind::Asset + } else { + ModuleKind::Code + }; + + let mut path = current_module.path; + for _ in 0..self.parent_navigations { + if path.pop().is_none() { + return Err("too many parent navigations".to_string()); + } + } + path.push(self.path.to_string()); + + Ok(Module { + package: current_module.package, + path: path.clone(), + kind, + }) + } +} + #[cfg(test)] mod test { use super::*; diff --git a/compiler/src/utils.rs b/compiler/src/utils.rs new file mode 
100644 index 000000000..7c5884819 --- /dev/null +++ b/compiler/src/utils.rs @@ -0,0 +1,24 @@ +use std::marker::PhantomData; + +#[derive(Clone, PartialEq, Eq)] +pub struct IdGenerator { + next_id: usize, + _data: PhantomData, +} +impl IdGenerator { + pub fn start_at(id: usize) -> Self { + Self { + next_id: id, + _data: Default::default(), + } + } + pub fn generate(&mut self) -> T { + let id = self.next_id; + self.next_id += 1; + T::from_usize(id) + } +} +pub trait CountableId { + fn from_usize(id: usize) -> Self; + fn to_usize(&self) -> usize; +} diff --git a/compiler/src/vm/builtin_functions.rs b/compiler/src/vm/builtin_functions.rs index 3410b598d..1506655ca 100644 --- a/compiler/src/vm/builtin_functions.rs +++ b/compiler/src/vm/builtin_functions.rs @@ -7,7 +7,10 @@ use super::{ tracer::{dummy::DummyTracer, Tracer}, FiberId, Heap, }; -use crate::{builtin_functions::BuiltinFunction, compiler::lir::Instruction}; +use crate::{ + builtin_functions::BuiltinFunction, + compiler::{hir::Id, lir::Instruction}, +}; use itertools::Itertools; use num_bigint::BigInt; use num_integer::Integer; @@ -21,15 +24,16 @@ impl Fiber { &mut self, builtin_function: &BuiltinFunction, args: &[Pointer], + responsible: Pointer, ) { let result = span!(Level::TRACE, "Running builtin").in_scope(|| match &builtin_function { BuiltinFunction::ChannelCreate => self.heap.channel_create(args), BuiltinFunction::ChannelSend => self.heap.channel_send(args), BuiltinFunction::ChannelReceive => self.heap.channel_receive(args), BuiltinFunction::Equals => self.heap.equals(args), - BuiltinFunction::FunctionRun => self.heap.function_run(args), + BuiltinFunction::FunctionRun => self.heap.function_run(args, responsible), BuiltinFunction::GetArgumentCount => self.heap.get_argument_count(args), - BuiltinFunction::IfElse => self.heap.if_else(args), + BuiltinFunction::IfElse => self.heap.if_else(args, responsible), BuiltinFunction::IntAdd => self.heap.int_add(args), BuiltinFunction::IntBitLength => 
self.heap.int_bit_length(args), BuiltinFunction::IntBitwiseAnd => self.heap.int_bitwise_and(args), @@ -70,8 +74,12 @@ impl Fiber { }); match result { Ok(Return(value)) => self.data_stack.push(value), - Ok(DivergeControlFlow { closure }) => { + Ok(DivergeControlFlow { + closure, + responsible, + }) => { self.data_stack.push(closure); + self.data_stack.push(responsible); self.run_instruction( &PanickingUseProvider, &mut DummyTracer.for_fiber(FiberId::root()), @@ -83,7 +91,7 @@ impl Fiber { Ok(Receive { channel }) => self.status = Status::Receiving { channel }, Ok(Parallel { body }) => self.status = Status::InParallelScope { body }, Ok(Try { body }) => self.status = Status::InTry { body }, - Err(reason) => self.panic(reason), + Err(reason) => self.panic(reason, self.heap.get_hir_id(responsible)), } } } @@ -91,12 +99,26 @@ impl Fiber { type BuiltinResult = Result; enum SuccessfulBehavior { Return(Pointer), - DivergeControlFlow { closure: Pointer }, - CreateChannel { capacity: Capacity }, - Send { channel: ChannelId, packet: Packet }, - Receive { channel: ChannelId }, - Parallel { body: Pointer }, - Try { body: Pointer }, + DivergeControlFlow { + closure: Pointer, + responsible: Pointer, + }, + CreateChannel { + capacity: Capacity, + }, + Send { + channel: ChannelId, + packet: Packet, + }, + Receive { + channel: ChannelId, + }, + Parallel { + body: Pointer, + }, + Try { + body: Pointer, + }, } use SuccessfulBehavior::*; @@ -113,7 +135,7 @@ macro_rules! unpack { ( $( *$arg, )+ ) } else { return Err( - "a builtin function was called with the wrong number of arguments".to_string(), + "A builtin function was called with the wrong number of arguments.".to_string(), ); }; let ( $( $arg, )+ ): ( $( UnpackedData<$type>, )+ ) = ( $( @@ -134,7 +156,7 @@ macro_rules! 
unpack_and_later_drop { ( $( *$arg, )+ ) } else { return Err( - "a builtin function was called with the wrong number of arguments".to_string(), + "A builtin function was called with the wrong number of arguments.".to_string(), ); }; let ( $( $arg, )+ ): ( $( UnpackedData<$type>, )+ ) = ( $( @@ -156,7 +178,7 @@ impl Heap { unpack_and_later_drop!(self, args, |capacity: Int| { match capacity.value.clone().try_into() { Ok(capacity) => CreateChannel { capacity }, - Err(_) => return Err("you tried to create a channel with a capacity that is either negative or bigger than the maximum usize".to_string()), + Err(_) => return Err("You tried to create a channel with a capacity that is either negative or bigger than the maximum usize.".to_string()), } }) } @@ -187,11 +209,12 @@ impl Heap { }) } - fn function_run(&mut self, args: &[Pointer]) -> BuiltinResult { + fn function_run(&mut self, args: &[Pointer], responsible: Pointer) -> BuiltinResult { unpack!(self, args, |closure: Closure| { closure.should_take_no_arguments()?; DivergeControlFlow { closure: closure.address, + responsible, } }) } @@ -202,7 +225,7 @@ impl Heap { }) } - fn if_else(&mut self, args: &[Pointer]) -> BuiltinResult { + fn if_else(&mut self, args: &[Pointer], responsible: Pointer) -> BuiltinResult { unpack!(self, args, |condition: bool, then: Closure, else_: Closure| { @@ -215,6 +238,7 @@ impl Heap { self.drop(dont_run.address); DivergeControlFlow { closure: run.address, + responsible, } }) } @@ -286,9 +310,8 @@ impl Heap { } fn int_shift_right(&mut self, args: &[Pointer]) -> BuiltinResult { unpack_and_later_drop!(self, args, |value: Int, amount: Int| { - let value = value.value.to_biguint().unwrap(); let amount = amount.value.to_u128().unwrap(); - Return(self.create_int((value >> amount).into())) + Return(self.create_int(&value.value >> amount)) }) } fn int_subtract(&mut self, args: &[Pointer]) -> BuiltinResult { @@ -353,7 +376,7 @@ impl Heap { fn parallel(&mut self, args: &[Pointer]) -> BuiltinResult { 
unpack!(self, args, |body_taking_nursery: Closure| { if body_taking_nursery.num_args != 1 { - return Err("parallel expects a closure taking a nursery".to_string()); + return Err("`parallel` expects a closure taking a nursery.".to_string()); } Parallel { body: body_taking_nursery.address, @@ -362,8 +385,8 @@ impl Heap { } fn print(&mut self, args: &[Pointer]) -> BuiltinResult { - unpack_and_later_drop!(self, args, |message: Text| { - info!("{:?}", message.value); + unpack_and_later_drop!(self, args, |message: Any| { + info!("{:?}", message.data.data.format(self)); Return(self.create_nothing()) }) } @@ -376,7 +399,7 @@ impl Heap { Ok(Return(value)) } None => Err(format!( - "the struct does not contain the key {}", + "The struct does not contain the key {}.", key.format(self) )), } @@ -479,6 +502,7 @@ impl Heap { Data::Symbol(_) => "Symbol", Data::List(_) => "List", Data::Struct(_) => "Struct", + Data::HirId(_) => unreachable!(), Data::Closure(_) => "Function", Data::Builtin(_) => "Builtin", Data::SendPort(_) => "SendPort", @@ -493,7 +517,7 @@ impl Closure { fn should_take_no_arguments(&self) -> Result<(), String> { match self.num_args { 0 => Ok(()), - n => Err(format!("a builtin function expected a function without arguments, but got one that takes {n} arguments")), + n => Err(format!("A builtin function expected a function without arguments, but got one that takes {n} arguments.")), } } } @@ -529,7 +553,7 @@ impl TryInto for Data { } } macro_rules! impl_data_try_into_type { - ($type:ty, $variant:tt, $error_message:expr) => { + ($type:ty, $variant:tt, $error_message:expr$(,)?) => { impl TryInto<$type> for Data { type Error = String; @@ -542,21 +566,22 @@ macro_rules! 
impl_data_try_into_type { } }; } -impl_data_try_into_type!(Int, Int, "a builtin function expected an int"); -impl_data_try_into_type!(Text, Text, "a builtin function expected a text"); -impl_data_try_into_type!(Symbol, Symbol, "a builtin function expected a symbol"); -impl_data_try_into_type!(List, List, "a builtin function expected a list"); -impl_data_try_into_type!(Struct, Struct, "a builtin function expected a struct"); -impl_data_try_into_type!(Closure, Closure, "a builtin function expected a closure"); +impl_data_try_into_type!(Int, Int, "A builtin function expected an int."); +impl_data_try_into_type!(Text, Text, "A builtin function expected a text."); +impl_data_try_into_type!(Symbol, Symbol, "A builtin function expected a symbol."); +impl_data_try_into_type!(List, List, "A builtin function expected a list."); +impl_data_try_into_type!(Struct, Struct, "A builtin function expected a struct."); +impl_data_try_into_type!(Id, HirId, "A builtin function expected a HIR ID."); +impl_data_try_into_type!(Closure, Closure, "A builtin function expected a closure."); impl_data_try_into_type!( SendPort, SendPort, - "a builtin function expected a send port" + "A builtin function expected a send port.", ); impl_data_try_into_type!( ReceivePort, ReceivePort, - "a builtin function expected a receive port" + "A builtin function expected a receive port.", ); impl TryInto for Data { @@ -567,7 +592,7 @@ impl TryInto for Data { match symbol.value.as_str() { "True" => Ok(true), "False" => Ok(false), - _ => Err("a builtin function expected True or False".to_string()), + _ => Err("A builtin function expected `True` or `False`.".to_string()), } } } diff --git a/compiler/src/vm/context.rs b/compiler/src/vm/context.rs index 971b6c6ec..694033072 100644 --- a/compiler/src/vm/context.rs +++ b/compiler/src/vm/context.rs @@ -1,5 +1,5 @@ use crate::{ - compiler::{hir_to_lir::HirToLir, lir::Lir}, + compiler::{hir_to_mir::TracingConfig, lir::Lir, mir_to_lir::MirToLir}, database::Database, 
module::{Module, ModuleDb, ModuleKind}, }; @@ -26,6 +26,7 @@ impl UseProvider for PanickingUseProvider { pub struct DbUseProvider<'a> { pub db: &'a Database, + pub config: TracingConfig, } impl<'a> UseProvider for DbUseProvider<'a> { fn use_module(&self, module: Module) -> Result { @@ -34,7 +35,7 @@ impl<'a> UseProvider for DbUseProvider<'a> { Some(bytes) => Ok(UseResult::Asset((*bytes).clone())), None => Err(format!("use couldn't import the asset module `{}`", module)), }, - ModuleKind::Code => match self.db.lir(module.clone()) { + ModuleKind::Code => match self.db.lir(module.clone(), self.config.clone()) { Some(lir) => Ok(UseResult::Code((*lir).clone())), None => Err(format!("use couldn't import the code module `{}`", module)), }, diff --git a/compiler/src/vm/fiber.rs b/compiler/src/vm/fiber.rs index 2b75a6994..c06cb5cef 100644 --- a/compiler/src/vm/fiber.rs +++ b/compiler/src/vm/fiber.rs @@ -1,13 +1,16 @@ use super::{ channel::{Capacity, Packet}, context::{ExecutionController, PanickingUseProvider, UseProvider}, - heap::{Builtin, Closure, Data, Heap, Pointer}, + heap::{Builtin, Closure, Data, Heap, Pointer, Text}, ids::ChannelId, tracer::{dummy::DummyTracer, FiberTracer, Tracer}, FiberId, }; use crate::{ - compiler::{hir::Id, lir::Instruction}, + compiler::{ + hir::{self, Id}, + lir::Instruction, + }, module::Module, }; use itertools::Itertools; @@ -25,7 +28,6 @@ pub struct Fiber { pub data_stack: Vec, pub call_stack: Vec, pub import_stack: Vec, - pub responsible_stack: Vec, pub heap: Heap, } @@ -51,7 +53,7 @@ pub enum Status { Done, Panicked { reason: String, - responsible: Option, + responsible: hir::Id, }, } @@ -80,10 +82,7 @@ impl InstructionPointer { pub enum ExecutionResult { Finished(Packet), - Panicked { - reason: String, - responsible: Option, - }, + Panicked { reason: String, responsible: Id }, } impl Fiber { @@ -94,47 +93,35 @@ impl Fiber { data_stack: vec![], call_stack: vec![], import_stack: vec![], - responsible_stack: vec![], heap, } } - pub fn 
new_for_running_closure(heap: Heap, closure: Pointer, arguments: &[Pointer]) -> Self { - let Data::Closure(Closure { id, .. }) = &heap.get(closure).data else { - panic!("Can only use with closures."); - }; - let id = id.clone(); + pub fn new_for_running_closure( + heap: Heap, + closure: Pointer, + arguments: &[Pointer], + responsible: hir::Id, + ) -> Self { + assert!(matches!(heap.get(closure).data, Data::Closure(_))); let mut fiber = Self::new_with_heap(heap); - let runner_closure = fiber.heap.create(Data::Closure(Closure { - id: id.clone(), - captured: vec![], - num_args: 0, - body: vec![ - Instruction::TraceCallStarts { - id, - num_args: arguments.len(), - }, - Instruction::Call { - num_args: arguments.len(), - }, - Instruction::TraceCallEnds, - Instruction::Return, - ], - responsible: None, - })); - fiber.data_stack.extend(arguments); - fiber.data_stack.push(closure); - fiber.data_stack.push(runner_closure); - + let responsible = fiber.heap.create(Data::HirId(responsible)); fiber.status = Status::Running; + + fiber.data_stack.push(closure); + fiber.data_stack.extend(arguments); + fiber.data_stack.push(responsible); fiber.run_instruction( &PanickingUseProvider, &mut DummyTracer.for_fiber(FiberId::root()), - Instruction::Call { num_args: 0 }, + Instruction::Call { + num_args: arguments.len(), + }, ); + fiber } - pub fn new_for_running_module_closure(closure: Closure) -> Self { + pub fn new_for_running_module_closure(module: Module, closure: Closure) -> Self { assert_eq!( closure.captured.len(), 0, @@ -144,9 +131,10 @@ impl Fiber { closure.num_args, 0, "Closure is not a module closure (it has arguments)." 
); + let module_id = Id::new(module, vec![]); let mut heap = Heap::default(); let closure = heap.create_closure(closure); - Self::new_for_running_closure(heap, closure, &[]) + Self::new_for_running_closure(heap, closure, &[], module_id) } pub fn tear_down(mut self) -> ExecutionResult { @@ -203,7 +191,7 @@ impl Fiber { self.data_stack.push(address); self.status = Status::Running; } - pub fn complete_parallel_scope(&mut self, result: Result) { + pub fn complete_parallel_scope(&mut self, result: Result) { assert!(matches!(self.status, Status::InParallelScope { .. })); match result { @@ -214,7 +202,7 @@ impl Fiber { self.data_stack.push(value); self.status = Status::Running; } - Err(reason) => self.panic(reason), + Err((reason, responsible)) => self.panic(reason, responsible), } } pub fn complete_try(&mut self, result: ExecutionResult) { @@ -233,14 +221,14 @@ impl Fiber { fn get_from_data_stack(&self, offset: usize) -> Pointer { self.data_stack[self.data_stack.len() - 1 - offset] } - pub fn panic(&mut self, reason: String) { + pub fn panic(&mut self, reason: String, responsible: hir::Id) { assert!(!matches!( self.status, Status::Done | Status::Panicked { .. 
} )); self.status = Status::Panicked { reason, - responsible: self.responsible_stack.last().cloned(), + responsible, }; } @@ -265,37 +253,6 @@ impl Fiber { }; let instruction = current_body[self.next_instruction.instruction].clone(); - if TRACE { - trace!( - "Instruction pointer: {}:{}", - self.next_instruction.closure, - self.next_instruction.instruction - ); - trace!( - "Data stack: {}", - self.data_stack - .iter() - .map(|it| it.format(&self.heap)) - .join(", ") - ); - trace!( - "Call stack: {}", - self.call_stack - .iter() - .map(|ip| format!("{}:{}", ip.closure, ip.instruction)) - .join(", ") - ); - trace!( - "Responsible stack: {}", - self.responsible_stack - .iter() - .map(|responsible| format!("{}", responsible)) - .join(", ") - ); - trace!("Heap: {:?}", self.heap); - trace!("Running instruction: {instruction:?}"); - } - self.next_instruction.instruction += 1; self.run_instruction(use_provider, tracer, instruction); execution_controller.instruction_executed(); @@ -311,9 +268,33 @@ impl Fiber { tracer: &mut FiberTracer, instruction: Instruction, ) { + if TRACE { + trace!( + "Instruction pointer: {}:{}", + self.next_instruction.closure, + self.next_instruction.instruction, + ); + trace!( + "Data stack: {}", + self.data_stack + .iter() + .map(|it| it.format(&self.heap)) + .join(", "), + ); + trace!( + "Call stack: {}", + self.call_stack + .iter() + .map(|ip| format!("{}:{}", ip.closure, ip.instruction)) + .join(", "), + ); + trace!("Heap: {:?}", self.heap); + trace!("Running instruction: {instruction:?}"); + } + match instruction { Instruction::CreateInt(int) => { - let address = self.heap.create_int(int.into()); + let address = self.heap.create_int(int); self.data_stack.push(address); } Instruction::CreateText(text) => { @@ -348,12 +329,14 @@ impl Fiber { let address = self.heap.create_struct(entries); self.data_stack.push(address); } + Instruction::CreateHirId(id) => { + let address = self.heap.create_hir_id(id); + self.data_stack.push(address); + } 
Instruction::CreateClosure { - id, num_args, body, captured, - is_curly, } => { let captured = captured .iter() @@ -363,15 +346,9 @@ impl Fiber { self.heap.dup(*address); } let address = self.heap.create_closure(Closure { - id, captured, num_args, body, - responsible: if is_curly { - self.responsible_stack.last().cloned() - } else { - None - }, }); self.data_stack.push(address); } @@ -393,23 +370,27 @@ impl Fiber { self.data_stack.push(top); } Instruction::Call { num_args } => { - let closure_address = self.data_stack.pop().unwrap(); + let responsible_address = self.data_stack.pop().unwrap(); let mut args = vec![]; for _ in 0..num_args { args.push(self.data_stack.pop().unwrap()); } + let callee_address = self.data_stack.pop().unwrap(); + + let callee = self.heap.get(callee_address); args.reverse(); - let object = self.heap.get(closure_address); - match object.data.clone() { + match callee.data.clone() { Data::Closure(Closure { captured, num_args: expected_num_args, - responsible, .. }) => { if num_args != expected_num_args { - self.panic(format!("a closure expected {expected_num_args} parameters, but you called it with {num_args} arguments")); + self.panic( + format!("A closure expected {expected_num_args} parameters, but you called it with {num_args} arguments."), + self.heap.get_hir_id(responsible_address), + ); return; } @@ -419,157 +400,113 @@ impl Fiber { self.heap.dup(captured); } self.data_stack.append(&mut args); - if let Some(responsible) = responsible { - self.responsible_stack.push(responsible); - } + self.data_stack.push(responsible_address); self.next_instruction = - InstructionPointer::start_of_closure(closure_address); + InstructionPointer::start_of_closure(callee_address); } Data::Builtin(Builtin { function: builtin }) => { - self.heap.drop(closure_address); - self.run_builtin_function(&builtin, &args); + self.heap.drop(callee_address); + self.run_builtin_function(&builtin, &args, responsible_address); } _ => { - self.panic(format!( - "you can only 
call closures and builtins, but you tried to call {}", - object.format(&self.heap), - )); + self.panic( + format!( + "You can only call closures and builtins, but you tried to call {}.", + callee.format(&self.heap), + ), + self.heap.get_hir_id(responsible_address), + ); } }; } Instruction::Return => { - let closure: Closure = self - .heap - .get(self.next_instruction.closure) - .data - .clone() - .try_into() - .unwrap(); - if closure.responsible.is_some() { - self.responsible_stack.pop().unwrap(); - } self.heap.drop(self.next_instruction.closure); let caller = self.call_stack.pop().unwrap(); self.next_instruction = caller; } Instruction::UseModule { current_module } => { + let responsible = self.data_stack.pop().unwrap(); let relative_path = self.data_stack.pop().unwrap(); + match self.use_module(use_provider, current_module, relative_path) { Ok(()) => {} Err(reason) => { - self.panic(reason); + let responsible = self.heap.get_hir_id(responsible); + self.panic(reason, responsible); } } } - Instruction::StartResponsibility(responsible) => { - self.responsible_stack.push(responsible); - } - Instruction::EndResponsibility => { - self.responsible_stack.pop().unwrap(); - } - Instruction::Needs => { + Instruction::Panic => { + let responsible_for_panic = self.data_stack.pop().unwrap(); let reason = self.data_stack.pop().unwrap(); - let condition = self.data_stack.pop().unwrap(); - let reason = match self.heap.get(reason).data.clone() { - Data::Text(reason) => reason.value, - _ => { - self.panic("you can only use text as the reason of a `needs`".to_string()); - return; - } + let reason: Result = self.heap.get(reason).data.clone().try_into(); + let Ok(reason) = reason else { + // Panic expressions only occur inside the needs function + // where we have validated the inputs before calling the + // instructions, or when lowering compiler errors from the + // HIR to the MIR. 
+ panic!("We should never generate a LIR where the reason is not a text."); }; + let responsible = self.heap.get_hir_id(responsible_for_panic); - match self.heap.get(condition).data.clone() { - Data::Symbol(symbol) => match symbol.value.as_str() { - "True" => { - self.data_stack.push(self.heap.create_nothing()); - } - "False" => self.panic(reason), - _ => { - self.panic("needs expect True or False as the condition".to_string()); - } - }, - _ => { - self.panic("needs expect a boolean symbol as the condition".to_string()); - } - } + self.panic(reason.value, responsible); } - Instruction::RegisterFuzzableClosure(id) => { - let closure = *self.data_stack.last().unwrap(); - if !matches!(self.heap.get(closure).data, Data::Closure(_)) { - panic!("Instruction RegisterFuzzableClosure executed, but stack top is not a closure."); + Instruction::ModuleStarts { module } => { + if self.import_stack.contains(&module) { + self.panic( + "Import cycle.".to_string(), + hir::Id::new(module.clone(), vec![]), + ); } - self.heap.dup(closure); - tracer.found_fuzzable_closure(id, closure, &self.heap); + self.import_stack.push(module); } - Instruction::TraceValueEvaluated(id) => { - let value = *self.data_stack.last().unwrap(); - self.heap.dup(value); - tracer.value_evaluated(id, value, &self.heap); + Instruction::ModuleEnds => { + self.import_stack.pop().unwrap(); } - Instruction::TraceCallStarts { id, num_args } => { - let closure = *self.data_stack.last().unwrap(); - self.heap.dup(closure); - + Instruction::TraceCallStarts { num_args } => { + let responsible = self.data_stack.pop().unwrap(); let mut args = vec![]; - let stack_size = self.data_stack.len(); - for i in 0..num_args { - let argument = self.data_stack[stack_size - i - 2]; - self.heap.dup(argument); - args.push(argument); + for _ in 0..num_args { + args.push(self.data_stack.pop().unwrap()); } - args.reverse(); + let callee_address = self.data_stack.pop().unwrap(); + let call_site = self.data_stack.pop().unwrap(); - 
tracer.call_started(id, closure, args, &self.heap); + args.reverse(); + tracer.call_started(call_site, callee_address, args, responsible, &self.heap); } Instruction::TraceCallEnds => { - let return_value = *self.data_stack.last().unwrap(); - self.heap.dup(return_value); + let return_value = self.data_stack.pop().unwrap(); + tracer.call_ended(return_value, &self.heap); } - Instruction::TraceNeedsStarts { id } => { - let condition = self.data_stack[self.data_stack.len() - 2]; - let reason = self.data_stack[self.data_stack.len() - 1]; - self.heap.dup(condition); - self.heap.dup(reason); - tracer.needs_started(id, condition, reason, &self.heap); - } - Instruction::TraceNeedsEnds => { - let nothing = *self.data_stack.last().unwrap(); - self.heap.dup(nothing); - tracer.needs_ended(); - } - Instruction::TraceModuleStarts { module } => { - if self.import_stack.contains(&module) { - self.panic(format!( - "there's an import cycle ({})", - self.import_stack - .iter() - .skip_while(|it| **it != module) - .chain([&module]) - .map(|module| format!("{module}")) - .join(" β†’ "), - )); - } - self.import_stack.push(module.clone()); - tracer.module_started(module); - } - Instruction::TraceModuleEnds => { - self.import_stack.pop().unwrap(); - let export_map = *self.data_stack.last().unwrap(); - self.heap.dup(export_map); - tracer.module_ended(export_map, &self.heap); + Instruction::TraceExpressionEvaluated => { + let value = self.data_stack.pop().unwrap(); + let expression = self.data_stack.pop().unwrap(); + + tracer.value_evaluated(expression, value, &self.heap); } - Instruction::Error { id, errors } => { - self.panic(format!( - "there {} at {id}: {errors:?}", - if errors.len() == 1 { - "was an error" - } else { - "were errors" - } - )); + Instruction::TraceFoundFuzzableClosure => { + let closure = self.data_stack.pop().unwrap(); + let definition = self.data_stack.pop().unwrap(); + + assert!( + matches!(self.heap.get(closure).data, Data::Closure(_)), + "Instruction 
TraceFoundFuzzableClosure executed, but stack top is not a closure.", + ); + + tracer.found_fuzzable_closure(definition, closure, &self.heap); } } } } + +trait NthLast { + fn nth_last(&mut self, index: usize) -> Pointer; +} +impl NthLast for Vec { + fn nth_last(&mut self, index: usize) -> Pointer { + self[self.len() - 1 - index] + } +} diff --git a/compiler/src/vm/heap/mod.rs b/compiler/src/vm/heap/mod.rs index 8da0fcfdf..1b35ed2fd 100644 --- a/compiler/src/vm/heap/mod.rs +++ b/compiler/src/vm/heap/mod.rs @@ -8,7 +8,7 @@ pub use self::{ pointer::Pointer, }; use super::ids::ChannelId; -use crate::builtin_functions::BuiltinFunction; +use crate::{builtin_functions::BuiltinFunction, compiler::hir::Id}; use itertools::Itertools; use num_bigint::BigInt; use std::{ @@ -79,6 +79,10 @@ impl Heap { .get_mut(&address) .unwrap_or_else(|| panic!("Couldn't get object {address}.")) } + pub fn get_hir_id(&self, address: Pointer) -> Id { + let Data::HirId(id) = &self.get(address).data else { panic!(); }; + id.clone() + } pub fn dup(&mut self, address: Pointer) { self.dup_by(address, 1); @@ -198,8 +202,8 @@ impl Heap { .map(|(hash, key, value)| (*hash, address_map[key], address_map[value])) .collect(), }), + Data::HirId(id) => Data::HirId(id.clone()), Data::Closure(closure) => Data::Closure(Closure { - id: closure.id.clone(), captured: closure .captured .iter() @@ -207,7 +211,6 @@ impl Heap { .collect(), num_args: closure.num_args, body: closure.body.clone(), - responsible: closure.responsible.clone(), }), Data::Builtin(builtin) => Data::Builtin(builtin.clone()), Data::SendPort(port) => Data::SendPort(SendPort::new(port.channel)), @@ -250,6 +253,9 @@ impl Heap { pub fn create_struct(&mut self, fields: HashMap) -> Pointer { self.create(Data::Struct(Struct::from_fields(self, fields))) } + pub fn create_hir_id(&mut self, id: Id) -> Pointer { + self.create(Data::HirId(id)) + } pub fn create_closure(&mut self, closure: Closure) -> Pointer { self.create(Data::Closure(closure)) } diff 
--git a/compiler/src/vm/heap/object.rs b/compiler/src/vm/heap/object.rs index a23a2fd4f..3204fee27 100644 --- a/compiler/src/vm/heap/object.rs +++ b/compiler/src/vm/heap/object.rs @@ -3,8 +3,9 @@ use crate::{ builtin_functions::BuiltinFunction, compiler::{ hir::Id, - hir_to_lir::HirToLir, + hir_to_mir::TracingConfig, lir::{Instruction, Lir}, + mir_to_lir::MirToLir, }, database::Database, module::Module, @@ -30,6 +31,7 @@ pub enum Data { Symbol(Symbol), List(List), Struct(Struct), + HirId(Id), Closure(Closure), Builtin(Builtin), SendPort(SendPort), @@ -64,11 +66,9 @@ pub struct Struct { #[derive(Clone)] pub struct Closure { - pub id: Id, pub captured: Vec, pub num_args: usize, pub body: Vec, - pub responsible: Option, } #[derive(Clone)] @@ -150,32 +150,16 @@ impl Struct { } impl Closure { - pub fn of_module_lir(module: Module, lir: Lir) -> Self { + pub fn of_module_lir(lir: Lir) -> Self { Closure { - id: Id::new(module.clone(), vec![]), captured: vec![], num_args: 0, - body: vec![ - Instruction::TraceModuleStarts { - module: module.clone(), - }, - Instruction::CreateClosure { - id: Id::new(module, vec![]), - captured: vec![], - num_args: 0, - body: lir.instructions, - is_curly: true, - }, - Instruction::Call { num_args: 0 }, - Instruction::TraceModuleEnds, - Instruction::Return, - ], - responsible: None, + body: lir.instructions, } } - pub fn of_module(db: &Database, module: Module) -> Option { - let lir = db.lir(module.clone())?; - Some(Self::of_module_lir(module, (*lir).clone())) + pub fn of_module(db: &Database, module: Module, config: TracingConfig) -> Option { + let lir = db.lir(module, config)?; + Some(Self::of_module_lir((*lir).clone())) } } @@ -226,6 +210,7 @@ impl Data { } s.hash(state) } + Data::HirId(id) => id.hash(state), Data::Closure(closure) => { for captured in &closure.captured { captured.hash_with_state(heap, state); @@ -246,6 +231,7 @@ impl Data { (Data::Symbol(a), Data::Symbol(b)) => a.value == b.value, (Data::List(a), Data::List(b)) => 
a.equals(heap, b), (Data::Struct(a), Data::Struct(b)) => a.equals(heap, b), + (Data::HirId(a), Data::HirId(b)) => a == b, (Data::Closure(_), Data::Closure(_)) => false, (Data::Builtin(a), Data::Builtin(b)) => a.function == b.function, (Data::SendPort(a), Data::SendPort(b)) => a.channel == b.channel, @@ -276,6 +262,7 @@ impl Data { .map(|(key, value)| format!("{}: {}", key, value)) .join(", ") ), + Data::HirId(id) => format!("{id:?}"), Data::Closure(_) => "{…}".to_string(), Data::Builtin(builtin) => format!("builtin{:?}", builtin.function), Data::SendPort(port) => format!("sendPort {:?}", port.channel), @@ -289,6 +276,7 @@ impl Data { | Data::Text(_) | Data::Symbol(_) | Data::Builtin(_) + | Data::HirId(_) | Data::SendPort(_) | Data::ReceivePort(_) => vec![], Data::List(List { items }) => items.clone(), diff --git a/compiler/src/vm/ids.rs b/compiler/src/vm/ids.rs index 13731d185..bd736cbb4 100644 --- a/compiler/src/vm/ids.rs +++ b/compiler/src/vm/ids.rs @@ -1,27 +1,5 @@ -use std::{fmt, marker::PhantomData}; - -#[derive(Clone)] -pub struct IdGenerator { - next_id: usize, - _data: PhantomData, -} -impl IdGenerator { - pub fn start_at(id: usize) -> Self { - Self { - next_id: id, - _data: Default::default(), - } - } - pub fn generate(&mut self) -> T { - let id = self.next_id; - self.next_id += 1; - T::from_usize(id) - } -} -pub trait CountableId { - fn from_usize(id: usize) -> Self; - fn to_usize(&self) -> usize; -} +use crate::utils::CountableId; +use std::fmt; #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct FiberId(usize); diff --git a/compiler/src/vm/mod.rs b/compiler/src/vm/mod.rs index 917c076d9..5111fc21a 100644 --- a/compiler/src/vm/mod.rs +++ b/compiler/src/vm/mod.rs @@ -10,7 +10,7 @@ mod use_module; pub use self::{ channel::Packet, fiber::{ExecutionResult, Fiber}, - heap::{Closure, Heap, Object, Pointer, Struct}, + heap::{Closure, Data, Heap, Object, Pointer, SendPort, Struct}, ids::{ChannelId, FiberId, OperationId}, tracer::{full::FullTracer, Tracer}, }; 
@@ -20,10 +20,12 @@ use self::{ CombiningExecutionController, ExecutionController, RunLimitedNumberOfInstructions, UseProvider, }, - heap::SendPort, - ids::{CountableId, IdGenerator}, }; -use crate::compiler::hir::Id; +use crate::{ + compiler::hir::Id, + module::Module, + utils::{CountableId, IdGenerator}, +}; use itertools::Itertools; use rand::seq::SliceRandom; use std::{ @@ -112,10 +114,7 @@ pub enum Status { CanRun, WaitingForOperations, Done, - Panicked { - reason: String, - responsible: Option, - }, + Panicked { reason: String, responsible: Id }, } impl FiberId { @@ -155,11 +154,17 @@ impl Vm { heap: Heap, closure: Pointer, arguments: &[Pointer], + responsible: Id, ) { - self.set_up_with_fiber(Fiber::new_for_running_closure(heap, closure, arguments)); + self.set_up_with_fiber(Fiber::new_for_running_closure( + heap, + closure, + arguments, + responsible, + )); } - pub fn set_up_for_running_module_closure(&mut self, closure: Closure) { - self.set_up_with_fiber(Fiber::new_for_running_module_closure(closure)) + pub fn set_up_for_running_module_closure(&mut self, module: Module, closure: Closure) { + self.set_up_with_fiber(Fiber::new_for_running_module_closure(module, closure)) } pub fn tear_down(mut self) -> ExecutionResult { @@ -330,7 +335,12 @@ impl Vm { self.fibers.insert( id, FiberTree::Single(Single { - fiber: Fiber::new_for_running_closure(heap, body, &[nursery_send_port]), + fiber: Fiber::new_for_running_closure( + heap, + body, + &[nursery_send_port], + Id::complicated_responsibility(), + ), parent: Some(fiber_id), }), ); @@ -357,7 +367,12 @@ impl Vm { self.fibers.insert( id, FiberTree::Single(Single { - fiber: Fiber::new_for_running_closure(heap, body, &[]), + fiber: Fiber::new_for_running_closure( + heap, + body, + &[], + Id::complicated_responsibility(), + ), parent: Some(fiber_id), }), ); @@ -425,11 +440,14 @@ impl Vm { ) } } - ExecutionResult::Panicked { reason, .. 
} => self.finish_parallel( + ExecutionResult::Panicked { + reason, + responsible, + } => self.finish_parallel( tracer, parent, Performer::Fiber(fiber_id), - Err(reason), + Err((reason, responsible)), ), } } @@ -472,7 +490,7 @@ impl Vm { tracer: &mut T, parallel_id: FiberId, cause: Performer, - result: Result<(), String>, + result: Result<(), (String, Id)>, ) { let parallel = self .fibers @@ -540,8 +558,9 @@ impl Vm { if let Performer::Fiber(fiber) = performer { let tree = self.fibers.get_mut(&fiber).unwrap(); tree.as_single_mut().unwrap().fiber.panic( - "the nursery is already dead because the parallel section ended" + "The nursery is already dead because the parallel section ended." .to_string(), + Id::complicated_responsibility(), ); } return; @@ -564,7 +583,12 @@ impl Vm { self.fibers.insert( child_id, FiberTree::Single(Single { - fiber: Fiber::new_for_running_closure(heap, closure_to_spawn, &[]), + fiber: Fiber::new_for_running_closure( + heap, + closure_to_spawn, + &[], + Id::complicated_responsibility(), + ), parent: Some(parent_id), }), ); @@ -583,7 +607,10 @@ impl Vm { tracer, parent_id, performer.clone(), - Err("a nursery received an invalid message".to_string()), + Err(( + "a nursery received an invalid message".to_string(), + Id::complicated_responsibility(), + )), ), } diff --git a/compiler/src/vm/tracer/full.rs b/compiler/src/vm/tracer/full.rs index 952bee1b9..c95d8bea4 100644 --- a/compiler/src/vm/tracer/full.rs +++ b/compiler/src/vm/tracer/full.rs @@ -1,14 +1,8 @@ +use super::{FiberEvent, Tracer, VmEvent}; +use crate::vm::{ChannelId, FiberId, Heap, Pointer}; use itertools::Itertools; - -use crate::{ - compiler::hir::Id, - module::Module, - vm::{ChannelId, FiberId, Heap, Pointer}, -}; use std::{collections::HashMap, fmt, time::Instant}; -use super::{FiberEvent, Tracer, VmEvent}; - /// A full tracer that saves all events that occur with timestamps. 
#[derive(Clone, Default)] pub struct FullTracer { @@ -53,34 +47,23 @@ pub enum StoredVmEvent { } #[derive(Clone)] pub enum StoredFiberEvent { - ModuleStarted { - module: Module, - }, - ModuleEnded { - export_map: Pointer, - }, ValueEvaluated { - id: Id, + expression: Pointer, value: Pointer, }, FoundFuzzableClosure { - id: Id, + definition: Pointer, closure: Pointer, }, CallStarted { - id: Id, - closure: Pointer, - args: Vec, + call_site: Pointer, + callee: Pointer, + arguments: Vec, + responsible: Pointer, }, CallEnded { return_value: Pointer, }, - NeedsStarted { - id: Id, - condition: Pointer, - reason: Pointer, - }, - NeedsEnded, } impl Tracer for FullTracer { @@ -135,51 +118,52 @@ impl FullTracer { } fn map_fiber_event(&mut self, event: FiberEvent, fiber: FiberId) -> StoredFiberEvent { match event { - FiberEvent::ModuleStarted { module } => StoredFiberEvent::ModuleStarted { module }, - FiberEvent::ModuleEnded { export_map, heap } => { - let export_map = self.import_from_heap(export_map, heap, Some(fiber)); - StoredFiberEvent::ModuleEnded { export_map } - } - FiberEvent::ValueEvaluated { id, value, heap } => { + FiberEvent::ValueEvaluated { + expression, + value, + heap, + } => { + let expression = self.import_from_heap(expression, heap, Some(fiber)); let value = self.import_from_heap(value, heap, Some(fiber)); - StoredFiberEvent::ValueEvaluated { id, value } + StoredFiberEvent::ValueEvaluated { expression, value } } - FiberEvent::FoundFuzzableClosure { id, closure, heap } => { + FiberEvent::FoundFuzzableClosure { + definition, + closure, + heap, + } => { + let definition = self.import_from_heap(definition, heap, Some(fiber)); let closure = self.import_from_heap(closure, heap, Some(fiber)); - StoredFiberEvent::FoundFuzzableClosure { id, closure } + StoredFiberEvent::FoundFuzzableClosure { + definition, + closure, + } } FiberEvent::CallStarted { - id, - closure, - args, + call_site, + callee, + arguments, + responsible, heap, } => { - let closure = 
self.import_from_heap(closure, heap, Some(fiber)); - let args = args + let call_site = self.import_from_heap(call_site, heap, Some(fiber)); + let callee = self.import_from_heap(callee, heap, Some(fiber)); + let arguments = arguments .into_iter() .map(|arg| self.import_from_heap(arg, heap, Some(fiber))) .collect(); - StoredFiberEvent::CallStarted { id, closure, args } + let responsible = self.import_from_heap(responsible, heap, Some(fiber)); + StoredFiberEvent::CallStarted { + call_site, + callee, + arguments, + responsible, + } } FiberEvent::CallEnded { return_value, heap } => { let return_value = self.import_from_heap(return_value, heap, Some(fiber)); StoredFiberEvent::CallEnded { return_value } } - FiberEvent::NeedsStarted { - id, - condition, - reason, - heap, - } => { - let condition = self.import_from_heap(condition, heap, Some(fiber)); - let reason = self.import_from_heap(reason, heap, Some(fiber)); - StoredFiberEvent::NeedsStarted { - id, - condition, - reason, - } - } - FiberEvent::NeedsEnded => StoredFiberEvent::NeedsEnded, } } } @@ -215,33 +199,24 @@ impl fmt::Debug for FullTracer { StoredVmEvent::InFiber { fiber, event } => format!( "{fiber:?}: {}", match event { - StoredFiberEvent::ModuleStarted { module } => - format!("module {module} started"), - StoredFiberEvent::ModuleEnded { export_map } => format!( - "module ended and exported {}", - export_map.format(&self.heap) - ), - StoredFiberEvent::ValueEvaluated { id, value } => - format!("value {id} is {}", value.format(&self.heap)), - StoredFiberEvent::FoundFuzzableClosure { id, .. } => - format!("found fuzzable closure {id}"), - StoredFiberEvent::CallStarted { id, closure, args } => format!( - "call {id} started: {} {}", - closure.format(&self.heap), - args.iter().map(|arg| arg.format(&self.heap)).join(" ") + StoredFiberEvent::ValueEvaluated { expression, value } => + format!("value {expression} is {}", value.format(&self.heap)), + StoredFiberEvent::FoundFuzzableClosure { definition, .. 
} => + format!("found fuzzable closure {definition}"), + StoredFiberEvent::CallStarted { + call_site, + callee, + arguments, + responsible, + } => format!( + "call started: {} {} (call site {}, {} is responsible)", + callee.format(&self.heap), + arguments.iter().map(|arg| arg.format(&self.heap)).join(" "), + self.heap.get_hir_id(*call_site), + self.heap.get_hir_id(*responsible), ), StoredFiberEvent::CallEnded { return_value } => format!("call ended: {}", return_value.format(&self.heap)), - StoredFiberEvent::NeedsStarted { - id, - condition, - reason, - } => format!( - "needs {id} started: needs {} {}", - condition.format(&self.heap), - reason.format(&self.heap) - ), - StoredFiberEvent::NeedsEnded => "needs ended".to_string(), } ), } diff --git a/compiler/src/vm/tracer/mod.rs b/compiler/src/vm/tracer/mod.rs index f819a5a7b..179591e21 100644 --- a/compiler/src/vm/tracer/mod.rs +++ b/compiler/src/vm/tracer/mod.rs @@ -3,7 +3,6 @@ pub mod full; pub mod stack_trace; use super::{heap::Pointer, ChannelId, FiberId, Heap}; -use crate::{compiler::hir::Id, module::Module}; /// An event that happened inside a VM. #[derive(Clone)] @@ -39,40 +38,27 @@ pub enum VmEvent<'event> { /// An event that happened inside a fiber. 
#[derive(Clone)] pub enum FiberEvent<'event> { - ModuleStarted { - module: Module, - }, - ModuleEnded { - export_map: Pointer, - heap: &'event Heap, - }, ValueEvaluated { - id: Id, + expression: Pointer, value: Pointer, heap: &'event Heap, }, FoundFuzzableClosure { - id: Id, + definition: Pointer, closure: Pointer, heap: &'event Heap, }, CallStarted { - id: Id, - closure: Pointer, - args: Vec, + call_site: Pointer, + callee: Pointer, + arguments: Vec, + responsible: Pointer, heap: &'event Heap, }, CallEnded { return_value: Pointer, heap: &'event Heap, }, - NeedsStarted { - id: Id, - condition: Pointer, - reason: Pointer, - heap: &'event Heap, - }, - NeedsEnded, } pub trait Tracer { @@ -126,38 +112,37 @@ impl<'fiber> FiberTracer<'fiber> { }); } - pub fn module_started(&mut self, module: Module) { - self.add(FiberEvent::ModuleStarted { module }); - } - pub fn module_ended(&mut self, export_map: Pointer, heap: &Heap) { - self.add(FiberEvent::ModuleEnded { export_map, heap }); - } - pub fn value_evaluated(&mut self, id: Id, value: Pointer, heap: &Heap) { - self.add(FiberEvent::ValueEvaluated { id, value, heap }); - } - pub fn found_fuzzable_closure(&mut self, id: Id, closure: Pointer, heap: &Heap) { - self.add(FiberEvent::FoundFuzzableClosure { id, closure, heap }); + pub fn value_evaluated(&mut self, expression: Pointer, value: Pointer, heap: &Heap) { + self.add(FiberEvent::ValueEvaluated { + expression, + value, + heap, + }); } - pub fn call_started(&mut self, id: Id, closure: Pointer, args: Vec, heap: &Heap) { - self.add(FiberEvent::CallStarted { - id, + pub fn found_fuzzable_closure(&mut self, definition: Pointer, closure: Pointer, heap: &Heap) { + self.add(FiberEvent::FoundFuzzableClosure { + definition, closure, - args, heap, }); } - pub fn call_ended(&mut self, return_value: Pointer, heap: &Heap) { - self.add(FiberEvent::CallEnded { return_value, heap }); - } - pub fn needs_started(&mut self, id: Id, condition: Pointer, reason: Pointer, heap: &Heap) { - 
self.add(FiberEvent::NeedsStarted { - id, - condition, - reason, + pub fn call_started( + &mut self, + call_site: Pointer, + callee: Pointer, + args: Vec, + responsible: Pointer, + heap: &Heap, + ) { + self.add(FiberEvent::CallStarted { + call_site, + callee, + arguments: args, + responsible, heap, }); } - pub fn needs_ended(&mut self) { - self.add(FiberEvent::NeedsEnded); + pub fn call_ended(&mut self, return_value: Pointer, heap: &Heap) { + self.add(FiberEvent::CallEnded { return_value, heap }); } } diff --git a/compiler/src/vm/tracer/stack_trace.rs b/compiler/src/vm/tracer/stack_trace.rs index ae001b017..0f5763caf 100644 --- a/compiler/src/vm/tracer/stack_trace.rs +++ b/compiler/src/vm/tracer/stack_trace.rs @@ -7,137 +7,95 @@ use crate::{ compiler::{ ast_to_hir::AstToHir, cst::{Cst, CstDb, CstKind}, - hir::Id, }, database::Database, language_server::utils::LspPositionConversion, - module::Module, }; use itertools::Itertools; use pad::PadStr; use std::collections::HashMap; +use tracing::debug; // Stack traces are a reduced view of the tracing state that represent the stack // trace at a given moment in time. #[derive(Clone)] -pub enum StackEntry { - Call { - id: Id, - closure: Pointer, - args: Vec, - }, - Needs { - id: Id, - condition: Pointer, - reason: Pointer, - }, - Module { - module: Module, - }, +pub struct Call { + pub call_site: Pointer, + pub callee: Pointer, + pub arguments: Vec, + pub responsible: Pointer, } impl FullTracer { - pub fn stack_traces(&self) -> HashMap> { - let mut stacks: HashMap> = HashMap::new(); + pub fn stack_traces(&self) -> HashMap> { + let mut stacks: HashMap> = HashMap::new(); for timed_event in &self.events { let StoredVmEvent::InFiber { fiber, event } = &timed_event.event else { continue; }; let stack = stacks.entry(*fiber).or_default(); match event { - StoredFiberEvent::ModuleStarted { module } => { - stack.push(StackEntry::Module { - module: module.clone(), - }); - } - StoredFiberEvent::ModuleEnded { .. 
} => { - assert!(matches!(stack.pop().unwrap(), StackEntry::Module { .. })); - } - StoredFiberEvent::CallStarted { id, closure, args } => { - stack.push(StackEntry::Call { - id: id.clone(), - closure: *closure, - args: args.clone(), - }); - } - StoredFiberEvent::CallEnded { .. } => { - assert!(matches!(stack.pop().unwrap(), StackEntry::Call { .. })); - } - StoredFiberEvent::NeedsStarted { - id, - condition, - reason, + StoredFiberEvent::CallStarted { + call_site, + callee, + arguments, + responsible, } => { - stack.push(StackEntry::Needs { - id: id.clone(), - condition: *condition, - reason: *reason, + stack.push(Call { + call_site: *call_site, + callee: *callee, + arguments: arguments.clone(), + responsible: *responsible, }); } - StoredFiberEvent::NeedsEnded => { - assert!(matches!(stack.pop().unwrap(), StackEntry::Needs { .. })); + StoredFiberEvent::CallEnded { .. } => { + stack.pop().unwrap(); } _ => {} } } stacks } - pub fn format_stack_trace(&self, db: &Database, stack: &[StackEntry]) -> String { + pub fn format_stack_trace(&self, db: &Database, stack: &[Call]) -> String { let mut caller_locations_and_calls = vec![]; - for entry in stack.iter().rev() { - let hir_id = match entry { - StackEntry::Call { id, .. } => Some(id), - StackEntry::Needs { id, .. } => Some(id), - StackEntry::Module { .. } => None, - }; - let (cst_id, span) = if let Some(hir_id) = hir_id { - let module = hir_id.module.clone(); - let cst_id = db.hir_to_cst_id(hir_id.clone()); - let cst = cst_id.map(|id| db.find_cst(module.clone(), id)); - let span = cst.map(|cst| { - ( - db.offset_to_lsp(module.clone(), cst.span.start), - db.offset_to_lsp(module.clone(), cst.span.end), - ) - }); - (cst_id, span) - } else { - (None, None) - }; + for Call { + call_site, + callee, + arguments, + .. 
+ } in stack.iter().rev() + { + let hir_id = self.heap.get_hir_id(*call_site); + let module = hir_id.module.clone(); + let cst_id = db.hir_to_cst_id(hir_id.clone()); + let cst = cst_id.map(|id| db.find_cst(module.clone(), id)); + let span = cst.map(|cst| { + ( + db.offset_to_lsp(module.clone(), cst.span.start), + db.offset_to_lsp(module.clone(), cst.span.end), + ) + }); let caller_location_string = format!( - "{} {}", - hir_id - .map(|id| format!("{id}")) - .unwrap_or_else(|| "".to_string()), + "{hir_id} {}", span.map(|((start_line, start_col), (end_line, end_col))| format!( "{}:{} – {}:{}", start_line, start_col, end_line, end_col )) .unwrap_or_else(|| "".to_string()) ); - let call_string = match entry { - StackEntry::Call { closure, args, .. } => format!( - "{} {}", - cst_id - .and_then(|id| { - let cst = db.find_cst(hir_id.unwrap().module.clone(), id); - match cst.kind { - CstKind::Call { receiver, .. } => receiver.extract_receiver_name(), - _ => None, - } - }) - .unwrap_or_else(|| closure.format(&self.heap)), - args.iter().map(|arg| arg.format(&self.heap)).join(" ") - ), - StackEntry::Needs { - condition, reason, .. - } => format!( - "needs {} {}", - condition.format(&self.heap), - reason.format(&self.heap), - ), - StackEntry::Module { module } => format!("module {module}"), - }; + let call_string = format!( + "{} {}", + cst_id + .and_then(|id| { + let cst = db.find_cst(hir_id.module.clone(), id); + match cst.kind { + CstKind::Call { receiver, .. 
} => receiver.extract_receiver_name(), + _ => None, + } + }) + .unwrap_or_else(|| callee.format(&self.heap)), + arguments.iter().map(|arg| arg.format(&self.heap)).join(" "), + ); caller_locations_and_calls.push((caller_location_string, call_string)); } @@ -145,7 +103,7 @@ impl FullTracer { .iter() .map(|(location, _)| location.len()) .max() - .unwrap(); + .unwrap_or_default(); caller_locations_and_calls .into_iter() @@ -173,10 +131,14 @@ impl FullTracer { } let stack_traces = self.stack_traces(); + debug!("Stack traces: {:?}", stack_traces.keys().collect_vec()); panicking_fiber_chain .into_iter() .rev() - .map(|fiber| self.format_stack_trace(db, &stack_traces[&fiber])) + .map(|fiber| match stack_traces.get(&fiber) { + Some(stack_trace) => self.format_stack_trace(db, stack_trace), + None => "(there's no stack trace for this fiber)".to_string(), + }) .join("\n(fiber boundary)\n") } } diff --git a/compiler/src/vm/use_module.rs b/compiler/src/vm/use_module.rs index 396132f3c..a714aa597 100644 --- a/compiler/src/vm/use_module.rs +++ b/compiler/src/vm/use_module.rs @@ -1,12 +1,12 @@ use super::{ context::{PanickingUseProvider, UseProvider, UseResult}, - heap::{Closure, Heap, Pointer, Text}, + heap::{Closure, Pointer, Text}, tracer::{dummy::DummyTracer, Tracer}, Fiber, FiberId, }; use crate::{ compiler::lir::Instruction, - module::{Module, ModuleKind}, + module::{Module, UsePath}, }; use itertools::Itertools; @@ -17,10 +17,17 @@ impl Fiber { current_module: Module, relative_path: Pointer, ) -> Result<(), String> { - let target = UsePath::parse(&self.heap, relative_path)?; + let path: Text = self + .heap + .get(relative_path) + .data + .clone() + .try_into() + .map_err(|_| "the path has to be a text".to_string())?; + let target = UsePath::parse(path.value.as_str())?; let module = target.resolve_relative_to(current_module)?; - match use_provider.use_module(module.clone())? { + match use_provider.use_module(module)? 
{ UseResult::Asset(bytes) => { let bytes = bytes .iter() @@ -30,7 +37,7 @@ impl Fiber { self.data_stack.push(list); } UseResult::Code(lir) => { - let module_closure = Closure::of_module_lir(module, lir); + let module_closure = Closure::of_module_lir(lir); let address = self.heap.create_closure(module_closure); self.data_stack.push(address); self.run_instruction( @@ -44,64 +51,3 @@ impl Fiber { Ok(()) } } - -struct UsePath { - parent_navigations: usize, - path: String, -} -impl UsePath { - const PARENT_NAVIGATION_CHAR: char = '.'; - - fn parse(heap: &Heap, path: Pointer) -> Result { - let path: Text = heap - .get(path) - .data - .clone() - .try_into() - .map_err(|_| "the path has to be a text".to_string())?; - let mut path = path.value.as_str(); - let parent_navigations = { - let mut navigations = 0; - while path.starts_with(UsePath::PARENT_NAVIGATION_CHAR) { - navigations += 1; - path = &path[UsePath::PARENT_NAVIGATION_CHAR.len_utf8()..]; - } - match navigations { - 0 => return Err("the target must start with at least one dot".to_string()), - i => i - 1, // two dots means one parent navigation - } - }; - let path = { - if !path.chars().all(|c| c.is_ascii_alphanumeric() || c == '.') { - return Err("the target name can only contain letters and dots".to_string()); - } - path.to_string() - }; - Ok(UsePath { - parent_navigations, - path, - }) - } - - fn resolve_relative_to(&self, current_module: Module) -> Result { - let kind = if self.path.contains('.') { - ModuleKind::Asset - } else { - ModuleKind::Code - }; - - let mut path = current_module.path; - for _ in 0..self.parent_navigations { - if path.pop().is_none() { - return Err("too many parent navigations".to_string()); - } - } - path.push(self.path.to_string()); - - Ok(Module { - package: current_module.package, - path: path.clone(), - kind, - }) - } -} diff --git a/language.md b/language.md index 5f9775f70..9f471046c 100644 --- a/language.md +++ b/language.md @@ -211,10 +211,10 @@ Variables only exist until the end 
of the scope they're defined in. ```candy foo = - bar = hello # error because hello doesn't exist yet + bar = hello # Error because `hello` doesn't exist yet. hello = 5 4 -bar = hello # error because hello doesn't exist anymore +bar = hello # Error because `hello` doesn't exist anymore. ``` Variables at the file's top level are visible to the module system (“public”) if they are declared using `:=`. @@ -249,7 +249,7 @@ Grouping using parentheses is only necessary if you have nested calls. ``` five = identity 5 five = identity (identity 5) -error = identity identity 5 # error because the first `identity` is called with two arguments +error = identity identity 5 # Error because the first `identity` is called with two arguments. ``` You can also split arguments across multiple lines using indentation. @@ -422,9 +422,9 @@ Optionally, you can pass a reason to the `needs` function that describes why you ```candy foo a = - needs (core.int.is a) "life's not fair" + needs (core.int.is a) "Life's not fair." -foo Hey # Calling `foo Hey` panics because life's not fair. +foo Hey # Calling `foo Hey` panics: Life's not fair. ``` Here are some recommended guidelines for writing reasons: diff --git a/packages/Core/bool.candy b/packages/Core/bool.candy index 52e328fe4..8dad1488d 100644 --- a/packages/Core/bool.candy +++ b/packages/Core/bool.candy @@ -4,9 +4,9 @@ type = use "..type" is value := ✨.ifElse - ✨.equals value True + equals value True { True } - { ✨.equals value False } + { equals value False } not a := needs (is a) diff --git a/packages/Core/concurrency.candy b/packages/Core/concurrency.candy index 83a5d16ae..32cdc733b 100644 --- a/packages/Core/concurrency.candy +++ b/packages/Core/concurrency.candy @@ -4,6 +4,8 @@ if = (use "..conditionals").if parallel body := needs (function.is1 body) "The `body` should be a function taking a nursery." + ## TODO: Propagate the responsibility here. `body` may contain a `needs` that + ## always fails.
That shouldn't be our fault, but our caller's. ✨.parallel body async nursery body := diff --git a/packages/Core/function.candy b/packages/Core/function.candy index a04618f37..55281351f 100644 --- a/packages/Core/function.candy +++ b/packages/Core/function.candy @@ -14,7 +14,7 @@ is3 value := ✨.ifElse (is value) { equals (getArgumentCount value) 3 } { False is4 value := ✨.ifElse (is value) { equals (getArgumentCount value) 4 } { False } is5 value := ✨.ifElse (is value) { equals (getArgumentCount value) 5 } { False } -run body = +run body := # A function that runs the given `body` with no arguments and returns its result. # # ``` @@ -25,7 +25,7 @@ run body = needs (is0 body) ✨.functionRun body -doNotRun body = +doNotRun body := # A function that doesn't run the given `body`. # # This function is useful during development to mark code that's not ready to run yet. In contrast diff --git a/packages/Core/int.candy b/packages/Core/int.candy index 7113af5ac..5f1ad25de 100644 --- a/packages/Core/int.candy +++ b/packages/Core/int.candy @@ -72,7 +72,7 @@ isNonNegative value := value | isNegative | bool.not absolute value := needs (is value) - conditionals.ifElse (isNegative value) (negate value) value + conditionals.ifElse (isNegative value) { negate value } { value } fitsInRustU32 value := needs (is value) @@ -81,7 +81,7 @@ fitsInRustU32 value := rustU32Max = 4294967295 # https://doc.rust-lang.org/std/primitive.u32.html#associatedconstant.MAX value | isLessThan rustU32Max -fitsInRustU128 value = +fitsInRustU128 value := needs (is value) needs (isNonNegative value) @@ -136,14 +136,14 @@ min valueA valueB := needs (is valueA) needs (is valueB) conditionals.ifElse - valueA | isLessThanOrEqualTo valueB + (valueA | isLessThanOrEqualTo valueB) { valueA } { valueB } max valueA valueB := needs (is valueA) needs (is valueB) conditionals.ifElse - valueA | isGreaterThanOrEqualTo valueB + (valueA | isGreaterThanOrEqualTo valueB) { valueA } { valueB } coerceAtLeast value minimum := diff 
--git a/packages/Core/text.candy b/packages/Core/text.candy index 5f4f57add..ec4dbaf51 100644 --- a/packages/Core/text.candy +++ b/packages/Core/text.candy @@ -31,11 +31,12 @@ getRange text startInclusive endExclusive := needs (int.is startInclusive) needs (int.isNonNegative startInclusive) - needs (int.is endExclusive) + needs (int.is endExclusive) needs (int.isNonNegative endExclusive) + needs (int.isLessThanOrEqualTo startInclusive endExclusive) - needs (int.isLessThan endExclusive (length text)) + needs (int.isLessThanOrEqualTo endExclusive (length text)) # TODO: Support ranges when we have them. text | ✨.textGetRange startInclusive endExclusive @@ -62,7 +63,7 @@ removePrefix text prefix := needs (is text) needs (is prefix) conditionals.ifElse - text | startsWith prefix + (text | startsWith prefix) { text | getRange (prefix | length) (text | length) } { text } removeSuffix text suffix := diff --git a/packages/benchmark.candy b/packages/benchmark.candy deleted file mode 100644 index a5e8da4ba..000000000 --- a/packages/benchmark.candy +++ /dev/null @@ -1,38 +0,0 @@ -# Run or benchmark by navigating to the Candy folder and then running this command: -# `cargo build --release --manifest-path=compiler/Cargo.toml -- run packages/benchmark.candy` -# `cargo build --release --manifest-path=compiler/Cargo.toml && time target/release/candy run packages/benchmark.candy` - -core = use "..Core" - -fibRec = { fibRec n -> - core.ifElse (n | core.int.isLessThan 2) { n } { - fibRec fibRec (n | core.int.subtract 1) - | core.int.add (fibRec fibRec (n | core.int.subtract 2)) - } -} -fib n = - needs (core.int.is n) - fibRec fibRec n -twentyOne := fib 8 - -main := { environment -> - print message = - needs (core.text.is message) - core.channel.send environment.stdout message - - print "Hello, world!" - - core.parallel { nursery -> - nursery - | core.async { - print "Hello from fiber!" - "Hello, async await!" 
- } - | core.await - | print - - nursery | core.async { print "Kiwi" } - nursery | core.async { print "Banana" } - print "Hi" - } -} diff --git a/packages/examples/benchmark.candy b/packages/examples/benchmark.candy new file mode 100644 index 000000000..0ee71ea6e --- /dev/null +++ b/packages/examples/benchmark.candy @@ -0,0 +1,16 @@ +# Run or benchmark by navigating to the Candy folder and then running this command: +# `cargo run --release --manifest-path=compiler/Cargo.toml -- run packages/examples/benchmark.candy` +# `cargo build --release --manifest-path=compiler/Cargo.toml && time target/release/candy run packages/examples/benchmark.candy` + +core = use "...Core" +channel = core.channel +text = core.text + +echo = use "..echo" +fibonacci = use "..fibonacci" + +main := { environment -> + print message = + needs (text.is message) + environment.stdout | channel.send message +} diff --git a/packages/examples/echo.candy b/packages/examples/echo.candy new file mode 100644 index 000000000..6afec1d29 --- /dev/null +++ b/packages/examples/echo.candy @@ -0,0 +1,25 @@ +# Echoes whatever you send to it on stdin. + +core = use "...Core" +async = core.async +await = core.await +channel = core.channel +run = core.function.run +text = core.text + +main := { environment -> + print message = + needs (text.is message) + environment.stdout | channel.send message + + read = { + response = channel.create 1 + environment.stdin | channel.send response.sendPort + await response.receivePort + } + + print "Hello, world!"
+ foo = run read + print "Echo:" + print foo +} diff --git a/packages/examples/fibonacci.candy b/packages/examples/fibonacci.candy new file mode 100644 index 000000000..c2a9a9a3f --- /dev/null +++ b/packages/examples/fibonacci.candy @@ -0,0 +1,16 @@ +core = use "...Core" +ifElse = core.ifElse +int = core.int + +fibRec = { fibRec n -> + ifElse (n | int.isLessThan 2) { n } { + fibRec fibRec (n | int.subtract 1) + | int.add (fibRec fibRec (n | int.subtract 2)) + } +} +fib n = + needs (int.is n) + fibRec fibRec n +twentyOne := fib 8 + +main environment := Nothing diff --git a/packages/examples/higherLower.candy b/packages/examples/higherLower.candy new file mode 100644 index 000000000..9cc7bc363 --- /dev/null +++ b/packages/examples/higherLower.candy @@ -0,0 +1,48 @@ +# Repeatedly asks you for a guess and tells you whether the secret number is +# higher or lower. Exits once you found the secret number. + +core = use "...Core" +async = core.async +await = core.await +channel = core.channel +equals = core.equals +ifElse = core.ifElse +int = core.int +result = core.result +run = core.function.run +text = core.text + +main := { environment -> + print message = + needs (text.is message) + environment.stdout | channel.send message + + read = { + response = channel.create 1 + environment.stdin | channel.send response.sendPort + await response.receivePort + } + + print "Welcome to the number guessing game!" + + guessRec = { guessRec correct -> + print "What's your guess?" + guess = run read | text.trim | int.parse + + guess | result.mapOrElse { guess -> + ifElse (equals guess correct) { print "You did it!" } { + ifElse (int.isLessThan correct guess) { + print "Lower!" + } { + print "Higher!" + } + guessRec guessRec correct + } + } { error -> + print "Your guess must be a number." 
+ guessRec guessRec correct + } + } + + guessRec guessRec 42 +} diff --git a/packages/examples/parallel.candy b/packages/examples/parallel.candy new file mode 100644 index 000000000..c81c2a880 --- /dev/null +++ b/packages/examples/parallel.candy @@ -0,0 +1,26 @@ +core = use "...Core" +async = core.async +await = core.await +channel = core.channel +parallel = core.parallel +text = core.text + +main := { environment -> + print message = + needs (text.is message) + environment.stdout | channel.send message + + parallel { nursery -> + nursery + | async { + print "Hello from fiber!" + "Hello, async await!" + } + | await + | print + + nursery | async { print "Kiwi" } + nursery | async { print "Banana" } + print "Hi" + } +} diff --git a/vscode_extension/package.json b/vscode_extension/package.json index 0c8ad30a5..e5278e7af 100644 --- a/vscode_extension/package.json +++ b/vscode_extension/package.json @@ -67,8 +67,9 @@ "[candy]": { "editor.detectIndentation": false, "editor.insertSpaces": true, - "editor.tabSize": 2, - "editor.suggest.insertMode": "replace" + "editor.semanticHighlighting.enabled": true, + "editor.suggest.insertMode": "replace", + "editor.tabSize": 2 } }, "languages": [