From d4305b3629982ae1ab6a99ae26e0e445a59d181b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20H=C3=B6lting?= <87192362+moritz-hoelting@users.noreply.github.com> Date: Fri, 29 Mar 2024 18:26:43 +0100 Subject: [PATCH] Basic implementation of if/else without conditional operators --- grammar.md | 57 +++++++++++ src/base/diagnostic.rs | 8 ++ src/base/mod.rs | 2 +- src/compile/compiler.rs | 65 +++++++++++-- src/lexical/token.rs | 65 +++++++++++-- src/lib.rs | 4 +- src/syntax/error.rs | 5 +- src/syntax/syntax_tree/declaration.rs | 2 +- src/syntax/syntax_tree/expression.rs | 109 +++++++++++++++++++++ src/syntax/syntax_tree/statement.rs | 135 ++++++++++++++++++++++++-- 10 files changed, 424 insertions(+), 28 deletions(-) create mode 100644 grammar.md diff --git a/grammar.md b/grammar.md new file mode 100644 index 0000000..c0a8ecf --- /dev/null +++ b/grammar.md @@ -0,0 +1,57 @@ +# Grammar of the shulkerscript language + +## Table of contents + +### Program +```ebnf +Program: Declaration*; +``` + +### Declaration +```ebnf +Declaration: FunctionDeclaration; +``` + +### FunctionDeclaration +```ebnf +Function: + 'fn' Identifier '(' ParameterList? ')' Block + ; +ParameterList: + Identifier (',' Identifier)* ','? + ; +``` + +### Statement +```ebnf +Statement: + Block + | LiteralCommand + | Conditional + ; +``` + +### Block +```ebnf +Block: '{' Statement* '}'; +``` + +### Conditional +```ebnf +Conditional: + 'if' ParenthesizedCondition Block ('else' Block)? + ; +``` + +### ParenthesizedCondition +```ebnf +ParenthesizedCondition: + '(' Condition ')' + ; +``` + +### Condition +```ebnf +Condition: + StringLiteral +``` \ No newline at end of file diff --git a/src/base/diagnostic.rs b/src/base/diagnostic.rs index 420aa00..38e8ae0 100644 --- a/src/base/diagnostic.rs +++ b/src/base/diagnostic.rs @@ -3,3 +3,11 @@ pub trait Handler { /// Receive an error and handles it. fn receive(&self, error: T); } + +/// Is a struct that implements [`Handler`] trait by doing nothing with the errors. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] +pub struct Dummy; + +impl Handler for Dummy { + fn receive(&self, _error: T) {} +} diff --git a/src/base/mod.rs b/src/base/mod.rs index 80c4b19..a357f24 100644 --- a/src/base/mod.rs +++ b/src/base/mod.rs @@ -7,6 +7,6 @@ mod error; pub use error::{Error, Result}; mod diagnostic; -pub use diagnostic::Handler; +pub use diagnostic::{Dummy, Handler}; pub mod log; diff --git a/src/compile/compiler.rs b/src/compile/compiler.rs index e81388a..384e805 100644 --- a/src/compile/compiler.rs +++ b/src/compile/compiler.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; -use shulkerbox::datapack::{Command, Datapack}; +use shulkerbox::datapack::{Command, Datapack, Execute}; use crate::{ base::{source_file::SourceElement, Handler}, @@ -63,14 +63,61 @@ impl Compiler { fn compile_function(statements: &[Statement]) -> Vec { let mut commands = Vec::new(); for statement in statements { - match statement { - Statement::LiteralCommand(literal_command) => { - commands.push(literal_command.clean_command().into()); - } - Statement::Block(_) => { - unreachable!("Only literal commands are allowed in functions at this time.") - } - } + commands.extend(compile_statement(statement)); } commands } + +fn compile_statement(statement: &Statement) -> Option { + match statement { + Statement::LiteralCommand(literal_command) => Some(literal_command.clean_command().into()), + Statement::Block(_) => { + unreachable!("Only literal commands are allowed in functions at this time.") + } + Statement::Conditional(cond) => { + let (_, cond, block, el) = cond.clone().dissolve(); + let (_, cond, _) = cond.dissolve(); + let statements = block.statements(); + + let el = el + .and_then(|el| { + let (_, block) = el.dissolve(); + let statements = block.statements(); + if statements.is_empty() { + None + } else if statements.len() == 1 { + compile_statement(&statements[0]).map(|cmd| Execute::Run(Box::new(cmd))) + } else { + let commands = 
statements.iter().filter_map(compile_statement).collect(); + Some(Execute::Runs(commands)) + } + }) + .map(Box::new); + + if statements.is_empty() { + if el.is_none() { + None + } else { + Some(Command::Execute(Execute::If( + cond.value().string_content().into(), + Box::new(Execute::Runs(Vec::new())), + el, + ))) + } + } else { + let run = if statements.len() > 1 { + let commands = statements.iter().filter_map(compile_statement).collect(); + Execute::Runs(commands) + } else { + Execute::Run(Box::new(compile_statement(&statements[0])?)) + }; + + Some(Command::Execute(Execute::If( + cond.value().string_content().into(), + Box::new(run), + el, + ))) + } + } + } +} diff --git a/src/lexical/token.rs b/src/lexical/token.rs index 01979bd..761ff8e 100644 --- a/src/lexical/token.rs +++ b/src/lexical/token.rs @@ -74,7 +74,8 @@ pub enum Token { Punctuation(Punctuation), Numeric(Numeric), Comment(Comment), - LiteralCommand(LiteralCommand), + CommandLiteral(CommandLiteral), + StringLiteral(StringLiteral), } impl Token { @@ -88,7 +89,8 @@ impl Token { Self::Punctuation(token) => &token.span, Self::Numeric(token) => &token.span, Self::Comment(token) => &token.span, - Self::LiteralCommand(token) => &token.span, + Self::CommandLiteral(token) => &token.span, + Self::StringLiteral(token) => &token.span, } } } @@ -102,7 +104,8 @@ impl SourceElement for Token { Self::Punctuation(token) => token.span(), Self::Numeric(token) => token.span(), Self::Comment(token) => token.span(), - Self::LiteralCommand(token) => token.span(), + Self::CommandLiteral(token) => token.span(), + Self::StringLiteral(token) => token.span(), } } } @@ -177,6 +180,28 @@ impl SourceElement for Numeric { } } +/// Represents a hardcoded string literal value in the source code. +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct StringLiteral { + /// Is the span that makes up the token. 
+ pub span: Span, +} + +impl StringLiteral { + /// Returns the string without the leading and trailing double quotes. + #[must_use] + pub fn string_content(&self) -> &str { + let string = self.span.str(); + &string[1..string.len() - 1] + } +} + +impl SourceElement for StringLiteral { + fn span(&self) -> Span { + self.span.clone() + } +} + /// Is an enumeration representing the two kinds of comments in the Flux programming language. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum CommentKind { @@ -205,17 +230,17 @@ impl SourceElement for Comment { /// Represents a hardcoded literal command in the source code. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct LiteralCommand { +pub struct CommandLiteral { /// Span that makes up the token. pub span: Span, } -impl SourceElement for LiteralCommand { +impl SourceElement for CommandLiteral { fn span(&self) -> Span { self.span.clone() } } -impl LiteralCommand { +impl CommandLiteral { /// Returns the command without the leading slash. 
#[must_use] pub fn clean_command(&self) -> &str { @@ -367,7 +392,7 @@ impl Token { } // When there is no second slash and at the start of a line else if prev_token.map_or(true, |token| token.span().str().contains('\n')) { - Ok(Self::handle_literal_command(iter, start)) + Ok(Self::handle_command_literal(iter, start)) } // Just a single slash punctuation else { @@ -390,11 +415,31 @@ impl Token { .into() } + /// Handles a sequence of characters that are enclosed in double quotes + fn handle_string_literal(iter: &mut SourceIterator, start: usize) -> Self { + let mut is_escaped = false; + + for (_, character) in iter.by_ref() { + if character == '\\' { + is_escaped = !is_escaped; + } else if character == '"' && !is_escaped { + break; + } else { + is_escaped = false; + } + } + + StringLiteral { + span: Self::create_span(start, iter), + } + .into() + } + /// Handles a command that is preceeded by a slash - fn handle_literal_command(iter: &mut SourceIterator, start: usize) -> Self { + fn handle_command_literal(iter: &mut SourceIterator, start: usize) -> Self { Self::walk_iter(iter, |c| !(c.is_whitespace() && c.is_ascii_control())); - LiteralCommand { + CommandLiteral { span: Self::create_span(start, iter), } .into() @@ -431,6 +476,8 @@ impl Token { // Found comment/single slash punctuation else if character == '/' { Self::handle_comment(iter, start, character, prev_token, handler) + } else if character == '"' { + Ok(Self::handle_string_literal(iter, start)) } // Found numeric literal else if character.is_ascii_digit() { diff --git a/src/lib.rs b/src/lib.rs index bfcddf3..dfee576 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -36,6 +36,8 @@ pub fn compile(path: PathBuf) -> Result { let tokens = TokenStream::tokenize(&source_file, &printer); + // println!("tokens: {tokens:#?}"); + if printer.has_printed() { return Err(Error::Other( "An error occurred while tokenizing the source code.", @@ -47,7 +49,7 @@ pub fn compile(path: PathBuf) -> Result { "An error occured while parsing 
the source code.", ))?; - // println!("result: {result:#?}"); + // println!("program: {program:#?}"); let mut compiler = Compiler::new(); let datapack = compiler.compile(&program, &printer)?; diff --git a/src/syntax/error.rs b/src/syntax/error.rs index 0afbb0c..489821c 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -16,6 +16,7 @@ pub enum SyntaxKind { Identifier, Declaration, Numeric, + StringLiteral, Statement, Expression, Type, @@ -39,6 +40,7 @@ impl Display for UnexpectedSyntax { SyntaxKind::Keyword(keyword) => format!("a keyword token `{}`", keyword.as_str()), SyntaxKind::Declaration => "a declaration token".to_string(), SyntaxKind::Numeric => "a numeric token".to_string(), + SyntaxKind::StringLiteral => "a string literal".to_string(), SyntaxKind::Statement => "a statement syntax".to_string(), SyntaxKind::Expression => "an expression syntax".to_string(), SyntaxKind::Type => "a type syntax".to_string(), @@ -54,7 +56,8 @@ impl Display for UnexpectedSyntax { format!("a punctuation token `{}`", punctuation.punctuation) } Some(Token::Numeric(..)) => "a numeric token".to_string(), - Some(Token::LiteralCommand(..)) => "a literal command token".to_string(), + Some(Token::CommandLiteral(..)) => "a literal command token".to_string(), + Some(Token::StringLiteral(..)) => "a string literal token".to_string(), None => "EOF".to_string(), }; diff --git a/src/syntax/syntax_tree/declaration.rs b/src/syntax/syntax_tree/declaration.rs index 895ef24..026b4fc 100644 --- a/src/syntax/syntax_tree/declaration.rs +++ b/src/syntax/syntax_tree/declaration.rs @@ -38,7 +38,7 @@ impl SourceElement for Declaration { /// /// ``` ebnf /// Function: -/// 'function' Identifier '(' ParameterList? ')' Block +/// 'fn' Identifier '(' ParameterList? 
')' Block /// ; /// /// ParameterList: diff --git a/src/syntax/syntax_tree/expression.rs b/src/syntax/syntax_tree/expression.rs index e1371f1..266ee86 100644 --- a/src/syntax/syntax_tree/expression.rs +++ b/src/syntax/syntax_tree/expression.rs @@ -1 +1,110 @@ //! Syntax tree nodes for expressions. + +use getset::Getters; + +use crate::{ + base::{ + source_file::{SourceElement, Span}, + Handler, + }, + lexical::{ + token::{Punctuation, StringLiteral, Token}, + token_stream::Delimiter, + }, + syntax::{ + error::{Error, UnexpectedSyntax}, + parser::{Parser, Reading}, + }, +}; + +/// Syntax Synopsis: +/// +/// ``` ebnf +/// ParenthesizedCondition: +/// '(' Condition ')'; +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)] +pub struct ParenthesizedCondition { + /// The opening parenthesis. + #[get = "pub"] + pub open_paren: Punctuation, + /// The condition within the parenthesis. + #[get = "pub"] + pub condition: Box, + /// The closing parenthesis. + #[get = "pub"] + pub close_paren: Punctuation, +} + +impl ParenthesizedCondition { + /// Dissolves the parenthesized condition into its components + #[must_use] + pub fn dissolve(self) -> (Punctuation, Condition, Punctuation) { + (self.open_paren, *self.condition, self.close_paren) + } +} + +impl SourceElement for ParenthesizedCondition { + fn span(&self) -> Span { + self.open_paren + .span() + .join(&self.close_paren.span()) + .expect("The span of the parenthesis is invalid.") + } +} + +/// Syntax Synopsis: +/// +/// ``` ebnf +/// Condition: StringLiteral; +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)] +pub struct Condition { + /// The value of the condition. + #[get = "pub"] + pub value: StringLiteral, +} + +impl SourceElement for Condition { + fn span(&self) -> Span { + self.value.span() + } +} + +impl<'a> Parser<'a> { + /// Parses a [`Condition`]. 
+ pub fn parse_condition(&mut self, handler: &impl Handler) -> Option { + match self.next_significant_token() { + Reading::Atomic(Token::StringLiteral(s)) => Some(Condition { value: s }), + unexpected => { + // make progress + self.forward(); + + handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax { + expected: crate::syntax::error::SyntaxKind::Expression, + found: unexpected.into_token(), + })); + + None + } + } + } + + /// Parses a [`ParenthesizedCondition`]. + pub fn parse_parenthesized_condition( + &mut self, + handler: &impl Handler, + ) -> Option { + let token_tree = self.step_into( + Delimiter::Parenthesis, + |parser| parser.parse_condition(handler), + handler, + )?; + + Some(ParenthesizedCondition { + open_paren: token_tree.open, + condition: Box::new(token_tree.tree?), + close_paren: token_tree.close, + }) + } +} diff --git a/src/syntax/syntax_tree/statement.rs b/src/syntax/syntax_tree/statement.rs index bf07b9e..08d987d 100644 --- a/src/syntax/syntax_tree/statement.rs +++ b/src/syntax/syntax_tree/statement.rs @@ -8,7 +8,7 @@ use crate::{ Handler, }, lexical::{ - token::{LiteralCommand, Punctuation, Token}, + token::{CommandLiteral, Keyword, KeywordKind, Punctuation, Token}, token_stream::Delimiter, }, syntax::{ @@ -17,20 +17,22 @@ use crate::{ }, }; +use super::expression::ParenthesizedCondition; + /// Syntax Synopsis: /// /// ``` ebnf /// Statement: /// Block -/// | Conditional +/// | LiteralCommand /// ; /// ``` #[allow(missing_docs)] #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Statement { Block(Block), - LiteralCommand(LiteralCommand), - // Conditional(Conditional), + LiteralCommand(CommandLiteral), + Conditional(Conditional), } impl SourceElement for Statement { @@ -38,7 +40,7 @@ impl SourceElement for Statement { match self { Self::Block(block) => block.span(), Self::LiteralCommand(literal_command) => literal_command.span(), - //Self::Conditional(conditional) => conditional.span(), + Self::Conditional(conditional) => 
conditional.span(), } } } @@ -80,6 +82,87 @@ impl SourceElement for Block { } } +/// Syntax Synopsis: +/// +/// ``` ebnf +/// Conditional: +/// 'if' ParenthesizedCondition Block ('else' Block)? +/// ; +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)] +pub struct Conditional { + /// The `if` keyword. + #[get = "pub"] + if_keyword: Keyword, + /// The condition of the conditional. + #[get = "pub"] + condition: ParenthesizedCondition, + /// The block of the conditional. + #[get = "pub"] + block: Block, + /// The `else` statement. + #[get = "pub"] + r#else: Option, +} + +impl Conditional { + /// Dissolves the [`Conditional`] into its components. + #[must_use] + pub fn dissolve(self) -> (Keyword, ParenthesizedCondition, Block, Option) { + (self.if_keyword, self.condition, self.block, self.r#else) + } +} + +impl SourceElement for Conditional { + fn span(&self) -> Span { + self.r#else.as_ref().map_or_else( + || { + self.if_keyword + .span() + .join(&self.block.span()) + .expect("The span of the conditional is invalid.") + }, + |r#else| { + self.if_keyword + .span() + .join(&r#else.span()) + .expect("The span of the else conditional is invalid.") + }, + ) + } +} + +/// Syntax Synopsis: +/// +/// ``` ebnf +/// Else: +/// 'else' Block +/// ; +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)] +pub struct Else { + /// The `else` keyword. + #[get = "pub"] + else_keyword: Keyword, + /// The block of the else statement. + #[get = "pub"] + block: Box, +} + +impl Else { + /// Dissolves the [`Else`] into its components. + #[must_use] + pub fn dissolve(self) -> (Keyword, Box) { + (self.else_keyword, self.block) + } +} + +impl SourceElement for Else { + fn span(&self) -> Span { + self.else_keyword.span().join(&self.block.span()).unwrap() + } +} + impl<'a> Parser<'a> { /// Parses a [`Block`]. 
pub fn parse_block(&mut self, handler: &impl Handler) -> Option { @@ -123,7 +206,7 @@ impl<'a> Parser<'a> { pub fn parse_statement(&mut self, handler: &impl Handler) -> Option { match self.stop_at_significant() { // variable declaration - Reading::Atomic(Token::LiteralCommand(command)) => { + Reading::Atomic(Token::CommandLiteral(command)) => { self.forward(); Some(Statement::LiteralCommand(command)) } @@ -134,6 +217,46 @@ impl<'a> Parser<'a> { Some(Statement::Block(block)) } + // conditional statement + Reading::Atomic(Token::Keyword(if_keyword)) + if if_keyword.keyword == KeywordKind::If => + { + // eat the if keyword + self.forward(); + + let condition = self.parse_parenthesized_condition(handler)?; + + let block = self.parse_block(handler)?; + + match self.stop_at_significant() { + // else statement + Reading::Atomic(Token::Keyword(else_keyword)) + if else_keyword.keyword == KeywordKind::Else => + { + // eat the else keyword + self.forward(); + + let else_block = self.parse_block(handler)?; + + Some(Statement::Conditional(Conditional { + if_keyword, + condition, + block, + r#else: Some(Else { + else_keyword, + block: Box::new(else_block), + }), + })) + } + // no else statement + _ => Some(Statement::Conditional(Conditional { + if_keyword, + condition, + block, + r#else: None, + })), + } + } // other unexpected => { handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax {