From 0d0df920ee9e5d9ed77959e73c546d197119dd0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20H=C3=B6lting?= <87192362+moritz-hoelting@users.noreply.github.com> Date: Mon, 24 Feb 2025 23:02:07 +0100 Subject: [PATCH] implement variable declaration parsing --- .github/workflows/test.yml | 2 +- src/lexical/token.rs | 103 ++++++++++++++---- src/semantic/mod.rs | 94 +++++++++++++--- src/syntax/error.rs | 9 +- src/syntax/parser.rs | 10 +- src/syntax/syntax_tree/expression.rs | 31 +++++- src/syntax/syntax_tree/statement.rs | 157 ++++++++++++++++++++++++--- src/transpile/transpiler.rs | 25 ++++- 8 files changed, 363 insertions(+), 68 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 86d120c..0c09f89 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,4 +16,4 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - run: cargo test --verbose + - run: cargo test --verbose --all-features diff --git a/src/lexical/token.rs b/src/lexical/token.rs index 54bcb84..d2d0069 100644 --- a/src/lexical/token.rs +++ b/src/lexical/token.rs @@ -1,6 +1,12 @@ //! Contains the [`Token`] struct and its related types. 
-use std::{borrow::Cow, collections::HashMap, fmt::Display, str::FromStr, sync::OnceLock}; +use std::{ + borrow::Cow, + collections::HashMap, + fmt::{Debug, Display}, + str::FromStr, + sync::OnceLock, +}; use crate::base::{ self, @@ -44,6 +50,8 @@ pub enum KeywordKind { Tag, Of, Replace, + Int, + Bool, } impl Display for KeywordKind { @@ -107,6 +115,8 @@ impl KeywordKind { Self::Tag => "tag", Self::Of => "of", Self::Replace => "replace", + Self::Int => "int", + Self::Bool => "bool", } } @@ -141,7 +151,8 @@ pub enum Token { Identifier(Identifier), Keyword(Keyword), Punctuation(Punctuation), - Numeric(Numeric), + Integer(Integer), + Boolean(Boolean), Comment(Comment), DocComment(DocComment), CommandLiteral(CommandLiteral), @@ -156,7 +167,8 @@ impl SourceElement for Token { Self::Identifier(token) => token.span(), Self::Keyword(token) => token.span(), Self::Punctuation(token) => token.span(), - Self::Numeric(token) => token.span(), + Self::Integer(token) => token.span(), + Self::Boolean(token) => token.span(), Self::Comment(token) => token.span(), Self::DocComment(token) => token.span(), Self::CommandLiteral(token) => token.span(), @@ -227,20 +239,72 @@ impl SourceElement for Punctuation { } } -/// Represents a hardcoded numeric literal value in the source code. +/// Represents a hardcoded numeric integer literal value in the source code. #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Numeric { +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Integer { /// Is the span that makes up the token. pub span: Span, } -impl SourceElement for Numeric { +impl SourceElement for Integer { fn span(&self) -> Span { self.span.clone() } } +impl Integer { + /// Returns the integer value of the token. 
+ #[must_use] + pub fn as_i64(&self) -> i64 { + self.span.str().parse().unwrap() + } +} + +impl Debug for Integer { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut s = f.debug_struct("Integer"); + s.field("value", &self.as_i64()); + s.field("span", &self.span); + s.finish() + } +} + +/// Represents a hardcoded boolean literal value in the source code. +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Boolean { + /// Is the span that makes up the token. + pub span: Span, +} + +impl SourceElement for Boolean { + fn span(&self) -> Span { + self.span.clone() + } +} + +impl Boolean { + /// Returns the boolean value of the token. + #[must_use] + pub fn value(&self) -> bool { + match self.span.str() { + "true" => true, + "false" => false, + _ => unreachable!("Invalid boolean literal"), + } + } +} + +impl Debug for Boolean { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut s = f.debug_struct("Boolean"); + s.field("value", &self.value()); + s.field("span", &self.span); + s.finish() + } +} + /// Represents a hardcoded string literal value in the source code. 
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -590,16 +654,13 @@ impl Token { let word = span.str(); // Checks if the word is a keyword - KeywordKind::from_str(word).ok().map_or_else( - || Identifier { span: span.clone() }.into(), - |kw| { - Keyword { - span: span.clone(), - keyword: kw, - } - .into() - }, - ) + if let Ok(kw) = KeywordKind::from_str(word) { + Keyword { span, keyword: kw }.into() + } else if bool::from_str(word).is_ok() { + Boolean { span }.into() + } else { + Identifier { span }.into() + } } /// Handles a sequence starting with a slash @@ -684,11 +745,11 @@ impl Token { } /// Handles a sequence of digits - fn handle_numeric_literal(iter: &mut SourceIterator, start: usize) -> Self { + fn handle_integer_literal(iter: &mut SourceIterator, start: usize) -> Self { // Tokenizes the whole number part Self::walk_iter(iter, |character| character.is_ascii_digit()); - Numeric { + Integer { span: Self::create_span(start, iter), } .into() @@ -871,9 +932,9 @@ impl Token { else if character == '`' { Self::handle_macro_string_literal(iter, start) } - // Found numeric literal + // Found integer literal else if character.is_ascii_digit() { - Ok(Self::handle_numeric_literal(iter, start)) + Ok(Self::handle_integer_literal(iter, start)) } // Found a punctuation else if character.is_ascii_punctuation() { diff --git a/src/semantic/mod.rs b/src/semantic/mod.rs index aa1e7d0..0cef6de 100644 --- a/src/semantic/mod.rs +++ b/src/semantic/mod.rs @@ -11,7 +11,7 @@ use error::{ use crate::{ base::{self, source_file::SourceElement as _, Handler}, - lexical::token::{MacroStringLiteral, MacroStringLiteralPart}, + lexical::token::{KeywordKind, MacroStringLiteral, MacroStringLiteralPart}, syntax::syntax_tree::{ condition::{ BinaryCondition, Condition, ParenthesizedCondition, PrimaryCondition, UnaryCondition, @@ -24,7 +24,7 @@ use crate::{ Conditional, Else, ExecuteBlock, ExecuteBlockHead, 
ExecuteBlockHeadItem as _, ExecuteBlockTail, }, - Block, Grouping, Run, Semicolon, Statement, + Block, Grouping, Run, Semicolon, SemicolonStatement, Statement, VariableDeclaration, }, AnyStringLiteral, }, @@ -291,17 +291,20 @@ impl Semicolon { macro_names: &HashSet, handler: &impl Handler, ) -> Result<(), error::Error> { - match self.expression() { - Expression::Primary(Primary::FunctionCall(func)) => { - func.analyze_semantics(function_names, macro_names, handler) - } - Expression::Primary(unexpected) => { - let error = error::Error::UnexpectedExpression(UnexpectedExpression( - Expression::Primary(unexpected.clone()), - )); - handler.receive(error.clone()); - Err(error) - } + match self.statement() { + SemicolonStatement::Expression(expr) => match expr { + Expression::Primary(Primary::FunctionCall(func)) => { + func.analyze_semantics(function_names, macro_names, handler) + } + Expression::Primary(unexpected) => { + let error = error::Error::UnexpectedExpression(UnexpectedExpression( + Expression::Primary(unexpected.clone()), + )); + handler.receive(error.clone()); + Err(error) + } + }, + SemicolonStatement::VariableDeclaration(decl) => decl.analyze_semantics(handler), } } } @@ -456,7 +459,7 @@ impl Primary { Self::FunctionCall(func) => { func.analyze_semantics(function_names, macro_names, handler) } - Self::Lua(_) | Self::StringLiteral(_) => Ok(()), + Self::Lua(_) | Self::StringLiteral(_) | Self::Integer(_) | Self::Boolean(_) => Ok(()), Self::MacroStringLiteral(literal) => literal.analyze_semantics(macro_names, handler), } } @@ -514,6 +517,69 @@ impl AnyStringLiteral { } } +impl VariableDeclaration { + /// Analyzes the semantics of a variable declaration. 
+ pub fn analyze_semantics( + &self, + handler: &impl Handler, + ) -> Result<(), error::Error> { + match self.expression() { + Expression::Primary(Primary::Integer(num)) => { + if self.variable_type().keyword == KeywordKind::Bool { + let err = error::Error::UnexpectedExpression(UnexpectedExpression( + Expression::Primary(Primary::Integer(num.clone())), + )); + handler.receive(err.clone()); + Err(err) + } else { + Ok(()) + } + } + Expression::Primary(Primary::Boolean(bool)) => { + if self.variable_type().keyword == KeywordKind::Int { + let err = error::Error::UnexpectedExpression(UnexpectedExpression( + Expression::Primary(Primary::Boolean(bool.clone())), + )); + handler.receive(err.clone()); + Err(err) + } else { + Ok(()) + } + } + Expression::Primary(Primary::StringLiteral(str)) => { + if matches!( + self.variable_type().keyword, + KeywordKind::Int | KeywordKind::Bool + ) { + let err = error::Error::UnexpectedExpression(UnexpectedExpression( + Expression::Primary(Primary::StringLiteral(str.clone())), + )); + handler.receive(err.clone()); + Err(err) + } else { + Ok(()) + } + } + Expression::Primary(Primary::MacroStringLiteral(str)) => { + if matches!( + self.variable_type().keyword, + KeywordKind::Int | KeywordKind::Bool + ) { + let err = error::Error::UnexpectedExpression(UnexpectedExpression( + Expression::Primary(Primary::MacroStringLiteral(str.clone())), + )); + handler.receive(err.clone()); + Err(err) + } else { + Ok(()) + } + } + + Expression::Primary(_) => Ok(()), + } + } +} + impl PrimaryCondition { /// Analyzes the semantics of a primary condition. 
pub fn analyze_semantics( diff --git a/src/syntax/error.rs b/src/syntax/error.rs index a69883a..00991b3 100644 --- a/src/syntax/error.rs +++ b/src/syntax/error.rs @@ -32,7 +32,8 @@ pub enum SyntaxKind { Keyword(KeywordKind), Identifier, Declaration, - Numeric, + Integer, + Boolean, StringLiteral, MacroStringLiteral, AnyStringLiteral, @@ -69,7 +70,8 @@ impl SyntaxKind { Self::Punctuation(char) => format!("a punctuation token `{char}`"), Self::Keyword(keyword) => format!("a keyword token `{}`", keyword.as_str()), Self::Declaration => "a declaration token".to_string(), - Self::Numeric => "a numeric token".to_string(), + Self::Integer => "an integer token".to_string(), + Self::Boolean => "a boolean token".to_string(), Self::StringLiteral => "a string literal".to_string(), Self::MacroStringLiteral => "a macro string literal".to_string(), Self::AnyStringLiteral => "a (macro) string literal".to_string(), @@ -106,7 +108,8 @@ impl Display for UnexpectedSyntax { Some(Token::Punctuation(punctuation)) => { format!("a punctuation token `{}`", punctuation.punctuation) } - Some(Token::Numeric(..)) => "a numeric token".to_string(), + Some(Token::Integer(..)) => "an integer token".to_string(), + Some(Token::Boolean(..)) => "a boolean token".to_string(), Some(Token::CommandLiteral(..)) => "a literal command token".to_string(), Some(Token::StringLiteral(..)) => "a string literal token".to_string(), Some(Token::MacroStringLiteral(..)) => "a macro string literal token".to_string(), diff --git a/src/syntax/parser.rs b/src/syntax/parser.rs index bf2460d..93f2a40 100644 --- a/src/syntax/parser.rs +++ b/src/syntax/parser.rs @@ -7,7 +7,7 @@ use crate::{ base::{self, Handler}, lexical::{ token::{ - Identifier, Keyword, KeywordKind, MacroStringLiteral, Numeric, Punctuation, + Identifier, Integer, Keyword, KeywordKind, MacroStringLiteral, Punctuation, StringLiteral, Token, }, token_stream::{Delimited, Delimiter, TokenStream, TokenTree}, @@ -399,16 +399,16 @@ impl<'a> Frame<'a> { } } - /// 
Expects the next [`Token`] to be an [`Numeric`], and returns it. + /// Expects the next [`Token`] to be an [`Integer`], and returns it. /// /// # Errors /// If the next [`Token`] is not an [`Identifier`]. - pub fn parse_numeric(&mut self, handler: &impl Handler) -> ParseResult { + pub fn parse_integer(&mut self, handler: &impl Handler) -> ParseResult { match self.next_significant_token() { - Reading::Atomic(Token::Numeric(ident)) => Ok(ident), + Reading::Atomic(Token::Integer(ident)) => Ok(ident), found => { let err = Error::UnexpectedSyntax(UnexpectedSyntax { - expected: SyntaxKind::Numeric, + expected: SyntaxKind::Integer, found: found.into_token(), }); handler.receive(err.clone()); diff --git a/src/syntax/syntax_tree/expression.rs b/src/syntax/syntax_tree/expression.rs index cd1b52c..9e24553 100644 --- a/src/syntax/syntax_tree/expression.rs +++ b/src/syntax/syntax_tree/expression.rs @@ -11,7 +11,8 @@ use crate::{ }, lexical::{ token::{ - Identifier, Keyword, KeywordKind, MacroStringLiteral, Punctuation, StringLiteral, Token, + Boolean, Identifier, Integer, Keyword, KeywordKind, MacroStringLiteral, Punctuation, + StringLiteral, Token, }, token_stream::Delimiter, }, @@ -53,8 +54,10 @@ impl SourceElement for Expression { /// /// ``` ebnf /// Primary: -/// FunctionCall +/// Integer +/// | Boolean /// | StringLiteral +/// | FunctionCall /// | MacroStringLiteral /// | LuaCode /// ``` @@ -62,8 +65,10 @@ impl SourceElement for Expression { #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, EnumAsInner)] pub enum Primary { - FunctionCall(FunctionCall), + Integer(Integer), + Boolean(Boolean), StringLiteral(StringLiteral), + FunctionCall(FunctionCall), MacroStringLiteral(MacroStringLiteral), Lua(Box), } @@ -71,8 +76,10 @@ pub enum Primary { impl SourceElement for Primary { fn span(&self) -> Span { match self { - Self::FunctionCall(function_call) => function_call.span(), + 
Self::Integer(int) => int.span(), + Self::Boolean(bool) => bool.span(), Self::StringLiteral(string_literal) => string_literal.span(), + Self::FunctionCall(function_call) => function_call.span(), Self::MacroStringLiteral(macro_string_literal) => macro_string_literal.span(), Self::Lua(lua_code) => lua_code.span(), } @@ -224,6 +231,22 @@ impl<'a> Parser<'a> { } } + // integer expression + Reading::Atomic(Token::Integer(int)) => { + // eat the int + self.forward(); + + Ok(Primary::Integer(int)) + } + + // boolean expression + Reading::Atomic(Token::Boolean(bool)) => { + // eat the bool + self.forward(); + + Ok(Primary::Boolean(bool)) + } + // string literal expression Reading::Atomic(Token::StringLiteral(literal)) => { // eat the string literal diff --git a/src/syntax/syntax_tree/statement.rs b/src/syntax/syntax_tree/statement.rs index 63398f0..f9ac978 100644 --- a/src/syntax/syntax_tree/statement.rs +++ b/src/syntax/syntax_tree/statement.rs @@ -12,11 +12,11 @@ use crate::{ Handler, }, lexical::{ - token::{CommandLiteral, DocComment, Keyword, KeywordKind, Punctuation, Token}, + token::{CommandLiteral, DocComment, Identifier, Keyword, KeywordKind, Punctuation, Token}, token_stream::Delimiter, }, syntax::{ - error::ParseResult, + error::{Error, ParseResult, SyntaxKind, UnexpectedSyntax}, parser::{Parser, Reading}, }, }; @@ -189,7 +189,7 @@ impl SourceElement for Grouping { /// Syntax Synopsis: /// ``` ebnf /// Semicolon: -/// Expression ';' +/// SemicolonStatement ';' /// ; /// ``` #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -197,7 +197,7 @@ impl SourceElement for Grouping { pub struct Semicolon { /// The expression of the semicolon statement. #[get = "pub"] - expression: Expression, + statement: SemicolonStatement, /// The semicolon of the semicolon statement. 
#[get = "pub"] semicolon: Punctuation, @@ -205,7 +205,7 @@ pub struct Semicolon { impl SourceElement for Semicolon { fn span(&self) -> Span { - self.expression + self.statement .span() .join(&self.semicolon.span()) .expect("The span of the semicolon statement is invalid.") @@ -215,8 +215,69 @@ impl SourceElement for Semicolon { impl Semicolon { /// Dissolves the [`Semicolon`] into its components. #[must_use] - pub fn dissolve(self) -> (Expression, Punctuation) { - (self.expression, self.semicolon) + pub fn dissolve(self) -> (SemicolonStatement, Punctuation) { + (self.statement, self.semicolon) + } +} + +/// Represents a statement that ends with a semicolon in the syntax tree. +/// +/// Syntax Synopsis: +/// ``` ebnf +/// SemicolonStatement: +/// (Expression | VariableDeclaration) +/// ';' +/// ; +/// ``` +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum SemicolonStatement { + /// An expression that ends with a semicolon. + Expression(Expression), + /// A variable declaration. + VariableDeclaration(VariableDeclaration), +} + +impl SourceElement for SemicolonStatement { + fn span(&self) -> Span { + match self { + Self::Expression(expression) => expression.span(), + Self::VariableDeclaration(declaration) => declaration.span(), + } + } +} + +/// Represents a variable declaration in the syntax tree. +/// +/// Syntax Synopsis: +/// +/// ```ebnf +/// VariableDeclaration: +/// ('int' | 'bool') identifier '=' Expression ';' +/// ``` +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Getters)] +pub struct VariableDeclaration { + /// The type of the variable. + #[get = "pub"] + variable_type: Keyword, + /// The identifier of the variable. + #[get = "pub"] + identifier: Identifier, + /// The equals sign of the variable declaration. 
+ #[get = "pub"] + equals: Punctuation, + /// The expression of the variable declaration. + #[get = "pub"] + expression: Expression, +} + +impl SourceElement for VariableDeclaration { + fn span(&self) -> Span { + self.variable_type + .span() + .join(&self.expression.span()) + .expect("The span of the variable declaration is invalid.") } } @@ -333,17 +394,77 @@ impl<'a> Parser<'a> { } // semicolon statement - _ => { - let expression = self.parse_expression(handler)?; - let semicolon = self.parse_punctuation(';', true, handler)?; - - tracing::trace!("Parsed semicolon statement: {:?}", expression); - - Ok(Statement::Semicolon(Semicolon { - expression, - semicolon, - })) - } + _ => self.parse_semicolon(handler).map(Statement::Semicolon), } } + + /// Parses a [`Semicolon`]. + #[tracing::instrument(level = "trace", skip_all)] + pub fn parse_semicolon( + &mut self, + handler: &impl Handler, + ) -> ParseResult { + let statement = match self.stop_at_significant() { + Reading::Atomic(Token::Keyword(keyword)) + if matches!(keyword.keyword, KeywordKind::Int | KeywordKind::Bool) => + { + self.parse_variable_declaration(handler) + .map(SemicolonStatement::VariableDeclaration) + } + _ => self + .parse_expression(handler) + .map(SemicolonStatement::Expression), + }?; + + let semicolon = self.parse_punctuation(';', true, handler)?; + + Ok(Semicolon { + statement, + semicolon, + }) + } + + /// Parses a [`VariableDeclaration`]. 
+ #[tracing::instrument(level = "trace", skip_all)] + pub fn parse_variable_declaration( + &mut self, + handler: &impl Handler, + ) -> ParseResult { + let variable_type = match self.stop_at_significant() { + Reading::Atomic(Token::Keyword(keyword)) + if matches!(keyword.keyword, KeywordKind::Int | KeywordKind::Bool) => + { + self.forward(); + keyword + } + unexpected => { + let err = Error::UnexpectedSyntax(UnexpectedSyntax { + expected: SyntaxKind::Either(&[ + SyntaxKind::Keyword(KeywordKind::Int), + SyntaxKind::Keyword(KeywordKind::Bool), + ]), + found: unexpected.into_token(), + }); + handler.receive(err.clone()); + return Err(err); + } + }; + + // read identifier + self.stop_at_significant(); + let identifier = self.parse_identifier(handler)?; + + // read equals sign + let equals = self.parse_punctuation('=', true, handler)?; + + // read expression + let expression = self.parse_expression(handler)?; + + Ok(VariableDeclaration { + variable_type, + identifier, + equals, + expression, + }) + } } diff --git a/src/transpile/transpiler.rs b/src/transpile/transpiler.rs index 9830c6b..b10c710 100644 --- a/src/transpile/transpiler.rs +++ b/src/transpile/transpiler.rs @@ -392,6 +392,8 @@ impl Transpiler { Expression::Primary(Primary::Lua(lua)) => { lua.eval_string(handler).map(Option::unwrap_or_default) } + Expression::Primary(Primary::Integer(num)) => Ok(num.span.str().to_string()), + Expression::Primary(Primary::Boolean(bool)) => Ok(bool.span.str().to_string()), Expression::Primary(Primary::StringLiteral(string)) => { Ok(string.str_content().to_string()) } @@ -459,6 +461,20 @@ impl Transpiler { Expression::Primary(Primary::FunctionCall(func)) => { self.transpile_function_call(func, handler).map(Some) } + Expression::Primary(Primary::Integer(num)) => { + let error = TranspileError::UnexpectedExpression(UnexpectedExpression( + Expression::Primary(Primary::Integer(num.clone())), + )); + handler.receive(error.clone()); + Err(error) + } + 
Expression::Primary(Primary::Boolean(bool)) => { + let error = TranspileError::UnexpectedExpression(UnexpectedExpression( + Expression::Primary(Primary::Boolean(bool.clone())), + )); + handler.receive(error.clone()); + Err(error) + } Expression::Primary(Primary::StringLiteral(string)) => { Ok(Some(Command::Raw(string.str_content().to_string()))) } @@ -501,8 +517,9 @@ impl Transpiler { Ok(Some(Command::Group(commands))) } } - #[allow(clippy::match_wildcard_for_single_variants)] - Statement::Semicolon(semi) => match semi.expression() { + Statement::Semicolon(semi) => match semi.statement() { + #[expect(clippy::match_wildcard_for_single_variants)] + SemicolonStatement::Expression(expr) => match expr { Expression::Primary(Primary::FunctionCall(func)) => { self.transpile_function_call(func, handler).map(Some) } @@ -512,6 +529,10 @@ impl Transpiler { )); handler.receive(error.clone()); Err(error) + } + }, + SemicolonStatement::VariableDeclaration(_) => { + todo!("Variable declarations are not yet supported.") } }, }