//! Provides a way to parse a token stream into an abstract syntax tree. use derive_more::{Deref, DerefMut}; use enum_as_inner::EnumAsInner; use crate::{ base::Handler, lexical::{ token::{Identifier, Keyword, KeywordKind, Numeric, Punctuation, Token}, token_stream::{Delimited, Delimiter, TokenStream, TokenTree}, }, }; use super::error::{Error, SyntaxKind, UnexpectedSyntax}; /// Represents a parser that reads a token stream and constructs an abstract syntax tree. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deref, DerefMut)] pub struct Parser<'a> { #[deref] #[deref_mut] current_frame: Frame<'a>, stack: Vec>, } impl<'a> Parser<'a> { /// Creates a new parser from the given token stream. #[must_use] pub fn new(token_stream: &'a TokenStream) -> Self { Self { current_frame: Frame { token_provider: TokenProvider::TokenStream(token_stream), current_index: 0, }, stack: Vec::new(), } } /// Steps into the [`Delimited`] token stream and parses the content within the delimiters. /// /// The parser's position must be at the delimited token stream. pub fn step_into( &mut self, delimiter: Delimiter, f: impl FnOnce(&mut Self) -> Option, handler: &dyn Handler, ) -> Option> { self.current_frame.stop_at_significant(); let raw_token_tree = self .current_frame .token_provider .token_stream() .get(self.current_frame.current_index); // move after the whole delimited list self.current_frame.forward(); let expected = match delimiter { Delimiter::Parenthesis => '(', Delimiter::Brace => '{', Delimiter::Bracket => '[', }; let delimited_stream = if let Some(token_tree) = raw_token_tree { match token_tree { TokenTree::Delimited(delimited_tree) if delimited_tree.delimiter == delimiter => { delimited_tree } found => { handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax { expected: SyntaxKind::Punctuation(expected), found: Some(match found { TokenTree::Token(token) => token.clone(), TokenTree::Delimited(delimited_tree) => { Token::Punctuation(delimited_tree.open.clone()) } }), })); return None; } } } else { handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax { expected: SyntaxKind::Punctuation(expected), found: self.get_reading(None).into_token(), })); return None; }; // creates a new frame let new_frame = Frame { token_provider: TokenProvider::Delimited(delimited_stream), current_index: 0, }; // pushes the current frame onto the stack and replaces the current frame with the new one self.stack .push(std::mem::replace(&mut self.current_frame, new_frame)); let open = delimited_stream.open.clone(); let tree = f(self); // pops the current frame off the stack let new_frame = self.stack.pop()?; // the current frame must be at the end if !self.current_frame.is_exhausted() { let expected = match self .current_frame .token_provider .as_delimited() .unwrap() .delimiter { Delimiter::Parenthesis => ')', Delimiter::Brace => '}', Delimiter::Bracket => ']', }; handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax { expected: SyntaxKind::Punctuation(expected), found: self.peek().into_token(), })); } let close_punctuation = self .current_frame .token_provider .as_delimited() .unwrap() .close .clone(); // replaces the current frame with the popped one self.current_frame = new_frame; Some(DelimitedTree { open, tree, close: close_punctuation, }) } /// Tries to parse the given function, and if it fails, resets the current index to the /// `current_index` before the function call. pub fn try_parse(&mut self, f: impl FnOnce(&mut Self) -> Option) -> Option { let current_index = self.current_frame.current_index; let result = f(self); if result.is_none() { self.current_frame.current_index = current_index; } result } } /// Represents a result of [`Parser::step_into()`] function. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct DelimitedTree { /// The opening delimiter. pub open: Punctuation, /// The tree inside the delimiter. pub tree: Option, /// The closing delimiter. pub close: Punctuation, } /// Provides a way to iterate over a token stream. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, EnumAsInner)] pub enum TokenProvider<'a> { /// Iterating at the top level of the token stream. TokenStream(&'a TokenStream), /// Iterating inside a delimited token stream. Delimited(&'a Delimited), } impl<'a> TokenProvider<'a> { /// Gets the token stream of the current token provider. #[must_use] pub fn token_stream(&self) -> &'a TokenStream { match self { TokenProvider::TokenStream(token_stream) => token_stream, TokenProvider::Delimited(delimited) => &delimited.token_stream, } } } /// Represents a single frame of the parser's stack, responsible for reading a token stream in /// that given token stream level. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Frame<'a> { token_provider: TokenProvider<'a>, current_index: usize, } impl<'a> Frame<'a> { /// Checks if the current [`Frame`] doesn't have any more significant [`TokenTree`]s to /// parse. #[must_use] pub fn is_exhausted(&self) -> bool { let token_stream = self.token_provider.token_stream(); for i in self.current_index..self.token_provider.token_stream().len() { if !matches!( token_stream.get(i), Some(TokenTree::Token( Token::WhiteSpaces(..) | Token::Comment(..) )) ) { return false; } } true } /// Checks if the current [`Frame`] has reached the end of the [`TokenStream`]. #[must_use] pub fn is_end(&self) -> bool { self.current_index >= self.token_provider.token_stream().len() } fn get_reading(&self, token: Option<&TokenTree>) -> Reading { token.map_or_else( || match self.token_provider { // end of file TokenProvider::TokenStream(..) => Reading::Eof, TokenProvider::Delimited(delimited) => { Reading::DelimitedEnd(delimited.close.clone()) } }, |token| match token { TokenTree::Token(token) => Reading::Atomic(token.clone()), TokenTree::Delimited(delimited) => Reading::IntoDelimited(delimited.open.clone()), }, ) } /// Returns a [`Token`] pointing by the `current_index` of the [`Frame`]. #[must_use] pub fn peek(&self) -> Reading { self.get_reading(self.token_provider.token_stream().get(self.current_index)) } /// Returns a [`Token`] pointing by the `current_index` with the given index offset of the /// [`Frame`]. /// /// # Returns /// /// `None` if `offset + current_index` is less than zero or greter than /// `self.token_provider.token_stream().len() + 1` #[must_use] pub fn peek_offset(&self, offset: isize) -> Option { let index = self.current_index.checked_add(offset.try_into().ok()?)?; if index > self.token_provider.token_stream().len() + 1 { return None; } Some(self.get_reading(self.token_provider.token_stream().get(index))) } /// Returns a [`Token`] pointing by the `current_index` of the [`Frame`] and increments the /// `current_index` by 1. pub fn next_token(&mut self) -> Reading { let token = self.peek(); // increment the index self.forward(); token } /// Forwards the `current_index` by 1 if the [`Frame`] is not exhausted. pub fn forward(&mut self) { // increment the index if !self.is_end() { self.current_index += 1; } } /// Skips any insignificant [`Token`]s, returns the next significant [`Token`] found, and /// increments the `current_index` afterward. pub fn next_significant_token(&mut self) -> Reading { let token = self.stop_at_significant(); // increment the index self.forward(); token } /// Makes the current [`Frame`] point to the significant [`Token`] if currently not. /// /// # Returns /// The significant [`Token`] if found, otherwise `None`. pub fn stop_at_significant(&mut self) -> Reading { while !self.is_end() { let token = self.peek(); if !matches!( token, Reading::Atomic(Token::WhiteSpaces(..) | Token::Comment(..)) ) { return token; } self.forward(); } match self.token_provider { TokenProvider::TokenStream(..) => Reading::Eof, TokenProvider::Delimited(delimited) => Reading::DelimitedEnd(delimited.close.clone()), } } /// Makes the current position stops at the first token that satisfies the predicate. pub fn stop_at(&mut self, predicate: impl Fn(&Reading) -> bool) -> Reading { while !self.is_end() { let token = self.peek(); if predicate(&token) { return token; } self.current_index += 1; } match self.token_provider { TokenProvider::TokenStream(..) => Reading::Eof, TokenProvider::Delimited(delimited) => Reading::DelimitedEnd(delimited.close.clone()), } } /// Expects the next [`Token`] to be an [`Identifier`], and returns it. /// /// # Errors /// If the next [`Token`] is not an [`Identifier`]. pub fn parse_identifier(&mut self, handler: &impl Handler) -> Option { match self.next_significant_token() { Reading::Atomic(Token::Identifier(ident)) => Some(ident), found => { handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax { expected: SyntaxKind::Identifier, found: found.into_token(), })); None } } } /// Expects the next [`Token`] to be an [`Numeric`], and returns it. /// /// # Errors /// If the next [`Token`] is not an [`Identifier`]. pub fn parse_numeric(&mut self, handler: &dyn Handler) -> Option { match self.next_significant_token() { Reading::Atomic(Token::Numeric(ident)) => Some(ident), found => { handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax { expected: SyntaxKind::Numeric, found: found.into_token(), })); None } } } /// Expects the next [`Token`] to be a [`Keyword`] of specific kind, and returns it. /// /// # Errors /// If the next [`Token`] is not a [`Keyword`] of specific kind. pub fn parse_keyword( &mut self, expected: KeywordKind, handler: &dyn Handler, ) -> Option { match self.next_significant_token() { Reading::Atomic(Token::Keyword(keyword_token)) if keyword_token.keyword == expected => { Some(keyword_token) } found => { handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax { expected: SyntaxKind::Keyword(expected), found: found.into_token(), })); None } } } /// Expects the next [`Token`] to be a [`Punctuation`] of specific kind, and returns it. /// /// # Errors /// If the next [`Token`] is not a [`Punctuation`] of specific kind. pub fn parse_punctuation( &mut self, expected: char, skip_insignificant: bool, handler: &dyn Handler, ) -> Option { match if skip_insignificant { self.next_significant_token() } else { self.next_token() } { Reading::Atomic(Token::Punctuation(punctuation_token)) if punctuation_token.punctuation == expected => { Some(punctuation_token) } found => { handler.receive(Error::UnexpectedSyntax(UnexpectedSyntax { expected: SyntaxKind::Punctuation(expected), found: found.into_token(), })); None } } } } /// Represents the read value of the [`Frame`]. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Reading { /// A singular token. Atomic(Token), /// Found an openning delimiter token, which means that the parser can step into a new /// delimited frame. IntoDelimited(Punctuation), /// Found a closing delimiter token, which means that the parser should step out of the current /// delimited frame. DelimitedEnd(Punctuation), /// End of file. Eof, } impl Reading { /// Gets the read token inside the [`Reading`] as `Option` /// /// # Returns /// /// Returns `None` if the [`Reading`] is [`Reading::Eof`]. #[must_use] pub fn into_token(self) -> Option { match self { Self::Atomic(token) => Some(token), Self::IntoDelimited(punc) | Self::DelimitedEnd(punc) => Some(Token::Punctuation(punc)), Self::Eof => None, } } }