Add tokenizing module
This commit is contained in:
commit
830b3b10d9
|
@ -0,0 +1,2 @@
|
||||||
|
/target
|
||||||
|
/Cargo.lock
|
|
@ -0,0 +1,14 @@
|
||||||
|
[package]
|
||||||
|
name = "shulkerscript-lang"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
colored = "2.1.0"
|
||||||
|
derive_more = { version = "0.99.17", default-features = false, features = ["deref", "from", "deref_mut"] }
|
||||||
|
getset = "0.1.2"
|
||||||
|
strum = { version = "0.26.2", features = ["derive"] }
|
||||||
|
strum_macros = "0.26.2"
|
||||||
|
thiserror = "1.0.58"
|
|
@ -0,0 +1,5 @@
|
||||||
|
/// Sink for diagnostics of type `T`.
///
/// Producers hand every diagnostic they encounter to [`Handler::receive`]; what happens to it
/// afterwards is up to the implementor.
pub trait Handler<T> {
    /// Receives a single diagnostic and handles it.
    ///
    /// Takes `&self`, so an implementor that needs to record state must use interior mutability.
    fn receive(&self, error: T);
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
use std::io;
|
||||||
|
|
||||||
|
/// An error that occurred during compilation.
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub enum Error {
|
||||||
|
#[error("An error occurred while reading the file.")]
|
||||||
|
IoError(#[from] io::Error),
|
||||||
|
#[error("An error occured while tokenizing the source code.")]
|
||||||
|
TokenizeError(#[from] crate::lexical::token::TokenizeError),
|
||||||
|
#[error("An error occurred")]
|
||||||
|
Other(&'static str),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A specialized [`Result`] type for this crate.
|
||||||
|
pub type Result<T> = std::result::Result<T, Error>;
|
|
@ -0,0 +1,75 @@
|
||||||
|
//! Module containing structures and implementations for logging messages to the user.
|
||||||
|
|
||||||
|
use colored::Colorize;
|
||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
use super::source_file::Span;
|
||||||
|
|
||||||
|
/// Represent the severity of a log message to be printed to the console.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
pub enum Severity {
|
||||||
|
Error,
|
||||||
|
Info,
|
||||||
|
Warning,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Struct implementing [`Display`] that represents a log message to be displayed to the user.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Message<T> {
|
||||||
|
/// The severity of the log message.
|
||||||
|
pub severity: Severity,
|
||||||
|
|
||||||
|
/// The message to be displayed.
|
||||||
|
pub display: T,
|
||||||
|
}
|
||||||
|
impl<T> Message<T> {
|
||||||
|
/// Create a new log message with the given severity and message to be displayed.
|
||||||
|
pub fn new(severity: Severity, display: T) -> Self {
|
||||||
|
Self { severity, display }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Display> Display for Message<T> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
let log_header = (match self.severity {
|
||||||
|
Severity::Error => "[error]:".red(),
|
||||||
|
Severity::Info => "[info]:".green(),
|
||||||
|
Severity::Warning => "[warning]:".yellow(),
|
||||||
|
})
|
||||||
|
.bold();
|
||||||
|
|
||||||
|
let message_part = &self.display.to_string().bold();
|
||||||
|
|
||||||
|
write!(f, "{log_header} {message_part}")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Structure implementing [`Display`] that prints the particular span of the source code.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct SourceCodeDisplay<'a, T> {
|
||||||
|
/// The span of the source code to be printed.
|
||||||
|
pub span: &'a Span,
|
||||||
|
|
||||||
|
/// The help message to be displayed.
|
||||||
|
pub help_display: Option<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> SourceCodeDisplay<'a, T> {
|
||||||
|
/// Create a new source code display with the given span and help message to be displayed.
|
||||||
|
pub fn new(span: &'a Span, help_display: Option<T>) -> Self {
|
||||||
|
Self { span, help_display }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T: std::fmt::Display> Display for SourceCodeDisplay<'a, T> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", self.span.str())?;
|
||||||
|
|
||||||
|
if let Some(help_display) = &self.help_display {
|
||||||
|
write!(f, "\n\n{help_display}")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,12 @@
|
||||||
|
//! The base module contains the core functionality of the `ShulkerScript` language.
|
||||||
|
|
||||||
|
pub mod source_file;
|
||||||
|
|
||||||
|
mod error;
|
||||||
|
#[doc(inline)]
|
||||||
|
pub use error::{Error, Result};
|
||||||
|
|
||||||
|
mod diagnostic;
|
||||||
|
pub use diagnostic::Handler;
|
||||||
|
|
||||||
|
pub mod log;
|
|
@ -0,0 +1,359 @@
|
||||||
|
//! Module for handling source files and their elements.
|
||||||
|
|
||||||
|
use std::{
|
||||||
|
cmp::Ordering,
|
||||||
|
fmt::Debug,
|
||||||
|
fs,
|
||||||
|
iter::{Iterator, Peekable},
|
||||||
|
ops::Range,
|
||||||
|
path::PathBuf,
|
||||||
|
str::CharIndices,
|
||||||
|
sync::Arc,
|
||||||
|
};
|
||||||
|
|
||||||
|
use getset::{CopyGetters, Getters};
|
||||||
|
|
||||||
|
use super::Error;
|
||||||
|
|
||||||
|
/// A source file: its path, its full text and the byte range of each of its lines.
#[derive(Clone)]
pub struct SourceFile {
    path: PathBuf,
    content: String,
    lines: Vec<Range<usize>>,
}

// `content` is deliberately left out of the debug output.
#[allow(clippy::missing_fields_in_debug)]
impl Debug for SourceFile {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            path,
            lines,
            content: _,
        } = self;

        let mut builder = f.debug_struct("SourceFile");
        builder.field("path", path);
        builder.field("lines", lines);
        builder.finish()
    }
}
|
||||||
|
|
||||||
|
impl SourceFile {
|
||||||
|
fn new(path: PathBuf, content: String) -> Arc<Self> {
|
||||||
|
let lines = get_line_byte_positions(&content);
|
||||||
|
|
||||||
|
Arc::new(Self {
|
||||||
|
path,
|
||||||
|
content,
|
||||||
|
lines,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the content of the source file
|
||||||
|
#[must_use]
|
||||||
|
pub fn content(&self) -> &str {
|
||||||
|
&self.content
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the line of the source file at the given line number.
|
||||||
|
///
|
||||||
|
/// Numbering starts at 1.
|
||||||
|
#[must_use]
|
||||||
|
pub fn get_line(&self, line: usize) -> Option<&str> {
|
||||||
|
if line == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let line = line - 1;
|
||||||
|
self.lines
|
||||||
|
.get(line)
|
||||||
|
.map(|range| &self.content()[range.clone()])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the [`SourceIterator`] for the source file.
|
||||||
|
#[must_use]
|
||||||
|
pub fn iter<'a>(self: &'a Arc<Self>) -> SourceIterator<'a> {
|
||||||
|
SourceIterator {
|
||||||
|
source_file: self,
|
||||||
|
iterator: self.content().char_indices().peekable(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the number of lines in the source file.
|
||||||
|
#[must_use]
|
||||||
|
pub fn line_amount(&self) -> usize {
|
||||||
|
self.lines.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load the source file from the given file path.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// - [`Error::IoError`]: Error occurred when reading the file contents.
|
||||||
|
pub fn load(path: PathBuf) -> Result<Arc<Self>, Error> {
|
||||||
|
let source = fs::read_to_string(&path).map_err(Error::IoError)?;
|
||||||
|
Ok(Self::new(path, source))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the [`Location`] of a given byte index
|
||||||
|
#[must_use]
|
||||||
|
pub fn get_location(&self, byte_index: usize) -> Option<Location> {
|
||||||
|
if self.content.is_char_boundary(byte_index) {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
// get the line number by binary searching the line ranges
|
||||||
|
let line = self
|
||||||
|
.lines
|
||||||
|
.binary_search_by(|range| {
|
||||||
|
if range.contains(&byte_index) {
|
||||||
|
Ordering::Equal
|
||||||
|
} else if byte_index < range.start {
|
||||||
|
Ordering::Greater
|
||||||
|
} else {
|
||||||
|
Ordering::Less
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.ok()?;
|
||||||
|
|
||||||
|
let line_starting_byte_index = self.lines[line].start;
|
||||||
|
let line_str = self.get_line(line + 1).unwrap();
|
||||||
|
|
||||||
|
// get the column number by iterating through the utf-8 characters (starts at 1)
|
||||||
|
let column = line_str
|
||||||
|
.char_indices()
|
||||||
|
.take_while(|(i, _)| *i + line_starting_byte_index < byte_index)
|
||||||
|
.count()
|
||||||
|
+ 1;
|
||||||
|
|
||||||
|
Some(Location {
|
||||||
|
line: line + 1,
|
||||||
|
column,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents a range of characters in a source file.
|
||||||
|
#[derive(Clone, Getters, CopyGetters)]
|
||||||
|
pub struct Span {
|
||||||
|
/// Get the start byte index of the span.
|
||||||
|
#[get_copy = "pub"]
|
||||||
|
start: usize,
|
||||||
|
|
||||||
|
/// Get the end byte index of the span (exclusive).
|
||||||
|
#[get_copy = "pub"]
|
||||||
|
end: usize,
|
||||||
|
|
||||||
|
/// Get the source file that the span is located in.
|
||||||
|
#[get = "pub"]
|
||||||
|
source_file: Arc<SourceFile>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::missing_fields_in_debug)]
|
||||||
|
impl Debug for Span {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("Span")
|
||||||
|
.field("start", &self.start)
|
||||||
|
.field("end", &self.end)
|
||||||
|
.field("content", &self.str())
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialEq for Span {
|
||||||
|
fn eq(&self, other: &Self) -> bool {
|
||||||
|
Arc::ptr_eq(&self.source_file, &other.source_file)
|
||||||
|
&& self.start == other.start
|
||||||
|
&& self.end == other.end
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Eq for Span {}
|
||||||
|
|
||||||
|
#[allow(clippy::non_canonical_partial_ord_impl)]
|
||||||
|
impl PartialOrd for Span {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
let self_ptr_value = Arc::as_ptr(&self.source_file) as usize;
|
||||||
|
let other_ptr_value = Arc::as_ptr(&other.source_file) as usize;
|
||||||
|
|
||||||
|
Some(self_ptr_value.cmp(&other_ptr_value).then_with(|| {
|
||||||
|
self.start
|
||||||
|
.cmp(&other.start)
|
||||||
|
.then_with(|| self.end.cmp(&other.end))
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ord for Span {
|
||||||
|
fn cmp(&self, other: &Self) -> Ordering {
|
||||||
|
let self_ptr_value = Arc::as_ptr(&self.source_file) as usize;
|
||||||
|
let other_ptr_value = Arc::as_ptr(&other.source_file) as usize;
|
||||||
|
|
||||||
|
self_ptr_value
|
||||||
|
.cmp(&other_ptr_value)
|
||||||
|
.then_with(|| self.start.cmp(&other.start))
|
||||||
|
.then_with(|| self.end.cmp(&other.end))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::hash::Hash for Span {
|
||||||
|
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||||
|
self.start.hash(state);
|
||||||
|
self.end.hash(state);
|
||||||
|
Arc::as_ptr(&self.source_file).hash(state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Span {
|
||||||
|
/// Create a span from the given start and end byte indices in the source file.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
/// - `start`: The start byte index of the span.
|
||||||
|
/// - `end`: The end byte index of the span (exclusive).
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(source_file: Arc<SourceFile>, start: usize, end: usize) -> Option<Self> {
|
||||||
|
if start > end
|
||||||
|
|| !source_file.content().is_char_boundary(start)
|
||||||
|
|| source_file.content().len() < end
|
||||||
|
|| (source_file.content().len() + 1 != end
|
||||||
|
&& !source_file.content().is_char_boundary(end))
|
||||||
|
{
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Self {
|
||||||
|
start,
|
||||||
|
end,
|
||||||
|
source_file,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a span from the given start byte index to the end of the source file.
|
||||||
|
#[must_use]
|
||||||
|
pub fn to_end(source_file: Arc<SourceFile>, start: usize) -> Option<Self> {
|
||||||
|
if !source_file.content().is_char_boundary(start) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some(Self {
|
||||||
|
start,
|
||||||
|
end: source_file.content().len(),
|
||||||
|
source_file,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the string slice of the source code that the span represents.
|
||||||
|
#[must_use]
|
||||||
|
pub fn str(&self) -> &str {
|
||||||
|
&self.source_file.content()[self.start..self.end]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the starting [`Location`] of the span.
|
||||||
|
#[must_use]
|
||||||
|
pub fn start_location(&self) -> Location {
|
||||||
|
self.source_file.get_location(self.start).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the ending [`Location`] of the span.
|
||||||
|
///
|
||||||
|
/// Returns [`None`] if the end of the span is the end of the source file.
|
||||||
|
#[must_use]
|
||||||
|
pub fn end_location(&self) -> Option<Location> {
|
||||||
|
self.source_file.get_location(self.end)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Join the starting position of this span with the end position of the given span.
|
||||||
|
#[must_use]
|
||||||
|
pub fn join(&self, end: &Self) -> Option<Self> {
|
||||||
|
if !Arc::ptr_eq(&self.source_file, &end.source_file) || self.start > end.end {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(Self {
|
||||||
|
start: self.start,
|
||||||
|
end: end.end,
|
||||||
|
source_file: self.source_file.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A line/column position in a source file; both numbers start at 1.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Location {
    /// Line number of the location (starts at 1).
    pub line: usize,

    /// Column number of the location (starts at 1).
    pub column: usize,
}
|
||||||
|
|
||||||
|
/// Represents an element that is located within a source file.
|
||||||
|
pub trait SourceElement {
|
||||||
|
/// Get the span location of the element.
|
||||||
|
fn span(&self) -> Span;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: SourceElement> SourceElement for Box<T> {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.as_ref().span()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterator iterating over the characters in a source file that can be peeked at.
|
||||||
|
#[derive(Debug, Clone, CopyGetters)]
|
||||||
|
pub struct SourceIterator<'a> {
|
||||||
|
/// Get the source file that the iterator is iterating over.
|
||||||
|
#[get_copy = "pub"]
|
||||||
|
source_file: &'a Arc<SourceFile>,
|
||||||
|
iterator: Peekable<CharIndices<'a>>,
|
||||||
|
}
|
||||||
|
impl<'a> SourceIterator<'a> {
|
||||||
|
/// Peek at the next character in the source file.
|
||||||
|
pub fn peek(&mut self) -> Option<(usize, char)> {
|
||||||
|
self.iterator.peek().copied()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a> Iterator for SourceIterator<'a> {
|
||||||
|
type Item = (usize, char);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.iterator.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the byte positions of the lines in the given text.
///
/// A line ends after `\n`, after `\r\n` (treated as one terminator) or after a lone `\r`, and
/// its range includes the terminator. The text following the last terminator always forms a
/// final, possibly empty, line, so the result is never empty.
fn get_line_byte_positions(text: &str) -> Vec<Range<usize>> {
    // `\n` and `\r` are single-byte in UTF-8 and never occur inside a multi-byte sequence, so
    // scanning bytes finds the same terminators as scanning characters.
    let bytes = text.as_bytes();
    let mut line_ranges = Vec::new();
    let mut line_start = 0;
    let mut index = 0;

    while index < bytes.len() {
        let terminator_len = match bytes[index] {
            b'\r' if bytes.get(index + 1) == Some(&b'\n') => 2,
            b'\r' | b'\n' => 1,
            _ => 0,
        };

        if terminator_len == 0 {
            index += 1;
            continue;
        }

        let line_end = index + terminator_len;
        line_ranges.push(line_start..line_end);
        line_start = line_end;
        index = line_end;
    }

    // add the last line
    line_ranges.push(line_start..bytes.len());

    line_ranges
}
|
|
@ -0,0 +1,62 @@
|
||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
use getset::Getters;
|
||||||
|
|
||||||
|
use crate::base::{
|
||||||
|
log::{Message, Severity, SourceCodeDisplay},
|
||||||
|
source_file::Span,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::token_stream::Delimiter;
|
||||||
|
|
||||||
|
/// Represents an error that occurred during the lexical analysis of the source code.
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, thiserror::Error)]
|
||||||
|
pub enum Error {
|
||||||
|
#[error("Comment is not terminated.")]
|
||||||
|
UnterminatedDelimitedComment(#[from] UnterminatedDelimitedComment),
|
||||||
|
#[error("Delimiter is not terminated.")]
|
||||||
|
UndelimitedDelimiter(#[from] UndelimitedDelimiter),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Source code contains an unclosed `/*` comment.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Getters, thiserror::Error)]
|
||||||
|
pub struct UnterminatedDelimitedComment {
|
||||||
|
/// Span of the unclosed `/*` that starts the comment.
|
||||||
|
pub span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for UnterminatedDelimitedComment {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{}\n{}",
|
||||||
|
Message::new(Severity::Error, "found an unclosed `/*` comment"),
|
||||||
|
SourceCodeDisplay::new(&self.span, Option::<i32>::None)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Delimiter is not closed by its corresponding closing pair.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Getters, thiserror::Error)]
|
||||||
|
pub struct UndelimitedDelimiter {
|
||||||
|
/// Span of the opening delimiter.
|
||||||
|
pub opening_span: Span,
|
||||||
|
|
||||||
|
/// Kind of the delimiter.
|
||||||
|
pub delimiter: Delimiter,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for UndelimitedDelimiter {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"{}\n{}",
|
||||||
|
Message::new(Severity::Error, "found an undelimited delimiter"),
|
||||||
|
SourceCodeDisplay::new(
|
||||||
|
&self.opening_span,
|
||||||
|
Some("this delimiter is not closed by its corresponding closing pair")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,8 @@
|
||||||
|
//! The lexical module is responsible for converting raw text into a stream of tokens that the parser can understand.
|
||||||
|
|
||||||
|
pub mod token_stream;
|
||||||
|
|
||||||
|
pub mod token;
|
||||||
|
|
||||||
|
mod error;
|
||||||
|
pub use error::Error;
|
|
@ -0,0 +1,411 @@
|
||||||
|
//! Contains the [`Token`] struct and its related types.
|
||||||
|
|
||||||
|
use std::{collections::HashMap, str::FromStr, sync::OnceLock};
|
||||||
|
|
||||||
|
use crate::base::{
|
||||||
|
source_file::{SourceElement, SourceIterator, Span},
|
||||||
|
Handler,
|
||||||
|
};
|
||||||
|
use derive_more::From;
|
||||||
|
use strum::IntoEnumIterator;
|
||||||
|
use strum_macros::EnumIter;
|
||||||
|
|
||||||
|
use super::{error::UnterminatedDelimitedComment, Error};
|
||||||
|
|
||||||
|
/// Is an enumeration representing keywords in shulkerscript.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, EnumIter)]
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
pub enum KeywordKind {
|
||||||
|
Function,
|
||||||
|
If,
|
||||||
|
Else,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ToString for KeywordKind {
|
||||||
|
fn to_string(&self) -> String {
|
||||||
|
self.as_str().to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Is an error that is returned when a string cannot be parsed into a [`Keyword`] in [`FromStr`]
|
||||||
|
/// trait implementation.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default, thiserror::Error)]
|
||||||
|
#[error("invalid string representation of keyword.")]
|
||||||
|
pub struct KeywordParseError;
|
||||||
|
|
||||||
|
impl FromStr for KeywordKind {
|
||||||
|
type Err = KeywordParseError;
|
||||||
|
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
static STRING_KEYWORD_MAP: OnceLock<HashMap<&'static str, KeywordKind>> = OnceLock::new();
|
||||||
|
let map = STRING_KEYWORD_MAP.get_or_init(|| {
|
||||||
|
let mut map = HashMap::new();
|
||||||
|
|
||||||
|
for keyword in Self::iter() {
|
||||||
|
map.insert(keyword.as_str(), keyword);
|
||||||
|
}
|
||||||
|
|
||||||
|
map
|
||||||
|
});
|
||||||
|
|
||||||
|
map.get(s).copied().ok_or(KeywordParseError)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl KeywordKind {
|
||||||
|
/// Gets the string representation of the keyword as a `&str`.
|
||||||
|
#[must_use]
|
||||||
|
pub fn as_str(self) -> &'static str {
|
||||||
|
match self {
|
||||||
|
Self::Function => "fn",
|
||||||
|
Self::If => "if",
|
||||||
|
Self::Else => "else",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Is an enumeration containing all kinds of tokens in the Flux programming language.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)]
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
pub enum Token {
|
||||||
|
WhiteSpaces(WhiteSpaces),
|
||||||
|
Identifier(Identifier),
|
||||||
|
Keyword(Keyword),
|
||||||
|
Punctuation(Punctuation),
|
||||||
|
Numeric(Numeric),
|
||||||
|
Comment(Comment),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Token {
|
||||||
|
/// Returns the span of the token.
|
||||||
|
#[must_use]
|
||||||
|
pub fn span(&self) -> &Span {
|
||||||
|
match self {
|
||||||
|
Self::WhiteSpaces(token) => &token.span,
|
||||||
|
Self::Identifier(token) => &token.span,
|
||||||
|
Self::Keyword(token) => &token.span,
|
||||||
|
Self::Punctuation(token) => &token.span,
|
||||||
|
Self::Numeric(token) => &token.span,
|
||||||
|
Self::Comment(token) => &token.span,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for Token {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
match self {
|
||||||
|
Self::WhiteSpaces(token) => token.span(),
|
||||||
|
Self::Identifier(token) => token.span(),
|
||||||
|
Self::Keyword(token) => token.span(),
|
||||||
|
Self::Punctuation(token) => token.span(),
|
||||||
|
Self::Numeric(token) => token.span(),
|
||||||
|
Self::Comment(token) => token.span(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents a contiguous sequence of whitespace characters.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct WhiteSpaces {
|
||||||
|
/// Is the span that makes up the token.
|
||||||
|
pub span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for WhiteSpaces {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.span.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Represents a contiguous sequence of characters that are valid in an identifier.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Identifier {
|
||||||
|
/// Is the span that makes up the token.
|
||||||
|
pub span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for Identifier {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.span.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents a contiguous sequence of characters that are reserved for a keyword.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Keyword {
|
||||||
|
/// Is the span that makes up the token.
|
||||||
|
pub span: Span,
|
||||||
|
|
||||||
|
/// Is the [`KeywordKind`] that the token represents.
|
||||||
|
pub keyword: KeywordKind,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for Keyword {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.span.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents a single ASCII punctuation character.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Punctuation {
|
||||||
|
/// Is the span that makes up the token.
|
||||||
|
pub span: Span,
|
||||||
|
|
||||||
|
/// Is the ASCII punctuation character that the token represents.
|
||||||
|
pub punctuation: char,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for Punctuation {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.span.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents a hardcoded numeric literal value in the source code.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Numeric {
|
||||||
|
/// Is the span that makes up the token.
|
||||||
|
pub span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for Numeric {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.span.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Is an enumeration representing the two kinds of comments in the Flux programming language.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub enum CommentKind {
|
||||||
|
/// A comment that starts with `//` and ends at the end of the line.
|
||||||
|
Line,
|
||||||
|
|
||||||
|
/// A comment that starts with `/*` and ends with `*/`.
|
||||||
|
Delimited,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Represents a portion of the source code that is ignored by the interpreter.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Comment {
|
||||||
|
/// Is the span that makes up the token.
|
||||||
|
pub span: Span,
|
||||||
|
|
||||||
|
/// Is the kind of comment that the token represents.
|
||||||
|
pub kind: CommentKind,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SourceElement for Comment {
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.span.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Is an error that can occur when invoking the [`Token::tokenize`] method.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error, From)]
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
pub enum TokenizeError {
|
||||||
|
#[error("encountered a fatal lexical error that causes the process to stop.")]
|
||||||
|
FatalLexicalError,
|
||||||
|
|
||||||
|
#[error("the iterator argument is at the end of the source code.")]
|
||||||
|
EndOfSourceCodeIteratorArgument,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Token {
|
||||||
|
/// Increments the iterator while the predicate returns true.
|
||||||
|
pub fn walk_iter(iter: &mut SourceIterator, predicate: impl Fn(char) -> bool) {
|
||||||
|
while let Some((_, character)) = iter.peek() {
|
||||||
|
if !predicate(character) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
iter.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a span from the given start location to the current location of the iterator.
|
||||||
|
fn create_span(start: usize, iter: &mut SourceIterator) -> Span {
|
||||||
|
iter.peek().map_or_else(
|
||||||
|
|| Span::to_end(iter.source_file().clone(), start).unwrap(),
|
||||||
|
|(index, _)| Span::new(iter.source_file().clone(), start, index).unwrap(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Checks if the given character is a valid first character of an identifier.
///
/// `_` is always accepted; any other character is accepted unless it is a control character,
/// whitespace, ASCII punctuation or an ASCII digit.
fn is_first_identifier_character(character: char) -> bool {
    if character == '_' {
        return true;
    }

    let is_excluded = character.is_control()
        || character.is_whitespace()
        || character.is_ascii_punctuation()
        || character.is_ascii_digit();

    !is_excluded
}
|
||||||
|
|
||||||
|
/// Checks if the given character is a valid character of an identifier.
///
/// `_` is always accepted; any other character is accepted unless it is a control character,
/// whitespace or ASCII punctuation. Unlike the first character, ASCII digits are allowed.
fn is_identifier_character(character: char) -> bool {
    if character == '_' {
        return true;
    }

    let is_excluded =
        character.is_control() || character.is_whitespace() || character.is_ascii_punctuation();

    !is_excluded
}
|
||||||
|
|
||||||
|
/// Handles a contiguous sequence of whitespace characters.
|
||||||
|
fn handle_whitespace(iter: &mut SourceIterator, start: usize) -> Self {
|
||||||
|
Self::walk_iter(iter, char::is_whitespace);
|
||||||
|
|
||||||
|
WhiteSpaces {
|
||||||
|
span: Self::create_span(start, iter),
|
||||||
|
}
|
||||||
|
.into()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles a contiguous sequence of characters that are valid in an identifier.
|
||||||
|
fn handle_identifier_and_keyword(iter: &mut SourceIterator, start: usize) -> Self {
|
||||||
|
Self::walk_iter(iter, Self::is_identifier_character);
|
||||||
|
|
||||||
|
let span = Self::create_span(start, iter);
|
||||||
|
let word = span.str();
|
||||||
|
|
||||||
|
// Checks if the word is a keyword
|
||||||
|
KeywordKind::from_str(word).ok().map_or_else(
|
||||||
|
|| Identifier { span: span.clone() }.into(),
|
||||||
|
|kw| {
|
||||||
|
Keyword {
|
||||||
|
span: span.clone(),
|
||||||
|
keyword: kw,
|
||||||
|
}
|
||||||
|
.into()
|
||||||
|
},
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles a sequence starting with a slash
|
||||||
|
fn handle_comment(
|
||||||
|
iter: &mut SourceIterator,
|
||||||
|
start: usize,
|
||||||
|
character: char,
|
||||||
|
handler: &impl Handler<Error>,
|
||||||
|
) -> Result<Self, TokenizeError> {
|
||||||
|
// Single line comment
|
||||||
|
if let Some((_, '/')) = iter.peek() {
|
||||||
|
iter.next();
|
||||||
|
|
||||||
|
Self::walk_iter(iter, |character| !(character == '\n' || character == '\r'));
|
||||||
|
|
||||||
|
let is_cr = iter
|
||||||
|
.peek()
|
||||||
|
.map_or(false, |(_, character)| character == '\r');
|
||||||
|
|
||||||
|
if let (true, Some((_, '\n'))) = (is_cr, iter.next()) {
|
||||||
|
// skips the crlf
|
||||||
|
iter.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Comment {
|
||||||
|
span: Self::create_span(start, iter),
|
||||||
|
kind: CommentKind::Line,
|
||||||
|
}
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
// Delimited comment
|
||||||
|
else if let Some((_, '*')) = iter.peek() {
|
||||||
|
iter.next();
|
||||||
|
|
||||||
|
let mut is_terminated = false;
|
||||||
|
|
||||||
|
while let Some((_, character)) = iter.next() {
|
||||||
|
if character == '*' {
|
||||||
|
if let Some((_, '/')) = iter.peek() {
|
||||||
|
iter.next();
|
||||||
|
|
||||||
|
is_terminated = true;
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks if the comment is terminated
|
||||||
|
if is_terminated {
|
||||||
|
Ok(Comment {
|
||||||
|
span: Self::create_span(start, iter),
|
||||||
|
kind: CommentKind::Delimited,
|
||||||
|
}
|
||||||
|
.into())
|
||||||
|
} else {
|
||||||
|
handler.receive(
|
||||||
|
UnterminatedDelimitedComment {
|
||||||
|
span: Span::new(iter.source_file().clone(), start, start + 2).unwrap(),
|
||||||
|
}
|
||||||
|
.into(),
|
||||||
|
);
|
||||||
|
return Err(TokenizeError::FatalLexicalError);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Just a single slash punctuation
|
||||||
|
else {
|
||||||
|
Ok(Punctuation {
|
||||||
|
span: Self::create_span(start, iter),
|
||||||
|
punctuation: character,
|
||||||
|
}
|
||||||
|
.into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles a sequence of digits
|
||||||
|
fn handle_numeric_literal(iter: &mut SourceIterator, start: usize) -> Self {
|
||||||
|
// Tokenizes the whole number part
|
||||||
|
Self::walk_iter(iter, |character| character.is_ascii_digit());
|
||||||
|
|
||||||
|
Numeric {
|
||||||
|
span: Self::create_span(start, iter),
|
||||||
|
}
|
||||||
|
.into()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lexes the source code from the given iterator.
|
||||||
|
///
|
||||||
|
/// The tokenization starts at the current location of the iterator. The function moves the
|
||||||
|
/// iterator at least once and forwards it until it makes a token. After the token is made, the
|
||||||
|
/// iterator is left at the next character that is not part of the token.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// - [`TokenizeError::EndOfSourceCodeIteratorArgument`] - The iterator argument is at the end of the
|
||||||
|
/// source code.
|
||||||
|
/// - [`TokenizeError::FatalLexicalError`] - A fatal lexical error occurred.
|
||||||
|
pub fn tokenize(
|
||||||
|
iter: &mut SourceIterator,
|
||||||
|
handler: &impl Handler<Error>,
|
||||||
|
) -> Result<Self, TokenizeError> {
|
||||||
|
// Gets the first character
|
||||||
|
let (start, character) = iter
|
||||||
|
.next()
|
||||||
|
.ok_or(TokenizeError::EndOfSourceCodeIteratorArgument)?;
|
||||||
|
|
||||||
|
// Found white spaces
|
||||||
|
if character.is_whitespace() {
|
||||||
|
Ok(Self::handle_whitespace(iter, start))
|
||||||
|
}
|
||||||
|
// Found identifier/keyword
|
||||||
|
else if Self::is_first_identifier_character(character) {
|
||||||
|
Ok(Self::handle_identifier_and_keyword(iter, start))
|
||||||
|
}
|
||||||
|
// Found comment/single slash punctuation
|
||||||
|
else if character == '/' {
|
||||||
|
Self::handle_comment(iter, start, character, handler)
|
||||||
|
}
|
||||||
|
// Found numeric literal
|
||||||
|
else if character.is_ascii_digit() {
|
||||||
|
Ok(Self::handle_numeric_literal(iter, start))
|
||||||
|
}
|
||||||
|
// Found a punctuation
|
||||||
|
else if character.is_ascii_punctuation() {
|
||||||
|
Ok(Punctuation {
|
||||||
|
span: Self::create_span(start, iter),
|
||||||
|
punctuation: character,
|
||||||
|
}
|
||||||
|
.into())
|
||||||
|
} else {
|
||||||
|
unreachable!("all cases covered before")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,195 @@
|
||||||
|
//! Contains the [`TokenStream`] struct and its related types.
|
||||||
|
|
||||||
|
use std::{fmt::Debug, sync::Arc};
|
||||||
|
|
||||||
|
use derive_more::{Deref, From};
|
||||||
|
|
||||||
|
use crate::base::{source_file::SourceFile, Handler};
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
error::{self, UndelimitedDelimiter},
|
||||||
|
token::{Punctuation, Token, TokenizeError},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Is a list of well structured [`TokenTree`]s.
|
||||||
|
///
|
||||||
|
/// This struct is the final output of the lexical analysis phase and is meant to be used by the
|
||||||
|
/// next stage of the compilation process.
|
||||||
|
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Deref)]
|
||||||
|
pub struct TokenStream {
|
||||||
|
#[deref]
|
||||||
|
token_trees: Vec<TokenTree>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Debug for TokenStream {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_list().entries(self.token_trees.iter()).finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TokenStream {
|
||||||
|
/// Tokenizes the given source code.
|
||||||
|
///
|
||||||
|
/// This function tokenizes the given iterator of source code by calling the
|
||||||
|
/// [`Token::tokenize()`] repeatedly until the iterator is exhausted.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
/// - `source_file_iterator`: The iterator that iterates over the source code.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// A tuple containing the stream of successfully tokenized tokens and a list of lexical errors
|
||||||
|
/// encountered during tokenization.
|
||||||
|
#[must_use]
|
||||||
|
pub fn tokenize(source_file: &Arc<SourceFile>, handler: &impl Handler<error::Error>) -> Self {
|
||||||
|
// The list of token trees that will be returned.
|
||||||
|
let mut tokens = Vec::new();
|
||||||
|
let mut source_file_iterator = source_file.iter();
|
||||||
|
|
||||||
|
// Tokenize the source code.
|
||||||
|
loop {
|
||||||
|
match Token::tokenize(&mut source_file_iterator, handler) {
|
||||||
|
Ok(token) => tokens.push(token),
|
||||||
|
Err(TokenizeError::EndOfSourceCodeIteratorArgument) => {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(TokenizeError::FatalLexicalError) => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// reverse to use pop() instead of remove(0)
|
||||||
|
tokens.reverse();
|
||||||
|
|
||||||
|
// stucture the tokens into a token stream
|
||||||
|
let mut token_trees = Vec::new();
|
||||||
|
while let Some(token_tree) = Self::handle_token(&mut tokens, handler) {
|
||||||
|
token_trees.push(token_tree);
|
||||||
|
}
|
||||||
|
|
||||||
|
Self { token_trees }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles a token.
|
||||||
|
fn handle_token(
|
||||||
|
tokens: &mut Vec<Token>,
|
||||||
|
handler: &impl Handler<error::Error>,
|
||||||
|
) -> Option<TokenTree> {
|
||||||
|
tokens
|
||||||
|
.pop()
|
||||||
|
.and_then(|token| Self::handle_popped_token(tokens, token, handler))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles a token after it has been popped.
|
||||||
|
fn handle_popped_token(
|
||||||
|
tokens: &mut Vec<Token>,
|
||||||
|
popped_token: Token,
|
||||||
|
handler: &dyn Handler<error::Error>,
|
||||||
|
) -> Option<TokenTree> {
|
||||||
|
match popped_token {
|
||||||
|
Token::Punctuation(punc) if punc.punctuation == '{' => {
|
||||||
|
Self::handle_delimited(tokens, punc, Delimiter::Brace, handler)
|
||||||
|
.map(TokenTree::Delimited)
|
||||||
|
}
|
||||||
|
Token::Punctuation(punc) if punc.punctuation == '[' => {
|
||||||
|
Self::handle_delimited(tokens, punc, Delimiter::Bracket, handler)
|
||||||
|
.map(TokenTree::Delimited)
|
||||||
|
}
|
||||||
|
Token::Punctuation(punc) if punc.punctuation == '(' => {
|
||||||
|
Self::handle_delimited(tokens, punc, Delimiter::Parenthesis, handler)
|
||||||
|
.map(TokenTree::Delimited)
|
||||||
|
}
|
||||||
|
token => Some(TokenTree::Token(token)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handles a delimited token.
|
||||||
|
fn handle_delimited(
|
||||||
|
tokens: &mut Vec<Token>,
|
||||||
|
open: Punctuation,
|
||||||
|
delimiter: Delimiter,
|
||||||
|
handler: &dyn Handler<error::Error>,
|
||||||
|
) -> Option<Delimited> {
|
||||||
|
let mut token_trees = Vec::new();
|
||||||
|
|
||||||
|
while let Some(token) = tokens.pop() {
|
||||||
|
match (token, delimiter) {
|
||||||
|
(Token::Punctuation(p), Delimiter::Brace) if p.punctuation == '}' => {
|
||||||
|
return Some(Delimited {
|
||||||
|
open,
|
||||||
|
token_stream: Self { token_trees },
|
||||||
|
close: p,
|
||||||
|
delimiter,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
(Token::Punctuation(punc), Delimiter::Bracket) if punc.punctuation == ']' => {
|
||||||
|
return Some(Delimited {
|
||||||
|
open,
|
||||||
|
token_stream: Self { token_trees },
|
||||||
|
close: punc,
|
||||||
|
delimiter,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
(Token::Punctuation(punc), Delimiter::Parenthesis) if punc.punctuation == ')' => {
|
||||||
|
return Some(Delimited {
|
||||||
|
open,
|
||||||
|
token_stream: Self { token_trees },
|
||||||
|
close: punc,
|
||||||
|
delimiter,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
(token, _) => {
|
||||||
|
let Some(token_tree) = Self::handle_popped_token(tokens, token, handler) else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
|
||||||
|
token_trees.push(token_tree);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
handler.receive(error::Error::UndelimitedDelimiter(UndelimitedDelimiter {
|
||||||
|
opening_span: open.span,
|
||||||
|
delimiter,
|
||||||
|
}));
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Dissolves this struct into a tuple of its components.
|
||||||
|
#[must_use]
|
||||||
|
pub fn dissolve(self) -> Vec<TokenTree> {
|
||||||
|
self.token_trees
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Is an enumeration of either a [`Token`] or a [`Delimited`].
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)]
|
||||||
|
#[allow(missing_docs)]
|
||||||
|
pub enum TokenTree {
|
||||||
|
Token(Token),
|
||||||
|
Delimited(Delimited),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The kinds of delimiter pairs that can enclose a [`Delimited`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Delimiter {
    /// A `(` ... `)` pair.
    Parenthesis,

    /// A `{` ... `}` pair.
    Brace,

    /// A `[` ... `]` pair.
    Bracket,
}
|
||||||
|
|
||||||
|
/// Represents a list of tokens enclosed by a pair of delimiters.
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct Delimited {
|
||||||
|
/// The opening delimiter.
|
||||||
|
pub open: Punctuation,
|
||||||
|
|
||||||
|
/// The stream of tokens inside the delimiter.
|
||||||
|
pub token_stream: TokenStream,
|
||||||
|
|
||||||
|
/// The closing delimiter.
|
||||||
|
pub close: Punctuation,
|
||||||
|
|
||||||
|
/// The type of delimiter.
|
||||||
|
pub delimiter: Delimiter,
|
||||||
|
}
|
|
@ -0,0 +1,70 @@
|
||||||
|
//! The `ShulkerScript` language.
|
||||||
|
//!
|
||||||
|
//! `ShulkerScript` is a simple, imperative scripting language for creating Minecraft data packs.
|
||||||
|
|
||||||
|
#![deny(
|
||||||
|
missing_docs,
|
||||||
|
missing_debug_implementations,
|
||||||
|
missing_copy_implementations,
|
||||||
|
clippy::all,
|
||||||
|
clippy::pedantic,
|
||||||
|
clippy::nursery,
|
||||||
|
rustdoc::broken_intra_doc_links,
|
||||||
|
clippy::missing_errors_doc
|
||||||
|
)]
|
||||||
|
#![allow(clippy::missing_panics_doc, clippy::missing_const_for_fn)]
|
||||||
|
|
||||||
|
pub mod base;
|
||||||
|
pub mod lexical;
|
||||||
|
|
||||||
|
use std::{cell::Cell, fmt::Display, path::PathBuf};
|
||||||
|
|
||||||
|
use base::{source_file::SourceFile, Handler, Result};
|
||||||
|
|
||||||
|
use crate::{base::Error, lexical::token_stream::TokenStream};
|
||||||
|
|
||||||
|
/// Compiles the given source code.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// - If an error occurs while reading the file.
|
||||||
|
pub fn compile(path: PathBuf) -> Result<()> {
|
||||||
|
let source_file = SourceFile::load(path)?;
|
||||||
|
|
||||||
|
let printer = Printer::new();
|
||||||
|
|
||||||
|
let tokens = TokenStream::tokenize(&source_file, &printer);
|
||||||
|
|
||||||
|
println!("{tokens:#?}");
|
||||||
|
|
||||||
|
if printer.has_printed() {
|
||||||
|
return Err(Error::Other(
|
||||||
|
"An error occurred while tokenizing the source code.",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remembers whether anything has been printed through it.
struct Printer {
    /// Starts out `false`; a `Cell` so that it can be updated through a shared reference.
    printed: Cell<bool>,
}

impl Printer {
    /// Creates a new [`Printer`] that has not printed anything yet.
    fn new() -> Self {
        let printed = Cell::new(false);

        Self { printed }
    }

    /// Returns the current value of the `printed` flag.
    fn has_printed(&self) -> bool {
        let Self { printed } = self;

        printed.get()
    }
}
|
||||||
|
|
||||||
|
impl<E> Handler<E> for Printer
where
    E: Display,
{
    /// Prints the error to standard error and marks the printer as having printed.
    fn receive(&self, error: E) {
        eprintln!("{error}");

        self.printed.set(true);
    }
}
|
Loading…
Reference in New Issue