commit 830b3b10d9eff513e46a2d4a81ed4b23c1bbadf5
Author: Moritz Hölting <87192362+moritz-hoelting@users.noreply.github.com>
Date:   Wed Mar 27 19:27:11 2024 +0100

    Add tokenizing module

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4fffb2f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..3b6b0f0
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "shulkerscript-lang"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+colored = "2.1.0"
+derive_more = { version = "0.99.17", default-features = false, features = ["deref", "from", "deref_mut"] }
+getset = "0.1.2"
+strum = { version = "0.26.2", features = ["derive"] }
+strum_macros = "0.26.2"
+thiserror = "1.0.58"
diff --git a/src/base/diagnostic.rs b/src/base/diagnostic.rs
new file mode 100644
index 0000000..420aa00
--- /dev/null
+++ b/src/base/diagnostic.rs
@@ -0,0 +1,5 @@
+/// Represents a trait responsible for handling diagnostics in the interpreter.
+pub trait Handler<T> {
+    /// Receives an error and handles it.
+    fn receive(&self, error: T);
+}
diff --git a/src/base/error.rs b/src/base/error.rs
new file mode 100644
index 0000000..53484e6
--- /dev/null
+++ b/src/base/error.rs
@@ -0,0 +1,16 @@
+use std::io;
+
+/// An error that occurred during compilation.
+#[allow(missing_docs)]
+#[derive(Debug, thiserror::Error)]
+pub enum Error {
+    #[error("An error occurred while reading the file.")]
+    IoError(#[from] io::Error),
+    #[error("An error occurred while tokenizing the source code.")]
+    TokenizeError(#[from] crate::lexical::token::TokenizeError),
+    #[error("An error occurred")]
+    Other(&'static str),
+}
+
+/// A specialized [`Result`] type for this crate.
+pub type Result<T> = std::result::Result<T, Error>;
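The `#[from]` attributes wire these variants into `?`-conversions. A minimal sketch of the intended usage (not part of the commit; `read_source` is a hypothetical helper):

    use shulkerscript_lang::base::Result;

    fn read_source(path: &std::path::Path) -> Result<String> {
        // an io::Error here converts into Error::IoError automatically via #[from]
        Ok(std::fs::read_to_string(path)?)
    }
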
diff --git a/src/base/log.rs b/src/base/log.rs
new file mode 100644
index 0000000..f11eb98
--- /dev/null
+++ b/src/base/log.rs
@@ -0,0 +1,75 @@
+//! Module containing structures and implementations for logging messages to the user.
+
+use colored::Colorize;
+use std::fmt::Display;
+
+use super::source_file::Span;
+
+/// Represents the severity of a log message to be printed to the console.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
+#[allow(missing_docs)]
+pub enum Severity {
+    Error,
+    Info,
+    Warning,
+}
+
+/// Struct implementing [`Display`] that represents a log message to be displayed to the user.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Message<T> {
+    /// The severity of the log message.
+    pub severity: Severity,
+
+    /// The message to be displayed.
+    pub display: T,
+}
+
+impl<T> Message<T> {
+    /// Create a new log message with the given severity and message to be displayed.
+    pub fn new(severity: Severity, display: T) -> Self {
+        Self { severity, display }
+    }
+}
+
+impl<T: Display> Display for Message<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let log_header = (match self.severity {
+            Severity::Error => "[error]:".red(),
+            Severity::Info => "[info]:".green(),
+            Severity::Warning => "[warning]:".yellow(),
+        })
+        .bold();
+
+        let message_part = &self.display.to_string().bold();
+
+        write!(f, "{log_header} {message_part}")
+    }
+}
+
+/// Structure implementing [`Display`] that prints the particular span of the source code.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct SourceCodeDisplay<'a, T> {
+    /// The span of the source code to be printed.
+    pub span: &'a Span,
+
+    /// The help message to be displayed.
+    pub help_display: Option<T>,
+}
+
+impl<'a, T> SourceCodeDisplay<'a, T> {
+    /// Create a new source code display with the given span and help message to be displayed.
+    pub fn new(span: &'a Span, help_display: Option<T>) -> Self {
+        Self { span, help_display }
+    }
+}
+
+impl<'a, T: std::fmt::Display> Display for SourceCodeDisplay<'a, T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.span.str())?;
+
+        if let Some(help_display) = &self.help_display {
+            write!(f, "\n\n{help_display}")?;
+        }
+
+        Ok(())
+    }
+}
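A short sketch of how these display types are meant to be used (not part of the commit; the coloring comes from the colored crate):

    use shulkerscript_lang::base::log::{Message, Severity};

    // prints a bold, colored "[warning]:" header followed by the message text
    println!("{}", Message::new(Severity::Warning, "unused function"));
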
diff --git a/src/base/mod.rs b/src/base/mod.rs
new file mode 100644
index 0000000..80c4b19
--- /dev/null
+++ b/src/base/mod.rs
@@ -0,0 +1,12 @@
+//! The base module contains the core functionality of the `ShulkerScript` language.
+
+pub mod source_file;
+
+mod error;
+#[doc(inline)]
+pub use error::{Error, Result};
+
+mod diagnostic;
+pub use diagnostic::Handler;
+
+pub mod log;
diff --git a/src/base/source_file.rs b/src/base/source_file.rs
new file mode 100644
index 0000000..58a31b5
--- /dev/null
+++ b/src/base/source_file.rs
@@ -0,0 +1,359 @@
+//! Module for handling source files and their elements.
+
+use std::{
+    cmp::Ordering,
+    fmt::Debug,
+    fs,
+    iter::{Iterator, Peekable},
+    ops::Range,
+    path::PathBuf,
+    str::CharIndices,
+    sync::Arc,
+};
+
+use getset::{CopyGetters, Getters};
+
+use super::Error;
+
+/// Represents a source file that contains the source code.
+#[derive(Clone)]
+pub struct SourceFile {
+    path: PathBuf,
+    content: String,
+    lines: Vec<Range<usize>>,
+}
+
+#[allow(clippy::missing_fields_in_debug)]
+impl Debug for SourceFile {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("SourceFile")
+            .field("path", &self.path)
+            .field("lines", &self.lines)
+            .finish()
+    }
+}
+
+impl SourceFile {
+    fn new(path: PathBuf, content: String) -> Arc<Self> {
+        let lines = get_line_byte_positions(&content);
+
+        Arc::new(Self {
+            path,
+            content,
+            lines,
+        })
+    }
+
+    /// Get the content of the source file.
+    #[must_use]
+    pub fn content(&self) -> &str {
+        &self.content
+    }
+
+    /// Get the line of the source file at the given line number.
+    ///
+    /// Numbering starts at 1.
+    #[must_use]
+    pub fn get_line(&self, line: usize) -> Option<&str> {
+        if line == 0 {
+            return None;
+        }
+
+        let line = line - 1;
+        self.lines
+            .get(line)
+            .map(|range| &self.content()[range.clone()])
+    }
+
+    /// Get the [`SourceIterator`] for the source file.
+    #[must_use]
+    pub fn iter<'a>(self: &'a Arc<Self>) -> SourceIterator<'a> {
+        SourceIterator {
+            source_file: self,
+            iterator: self.content().char_indices().peekable(),
+        }
+    }
+
+    /// Get the number of lines in the source file.
+    #[must_use]
+    pub fn line_amount(&self) -> usize {
+        self.lines.len()
+    }
+
+    /// Load the source file from the given file path.
+    ///
+    /// # Errors
+    /// - [`Error::IoError`]: Error occurred when reading the file contents.
+    pub fn load(path: PathBuf) -> Result<Arc<Self>, Error> {
+        let source = fs::read_to_string(&path).map_err(Error::IoError)?;
+        Ok(Self::new(path, source))
+    }
+
+    /// Get the [`Location`] of a given byte index.
+    #[must_use]
+    pub fn get_location(&self, byte_index: usize) -> Option<Location> {
+        if !self.content.is_char_boundary(byte_index) {
+            None
+        } else {
+            // get the line number by binary searching the line ranges
+            let line = self
+                .lines
+                .binary_search_by(|range| {
+                    if range.contains(&byte_index) {
+                        Ordering::Equal
+                    } else if byte_index < range.start {
+                        Ordering::Greater
+                    } else {
+                        Ordering::Less
+                    }
+                })
+                .ok()?;
+
+            let line_starting_byte_index = self.lines[line].start;
+            let line_str = self.get_line(line + 1).unwrap();
+
+            // get the column number by iterating through the utf-8 characters (starts at 1)
+            let column = line_str
+                .char_indices()
+                .take_while(|(i, _)| *i + line_starting_byte_index < byte_index)
+                .count()
+                + 1;
+
+            Some(Location {
+                line: line + 1,
+                column,
+            })
+        }
+    }
+}
+
+/// Represents a range of characters in a source file.
+#[derive(Clone, Getters, CopyGetters)]
+pub struct Span {
+    /// Get the start byte index of the span.
+    #[get_copy = "pub"]
+    start: usize,
+
+    /// Get the end byte index of the span (exclusive).
+    #[get_copy = "pub"]
+    end: usize,
+
+    /// Get the source file that the span is located in.
+    #[get = "pub"]
+    source_file: Arc<SourceFile>,
+}
+
+#[allow(clippy::missing_fields_in_debug)]
+impl Debug for Span {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Span")
+            .field("start", &self.start)
+            .field("end", &self.end)
+            .field("content", &self.str())
+            .finish()
+    }
+}
+
+impl PartialEq for Span {
+    fn eq(&self, other: &Self) -> bool {
+        Arc::ptr_eq(&self.source_file, &other.source_file)
+            && self.start == other.start
+            && self.end == other.end
+    }
+}
+
+impl Eq for Span {}
+
+#[allow(clippy::non_canonical_partial_ord_impl)]
+impl PartialOrd for Span {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        let self_ptr_value = Arc::as_ptr(&self.source_file) as usize;
+        let other_ptr_value = Arc::as_ptr(&other.source_file) as usize;
+
+        Some(self_ptr_value.cmp(&other_ptr_value).then_with(|| {
+            self.start
+                .cmp(&other.start)
+                .then_with(|| self.end.cmp(&other.end))
+        }))
+    }
+}
+
+impl Ord for Span {
+    fn cmp(&self, other: &Self) -> Ordering {
+        let self_ptr_value = Arc::as_ptr(&self.source_file) as usize;
+        let other_ptr_value = Arc::as_ptr(&other.source_file) as usize;
+
+        self_ptr_value
+            .cmp(&other_ptr_value)
+            .then_with(|| self.start.cmp(&other.start))
+            .then_with(|| self.end.cmp(&other.end))
+    }
+}
+
+impl std::hash::Hash for Span {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.start.hash(state);
+        self.end.hash(state);
+        Arc::as_ptr(&self.source_file).hash(state);
+    }
+}
+
+impl Span {
+    /// Create a span from the given start and end byte indices in the source file.
+    ///
+    /// # Parameters
+    /// - `start`: The start byte index of the span.
+    /// - `end`: The end byte index of the span (exclusive).
+    #[must_use]
+    pub fn new(source_file: Arc<SourceFile>, start: usize, end: usize) -> Option<Self> {
+        if start > end
+            || !source_file.content().is_char_boundary(start)
+            || source_file.content().len() < end
+            || (source_file.content().len() + 1 != end
+                && !source_file.content().is_char_boundary(end))
+        {
+            return None;
+        }
+
+        Some(Self {
+            start,
+            end,
+            source_file,
+        })
+    }
+    /// Create a span from the given start byte index to the end of the source file.
+    #[must_use]
+    pub fn to_end(source_file: Arc<SourceFile>, start: usize) -> Option<Self> {
+        if !source_file.content().is_char_boundary(start) {
+            return None;
+        }
+        Some(Self {
+            start,
+            end: source_file.content().len(),
+            source_file,
+        })
+    }
+
+    /// Get the string slice of the source code that the span represents.
+    #[must_use]
+    pub fn str(&self) -> &str {
+        &self.source_file.content()[self.start..self.end]
+    }
+
+    /// Get the starting [`Location`] of the span.
+    #[must_use]
+    pub fn start_location(&self) -> Location {
+        self.source_file.get_location(self.start).unwrap()
+    }
+
+    /// Get the ending [`Location`] of the span.
+    ///
+    /// Returns [`None`] if the end of the span is the end of the source file.
+    #[must_use]
+    pub fn end_location(&self) -> Option<Location> {
+        self.source_file.get_location(self.end)
+    }
+
+    /// Join the starting position of this span with the end position of the given span.
+    #[must_use]
+    pub fn join(&self, end: &Self) -> Option<Self> {
+        if !Arc::ptr_eq(&self.source_file, &end.source_file) || self.start > end.end {
+            return None;
+        }
+
+        Some(Self {
+            start: self.start,
+            end: end.end,
+            source_file: self.source_file.clone(),
+        })
+    }
+}
+
+/// Pointing to a particular location in a source file.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
+pub struct Location {
+    /// Line number of the location (starts at 1).
+    pub line: usize,
+
+    /// Column number of the location (starts at 1).
+    pub column: usize,
+}
+
+/// Represents an element that is located within a source file.
+pub trait SourceElement {
+    /// Get the span location of the element.
+    fn span(&self) -> Span;
+}
+
+impl<T: SourceElement> SourceElement for Box<T> {
+    fn span(&self) -> Span {
+        self.as_ref().span()
+    }
+}
+
+/// Iterator iterating over the characters in a source file that can be peeked at.
+#[derive(Debug, Clone, CopyGetters)]
+pub struct SourceIterator<'a> {
+    /// Get the source file that the iterator is iterating over.
+    #[get_copy = "pub"]
+    source_file: &'a Arc<SourceFile>,
+    iterator: Peekable<CharIndices<'a>>,
+}
+impl<'a> SourceIterator<'a> {
+    /// Peek at the next character in the source file.
+    pub fn peek(&mut self) -> Option<(usize, char)> {
+        self.iterator.peek().copied()
+    }
+}
+impl<'a> Iterator for SourceIterator<'a> {
+    type Item = (usize, char);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iterator.next()
+    }
+}
+
+/// Get the byte positions of the lines in the given text.
+fn get_line_byte_positions(text: &str) -> Vec<Range<usize>> {
+    let mut current_position = 0;
+    let mut results = Vec::new();
+
+    let mut skip = false;
+
+    for (byte, char) in text.char_indices() {
+        if skip {
+            skip = false;
+            continue;
+        }
+
+        // lf
+        if char == '\n' {
+            #[allow(clippy::range_plus_one)]
+            results.push(current_position..byte + 1);
+
+            current_position = byte + 1;
+        }
+
+        // crlf
+        if char == '\r' {
+            if text.as_bytes().get(byte + 1) == Some(&b'\n') {
+                results.push(current_position..byte + 2);
+
+                current_position = byte + 2;
+
+                skip = true;
+            } else {
+                #[allow(clippy::range_plus_one)]
+                results.push(current_position..byte + 1);
+
+                current_position = byte + 1;
+            }
+        }
+    }
+
+    // add the last line
+    results.push(current_position..text.len());
+
+    results
+}
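A sketch of the span semantics defined above (not part of the commit; assumes a hypothetical example.shu whose first two bytes are ASCII):

    use std::path::PathBuf;
    use shulkerscript_lang::base::source_file::{SourceFile, Span};

    let file = SourceFile::load(PathBuf::from("example.shu")).expect("file is readable");
    // spans are byte ranges and must start and end on char boundaries
    let span = Span::new(file.clone(), 0, 2).expect("ASCII bytes are char boundaries");
    assert_eq!(span.str(), &file.content()[0..2]);
    assert_eq!(span.start_location().line, 1);
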
diff --git a/src/lexical/error.rs b/src/lexical/error.rs
new file mode 100644
index 0000000..d80dfd4
--- /dev/null
+++ b/src/lexical/error.rs
@@ -0,0 +1,62 @@
+use std::fmt::Display;
+
+use getset::Getters;
+
+use crate::base::{
+    log::{Message, Severity, SourceCodeDisplay},
+    source_file::Span,
+};
+
+use super::token_stream::Delimiter;
+
+/// Represents an error that occurred during the lexical analysis of the source code.
+#[allow(missing_docs)]
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, thiserror::Error)]
+pub enum Error {
+    #[error("Comment is not terminated.")]
+    UnterminatedDelimitedComment(#[from] UnterminatedDelimitedComment),
+    #[error("Delimiter is not terminated.")]
+    UndelimitedDelimiter(#[from] UndelimitedDelimiter),
+}
+
+/// Source code contains an unclosed `/*` comment.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Getters, thiserror::Error)]
+pub struct UnterminatedDelimitedComment {
+    /// Span of the unclosed `/*` that starts the comment.
+    pub span: Span,
+}
+
+impl Display for UnterminatedDelimitedComment {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}\n{}",
+            Message::new(Severity::Error, "found an unclosed `/*` comment"),
+            SourceCodeDisplay::new(&self.span, Option::<u8>::None)
+        )
+    }
+}
+
+/// Delimiter is not closed by its corresponding closing pair.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Getters, thiserror::Error)]
+pub struct UndelimitedDelimiter {
+    /// Span of the opening delimiter.
+    pub opening_span: Span,
+
+    /// Kind of the delimiter.
+    pub delimiter: Delimiter,
+}
+
+impl Display for UndelimitedDelimiter {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(
+            f,
+            "{}\n{}",
+            Message::new(Severity::Error, "found an undelimited delimiter"),
+            SourceCodeDisplay::new(
+                &self.opening_span,
+                Some("this delimiter is not closed by its corresponding closing pair")
+            )
+        )
+    }
+}
diff --git a/src/lexical/mod.rs b/src/lexical/mod.rs
new file mode 100644
index 0000000..440fef4
--- /dev/null
+++ b/src/lexical/mod.rs
@@ -0,0 +1,8 @@
+//! The lexical module is responsible for converting raw text into a stream of tokens that the parser can understand.
+
+pub mod token_stream;
+
+pub mod token;
+
+mod error;
+pub use error::Error;
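Since `Handler::receive` only takes `&self`, implementors need interior mutability to store what they receive. A sketch of a handler that collects lexical errors instead of printing them (not part of the commit):

    use std::cell::RefCell;
    use shulkerscript_lang::{base::Handler, lexical::Error};

    #[derive(Default)]
    struct Collector {
        errors: RefCell<Vec<Error>>,
    }

    impl Handler<Error> for Collector {
        fn receive(&self, error: Error) {
            // interior mutability, because receive only gets &self
            self.errors.borrow_mut().push(error);
        }
    }
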
diff --git a/src/lexical/token.rs b/src/lexical/token.rs
new file mode 100644
index 0000000..7871734
--- /dev/null
+++ b/src/lexical/token.rs
@@ -0,0 +1,411 @@
+//! Contains the [`Token`] struct and its related types.
+
+use std::{collections::HashMap, str::FromStr, sync::OnceLock};
+
+use crate::base::{
+    source_file::{SourceElement, SourceIterator, Span},
+    Handler,
+};
+use derive_more::From;
+use strum::IntoEnumIterator;
+use strum_macros::EnumIter;
+
+use super::{error::UnterminatedDelimitedComment, Error};
+
+/// Is an enumeration representing keywords in shulkerscript.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, EnumIter)]
+#[allow(missing_docs)]
+pub enum KeywordKind {
+    Function,
+    If,
+    Else,
+}
+
+impl ToString for KeywordKind {
+    fn to_string(&self) -> String {
+        self.as_str().to_string()
+    }
+}
+
+/// Is an error that is returned when a string cannot be parsed into a [`Keyword`] in [`FromStr`]
+/// trait implementation.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default, thiserror::Error)]
+#[error("invalid string representation of keyword.")]
+pub struct KeywordParseError;
+
+impl FromStr for KeywordKind {
+    type Err = KeywordParseError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        static STRING_KEYWORD_MAP: OnceLock<HashMap<&'static str, KeywordKind>> = OnceLock::new();
+        let map = STRING_KEYWORD_MAP.get_or_init(|| {
+            let mut map = HashMap::new();
+
+            for keyword in Self::iter() {
+                map.insert(keyword.as_str(), keyword);
+            }
+
+            map
+        });
+
+        map.get(s).copied().ok_or(KeywordParseError)
+    }
+}
+
+impl KeywordKind {
+    /// Gets the string representation of the keyword as a `&str`.
+    #[must_use]
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::Function => "fn",
+            Self::If => "if",
+            Self::Else => "else",
+        }
+    }
+}
+
+/// Is an enumeration containing all kinds of tokens in the `ShulkerScript` programming language.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)]
+#[allow(missing_docs)]
+pub enum Token {
+    WhiteSpaces(WhiteSpaces),
+    Identifier(Identifier),
+    Keyword(Keyword),
+    Punctuation(Punctuation),
+    Numeric(Numeric),
+    Comment(Comment),
+}
+
+impl Token {
+    /// Returns the span of the token.
+    #[must_use]
+    pub fn span(&self) -> &Span {
+        match self {
+            Self::WhiteSpaces(token) => &token.span,
+            Self::Identifier(token) => &token.span,
+            Self::Keyword(token) => &token.span,
+            Self::Punctuation(token) => &token.span,
+            Self::Numeric(token) => &token.span,
+            Self::Comment(token) => &token.span,
+        }
+    }
+}
+
+impl SourceElement for Token {
+    fn span(&self) -> Span {
+        match self {
+            Self::WhiteSpaces(token) => token.span(),
+            Self::Identifier(token) => token.span(),
+            Self::Keyword(token) => token.span(),
+            Self::Punctuation(token) => token.span(),
+            Self::Numeric(token) => token.span(),
+            Self::Comment(token) => token.span(),
+        }
+    }
+}
+
+/// Represents a contiguous sequence of whitespace characters.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct WhiteSpaces {
+    /// Is the span that makes up the token.
+    pub span: Span,
+}
+
+impl SourceElement for WhiteSpaces {
+    fn span(&self) -> Span {
+        self.span.clone()
+    }
+}
+/// Represents a contiguous sequence of characters that are valid in an identifier.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Identifier {
+    /// Is the span that makes up the token.
+    pub span: Span,
+}
+
+impl SourceElement for Identifier {
+    fn span(&self) -> Span {
+        self.span.clone()
+    }
+}
+
+/// Represents a contiguous sequence of characters that are reserved for a keyword.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Keyword {
+    /// Is the span that makes up the token.
+    pub span: Span,
+
+    /// Is the [`KeywordKind`] that the token represents.
+    pub keyword: KeywordKind,
+}
+
+impl SourceElement for Keyword {
+    fn span(&self) -> Span {
+        self.span.clone()
+    }
+}
+
+/// Represents a single ASCII punctuation character.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Punctuation {
+    /// Is the span that makes up the token.
+    pub span: Span,
+
+    /// Is the ASCII punctuation character that the token represents.
+    pub punctuation: char,
+}
+
+impl SourceElement for Punctuation {
+    fn span(&self) -> Span {
+        self.span.clone()
+    }
+}
+
+/// Represents a hardcoded numeric literal value in the source code.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Numeric {
+    /// Is the span that makes up the token.
+    pub span: Span,
+}
+
+impl SourceElement for Numeric {
+    fn span(&self) -> Span {
+        self.span.clone()
+    }
+}
+
+/// Is an enumeration representing the two kinds of comments in the `ShulkerScript` programming language.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum CommentKind {
+    /// A comment that starts with `//` and ends at the end of the line.
+    Line,
+
+    /// A comment that starts with `/*` and ends with `*/`.
+    Delimited,
+}
+
+/// Represents a portion of the source code that is ignored by the interpreter.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Comment {
+    /// Is the span that makes up the token.
+    pub span: Span,
+
+    /// Is the kind of comment that the token represents.
+    pub kind: CommentKind,
+}
+
+impl SourceElement for Comment {
+    fn span(&self) -> Span {
+        self.span.clone()
+    }
+}
+
+/// Is an error that can occur when invoking the [`Token::tokenize`] method.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, thiserror::Error, From)]
+#[allow(missing_docs)]
+pub enum TokenizeError {
+    #[error("encountered a fatal lexical error that causes the process to stop.")]
+    FatalLexicalError,
+
+    #[error("the iterator argument is at the end of the source code.")]
+    EndOfSourceCodeIteratorArgument,
+}
+
+impl Token {
+    /// Advances the iterator while the predicate returns true.
+    pub fn walk_iter(iter: &mut SourceIterator, predicate: impl Fn(char) -> bool) {
+        while let Some((_, character)) = iter.peek() {
+            if !predicate(character) {
+                break;
+            }
+
+            iter.next();
+        }
+    }
+
+    /// Creates a span from the given start location to the current location of the iterator.
+    fn create_span(start: usize, iter: &mut SourceIterator) -> Span {
+        iter.peek().map_or_else(
+            || Span::to_end(iter.source_file().clone(), start).unwrap(),
+            |(index, _)| Span::new(iter.source_file().clone(), start, index).unwrap(),
+        )
+    }
+
+    /// Checks if the given character is a valid first character of an identifier.
+    fn is_first_identifier_character(character: char) -> bool {
+        character == '_'
+            || (!character.is_control()
+                && !character.is_whitespace()
+                && !character.is_ascii_punctuation()
+                && !character.is_ascii_digit())
+    }
+
+    /// Checks if the given character is a valid character of an identifier.
+    fn is_identifier_character(character: char) -> bool {
+        character == '_'
+            || (!character.is_control()
+                && !character.is_whitespace()
+                && !character.is_ascii_punctuation())
+    }
+
+    /// Handles a contiguous sequence of whitespace characters.
+    fn handle_whitespace(iter: &mut SourceIterator, start: usize) -> Self {
+        Self::walk_iter(iter, char::is_whitespace);
+
+        WhiteSpaces {
+            span: Self::create_span(start, iter),
+        }
+        .into()
+    }
+
+    /// Handles a contiguous sequence of characters that are valid in an identifier.
+    fn handle_identifier_and_keyword(iter: &mut SourceIterator, start: usize) -> Self {
+        Self::walk_iter(iter, Self::is_identifier_character);
+
+        let span = Self::create_span(start, iter);
+        let word = span.str();
+
+        // Checks if the word is a keyword
+        KeywordKind::from_str(word).ok().map_or_else(
+            || Identifier { span: span.clone() }.into(),
+            |kw| {
+                Keyword {
+                    span: span.clone(),
+                    keyword: kw,
+                }
+                .into()
+            },
+        )
+    }
+
+    /// Handles a sequence starting with a slash
+    fn handle_comment(
+        iter: &mut SourceIterator,
+        start: usize,
+        character: char,
+        handler: &impl Handler<Error>,
+    ) -> Result<Self, TokenizeError> {
+        // Single line comment
+        if let Some((_, '/')) = iter.peek() {
+            iter.next();
+
+            Self::walk_iter(iter, |character| !(character == '\n' || character == '\r'));
+
+            let is_cr = iter
+                .peek()
+                .map_or(false, |(_, character)| character == '\r');
+
+            // consume the line terminator; for crlf, also skip the '\n' after the '\r'
+            if let (true, Some((_, '\r'))) = (is_cr, iter.next()) {
+                if let Some((_, '\n')) = iter.peek() {
+                    iter.next();
+                }
+            }
+
+            Ok(Comment {
+                span: Self::create_span(start, iter),
+                kind: CommentKind::Line,
+            }
+            .into())
+        }
+        // Delimited comment
+        else if let Some((_, '*')) = iter.peek() {
+            iter.next();
+
+            let mut is_terminated = false;
+
+            while let Some((_, character)) = iter.next() {
+                if character == '*' {
+                    if let Some((_, '/')) = iter.peek() {
+                        iter.next();
+
+                        is_terminated = true;
+
+                        break;
+                    }
+                }
+            }
+
+            // Checks if the comment is terminated
+            if is_terminated {
+                Ok(Comment {
+                    span: Self::create_span(start, iter),
+                    kind: CommentKind::Delimited,
+                }
+                .into())
+            } else {
+                handler.receive(
+                    UnterminatedDelimitedComment {
+                        span: Span::new(iter.source_file().clone(), start, start + 2).unwrap(),
+                    }
+                    .into(),
+                );
+                Err(TokenizeError::FatalLexicalError)
+            }
+        }
+        // Just a single slash punctuation
+        else {
+            Ok(Punctuation {
+                span: Self::create_span(start, iter),
+                punctuation: character,
+            }
+            .into())
+        }
+    }
+
+    /// Handles a sequence of digits
+    fn handle_numeric_literal(iter: &mut SourceIterator, start: usize) -> Self {
+        // Tokenizes the whole number part
+        Self::walk_iter(iter, |character| character.is_ascii_digit());
+
+        Numeric {
+            span: Self::create_span(start, iter),
+        }
+        .into()
+    }
+
+    /// Lexes the source code from the given iterator.
+    ///
+    /// The tokenization starts at the current location of the iterator. The function moves the
+    /// iterator at least once and forwards it until it makes a token. After the token is made, the
+    /// iterator is left at the next character that is not part of the token.
+    ///
+    /// # Errors
+    /// - [`TokenizeError::EndOfSourceCodeIteratorArgument`] - The iterator argument is at the end
+    ///   of the source code.
+    /// - [`TokenizeError::FatalLexicalError`] - A fatal lexical error occurred.
+    pub fn tokenize(
+        iter: &mut SourceIterator,
+        handler: &impl Handler<Error>,
+    ) -> Result<Self, TokenizeError> {
+        // Gets the first character
+        let (start, character) = iter
+            .next()
+            .ok_or(TokenizeError::EndOfSourceCodeIteratorArgument)?;
+
+        // Found white spaces
+        if character.is_whitespace() {
+            Ok(Self::handle_whitespace(iter, start))
+        }
+        // Found identifier/keyword
+        else if Self::is_first_identifier_character(character) {
+            Ok(Self::handle_identifier_and_keyword(iter, start))
+        }
+        // Found comment/single slash punctuation
+        else if character == '/' {
+            Self::handle_comment(iter, start, character, handler)
+        }
+        // Found numeric literal
+        else if character.is_ascii_digit() {
+            Ok(Self::handle_numeric_literal(iter, start))
+        }
+        // Found a punctuation
+        else if character.is_ascii_punctuation() {
+            Ok(Punctuation {
+                span: Self::create_span(start, iter),
+                punctuation: character,
+            }
+            .into())
+        } else {
+            unreachable!("all cases covered before")
+        }
+    }
+}
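A sketch of driving `Token::tokenize` by hand (not part of the commit; `TokenStream::tokenize` below does exactly this in a loop), given an `Arc<SourceFile>` and any `Handler<Error>` implementation:

    let mut iter = source_file.iter();
    loop {
        match Token::tokenize(&mut iter, &handler) {
            Ok(token) => println!("{token:?}"),
            Err(TokenizeError::EndOfSourceCodeIteratorArgument) => break,
            Err(TokenizeError::FatalLexicalError) => { /* already reported to the handler */ }
        }
    }
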
diff --git a/src/lexical/token_stream.rs b/src/lexical/token_stream.rs
new file mode 100644
index 0000000..b43657a
--- /dev/null
+++ b/src/lexical/token_stream.rs
@@ -0,0 +1,195 @@
+//! Contains the [`TokenStream`] struct and its related types.
+
+use std::{fmt::Debug, sync::Arc};
+
+use derive_more::{Deref, From};
+
+use crate::base::{source_file::SourceFile, Handler};
+
+use super::{
+    error::{self, UndelimitedDelimiter},
+    token::{Punctuation, Token, TokenizeError},
+};
+
+/// Is a list of well-structured [`TokenTree`]s.
+///
+/// This struct is the final output of the lexical analysis phase and is meant to be used by the
+/// next stage of the compilation process.
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Deref)]
+pub struct TokenStream {
+    #[deref]
+    token_trees: Vec<TokenTree>,
+}
+
+impl Debug for TokenStream {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_list().entries(self.token_trees.iter()).finish()
+    }
+}
+
+impl TokenStream {
+    /// Tokenizes the given source code.
+    ///
+    /// This function tokenizes the given source file by calling [`Token::tokenize()`]
+    /// repeatedly until the iterator is exhausted.
+    ///
+    /// # Parameters
+    /// - `source_file`: The source file that is read and tokenized.
+    /// - `handler`: The handler that receives the lexical errors encountered during tokenization.
+    ///
+    /// # Returns
+    /// The stream of successfully tokenized tokens. Lexical errors are reported to the given
+    /// `handler` instead of being returned.
+    #[must_use]
+    pub fn tokenize(source_file: &Arc<SourceFile>, handler: &impl Handler<error::Error>) -> Self {
+        // The list of token trees that will be returned.
+        let mut tokens = Vec::new();
+        let mut source_file_iterator = source_file.iter();
+
+        // Tokenize the source code.
+        loop {
+            match Token::tokenize(&mut source_file_iterator, handler) {
+                Ok(token) => tokens.push(token),
+                Err(TokenizeError::EndOfSourceCodeIteratorArgument) => {
+                    break;
+                }
+                Err(TokenizeError::FatalLexicalError) => (),
+            }
+        }
+
+        // reverse to use pop() instead of remove(0)
+        tokens.reverse();
+
+        // structure the tokens into a token stream
+        let mut token_trees = Vec::new();
+        while let Some(token_tree) = Self::handle_token(&mut tokens, handler) {
+            token_trees.push(token_tree);
+        }
+
+        Self { token_trees }
+    }
+
+    /// Handles a token.
+    fn handle_token(
+        tokens: &mut Vec<Token>,
+        handler: &impl Handler<error::Error>,
+    ) -> Option<TokenTree> {
+        tokens
+            .pop()
+            .and_then(|token| Self::handle_popped_token(tokens, token, handler))
+    }
+
+    /// Handles a token after it has been popped.
+    fn handle_popped_token(
+        tokens: &mut Vec<Token>,
+        popped_token: Token,
+        handler: &dyn Handler<error::Error>,
+    ) -> Option<TokenTree> {
+        match popped_token {
+            Token::Punctuation(punc) if punc.punctuation == '{' => {
+                Self::handle_delimited(tokens, punc, Delimiter::Brace, handler)
+                    .map(TokenTree::Delimited)
+            }
+            Token::Punctuation(punc) if punc.punctuation == '[' => {
+                Self::handle_delimited(tokens, punc, Delimiter::Bracket, handler)
+                    .map(TokenTree::Delimited)
+            }
+            Token::Punctuation(punc) if punc.punctuation == '(' => {
+                Self::handle_delimited(tokens, punc, Delimiter::Parenthesis, handler)
+                    .map(TokenTree::Delimited)
+            }
+            token => Some(TokenTree::Token(token)),
+        }
+    }
+
+    /// Handles a delimited token.
+    fn handle_delimited(
+        tokens: &mut Vec<Token>,
+        open: Punctuation,
+        delimiter: Delimiter,
+        handler: &dyn Handler<error::Error>,
+    ) -> Option<Delimited> {
+        let mut token_trees = Vec::new();
+
+        while let Some(token) = tokens.pop() {
+            match (token, delimiter) {
+                (Token::Punctuation(punc), Delimiter::Brace) if punc.punctuation == '}' => {
+                    return Some(Delimited {
+                        open,
+                        token_stream: Self { token_trees },
+                        close: punc,
+                        delimiter,
+                    });
+                }
+                (Token::Punctuation(punc), Delimiter::Bracket) if punc.punctuation == ']' => {
+                    return Some(Delimited {
+                        open,
+                        token_stream: Self { token_trees },
+                        close: punc,
+                        delimiter,
+                    })
+                }
+                (Token::Punctuation(punc), Delimiter::Parenthesis) if punc.punctuation == ')' => {
+                    return Some(Delimited {
+                        open,
+                        token_stream: Self { token_trees },
+                        close: punc,
+                        delimiter,
+                    })
+                }
+                (token, _) => {
+                    let Some(token_tree) = Self::handle_popped_token(tokens, token, handler) else {
+                        break;
+                    };
+
+                    token_trees.push(token_tree);
+                }
+            }
+        }
+
+        handler.receive(error::Error::UndelimitedDelimiter(UndelimitedDelimiter {
+            opening_span: open.span,
+            delimiter,
+        }));
+
+        None
+    }
+
+    /// Dissolves this struct into its inner list of token trees.
+    #[must_use]
+    pub fn dissolve(self) -> Vec<TokenTree> {
+        self.token_trees
+    }
+}
+
+/// Is an enumeration of either a [`Token`] or a [`Delimited`].
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, From)]
+#[allow(missing_docs)]
+pub enum TokenTree {
+    Token(Token),
+    Delimited(Delimited),
+}
+
+/// Is an enumeration of the different types of delimiters in the [`Delimited`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[allow(missing_docs)]
+pub enum Delimiter {
+    Parenthesis,
+    Brace,
+    Bracket,
+}
+
+/// Represents a list of tokens enclosed by a pair of delimiters.
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct Delimited {
+    /// The opening delimiter.
+    pub open: Punctuation,
+
+    /// The stream of tokens inside the delimiter.
+    pub token_stream: TokenStream,
+
+    /// The closing delimiter.
+    pub close: Punctuation,
+
+    /// The type of delimiter.
+    pub delimiter: Delimiter,
+}
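A sketch of consuming the resulting tree structure (not part of the commit): for an input like `foo(1)`, the stream holds an identifier token followed by a parenthesis-delimited subtree containing the numeric token:

    let stream = TokenStream::tokenize(&source_file, &handler);
    for tree in stream.iter() {
        match tree {
            TokenTree::Token(token) => println!("token: {token:?}"),
            TokenTree::Delimited(delimited) => {
                // TokenStream derefs to Vec<TokenTree>, so len() works here
                println!("{:?} containing {} trees", delimited.delimiter, delimited.token_stream.len());
            }
        }
    }
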
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..4d90758
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,70 @@
+//! The `ShulkerScript` language.
+//!
+//! `ShulkerScript` is a simple, imperative scripting language for creating Minecraft data packs.
+
+#![deny(
+    missing_docs,
+    missing_debug_implementations,
+    missing_copy_implementations,
+    clippy::all,
+    clippy::pedantic,
+    clippy::nursery,
+    rustdoc::broken_intra_doc_links,
+    clippy::missing_errors_doc
+)]
+#![allow(clippy::missing_panics_doc, clippy::missing_const_for_fn)]
+
+pub mod base;
+pub mod lexical;
+
+use std::{cell::Cell, fmt::Display, path::PathBuf};
+
+use base::{source_file::SourceFile, Handler, Result};
+
+use crate::{base::Error, lexical::token_stream::TokenStream};
+
+/// Compiles the given source code.
+///
+/// # Errors
+/// - If an error occurs while reading the file.
+pub fn compile(path: PathBuf) -> Result<()> {
+    let source_file = SourceFile::load(path)?;
+
+    let printer = Printer::new();
+
+    let tokens = TokenStream::tokenize(&source_file, &printer);
+
+    println!("{tokens:#?}");
+
+    if printer.has_printed() {
+        return Err(Error::Other(
+            "An error occurred while tokenizing the source code.",
+        ));
+    }
+
+    Ok(())
+}
+
+struct Printer {
+    printed: Cell<bool>,
+}
+
+impl Printer {
+    /// Creates a new [`Printer`].
+    fn new() -> Self {
+        Self {
+            printed: Cell::new(false),
+        }
+    }
+
+    fn has_printed(&self) -> bool {
+        self.printed.get()
+    }
+}
+
+impl<E: Display> Handler<E> for Printer {
+    fn receive(&self, error: E) {
+        eprintln!("{error}");
+        self.printed.set(true);
+    }
+}
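End to end, the pieces compose into the pipeline exercised by compile (a sketch, assuming a hypothetical example.shu on disk):

    use std::path::PathBuf;

    fn main() {
        // prints the debug dump of the token stream, or lexical errors via the Printer handler
        if let Err(err) = shulkerscript_lang::compile(PathBuf::from("example.shu")) {
            eprintln!("{err}");
        }
    }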