137 lines
4.4 KiB
Python
137 lines
4.4 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import re
|
|
import os
|
|
from pathlib import Path
|
|
from collections import defaultdict, deque
|
|
|
|
# --- Patterns for locating EBNF blocks inside Rust doc comments -------------

# Opening fence of an EBNF block: a `///` doc-comment line holding "```ebnf".
ebnf_fence_start = re.compile(r"^\s*///\s*```\s*ebnf\s*$")
# Closing fence: a `///` doc-comment line holding only "```".
ebnf_fence_end = re.compile(r"^\s*///\s*```\s*$")
# Any `///` line; group 1 is the text after the marker and one optional space.
doc_comment_prefix = re.compile(r"^\s*///\s?(.*)$")

# --- Patterns for reading the grammar text itself ----------------------------

# A rule definition starts with an identifier followed by a colon.
rule_start_pattern = re.compile(r"^\s*([A-Za-z_]\w*)\s*:")
# Every identifier-like word; used to collect candidate rule references.
rule_ref_pattern = re.compile(r"\b([A-Za-z_]\w*)\b")

# --- Collected state ----------------------------------------------------------

# Raw text of every EBNF block found, in discovery order.
ebnf_blocks: list[str] = []
# Rule name -> full text of its definition.
rule_defs: dict[str, str] = {}
# Rule name -> identifiers mentioned in its definition (excluding itself).
rule_deps: defaultdict[str, set[str]] = defaultdict(set)
|
|
|
|
|
|
def find_project_root() -> Path | None:
|
|
current = Path.cwd()
|
|
while current != current.parent:
|
|
cargo_toml = current / "Cargo.toml"
|
|
if cargo_toml.exists():
|
|
text = cargo_toml.read_text(encoding="utf-8")
|
|
if re.search(r'(?m)^\s*name\s*=\s*"shulkerscript"\s*$', text):
|
|
return current
|
|
current = current.parent
|
|
return None
|
|
|
|
|
|
# Resolve the package root first: the relative paths used further down
# ("grammar.md" and the "*.rs" scan) are looked up from the working directory.
root_dir = find_project_root()
if root_dir is None:
    raise SystemExit(
        "Could not find Cargo.toml of package 'shulkerscript' in this or any parent directory."
    )

# Switch to the package root when the script was started from somewhere else.
if root_dir != Path.cwd():
    os.chdir(root_dir)
    print(f"Changed working directory to {root_dir}")
|
|
|
|
# Rule names found in the existing grammar.md, read from its "## <Rule>"
# headings. Kept so the script can report added and removed rules at the end.
previous_rules = set()

# Matches a heading line such as "## Program"; group 1 is the rule name.
rule_header_pattern = re.compile(r"## (\w+)")

try:
    with open("grammar.md", "r", encoding="utf-8") as f:
        for line in f:
            m = rule_header_pattern.match(line)
            if m:
                previous_rules.add(m.group(1))
except FileNotFoundError:
    # No grammar.md yet (e.g. the very first run): there are simply no
    # previous rules, and the file is created when the grammar is written.
    pass
|
|
|
|
def _record_rule(name: str, lines: list[str]) -> None:
    """Store one rule definition together with the identifiers it mentions.

    A later definition of the same rule name replaces the stored text, while
    the collected references accumulate across definitions.
    """
    full_def = "\n".join(lines)
    rule_defs[name] = full_def
    refs = set(rule_ref_pattern.findall(full_def))
    # A rule referring to itself is not a dependency on another rule.
    refs.discard(name)
    rule_deps[name].update(refs)


def _record_block(block_lines: list[str]) -> None:
    """Split one fenced EBNF block into its rules and record each of them.

    Lines that appear before the first rule start are not part of any rule
    and are ignored; every other line belongs to the most recent rule start.
    """
    ebnf_blocks.append("\n".join(block_lines))

    rule_name = None
    rule_lines = []
    for ln in block_lines:
        m = rule_start_pattern.match(ln)
        if m:
            # A new rule begins, so the one collected so far is complete.
            if rule_name:
                _record_rule(rule_name, rule_lines)
            rule_name = m.group(1)
            rule_lines = [ln]
        elif rule_name:
            rule_lines.append(ln)

    # The last rule of the block has no following rule start to flush it.
    if rule_name:
        _record_rule(rule_name, rule_lines)


# Scan every Rust source below the working directory for EBNF blocks in
# `///` doc comments. The paths are sorted because rglob yields them in
# arbitrary order, and the order decides which definition of a duplicated
# rule name is kept.
for path in sorted(Path(".").rglob("*.rs")):
    with path.open(encoding="utf-8") as f:
        in_block = False
        current_block_lines = []

        for line in f:
            if not in_block:
                # Outside a block only an opening fence is of interest.
                if ebnf_fence_start.match(line):
                    in_block = True
                    current_block_lines = []
                continue

            if ebnf_fence_end.match(line):
                _record_block(current_block_lines)
                in_block = False
                continue

            # Inside a block: keep the doc-comment text without its marker.
            m = doc_comment_prefix.match(line)
            if m:
                current_block_lines.append(m.group(1))
|
|
|
|
# The documented grammar is ordered starting from the root rule, so it must exist.
if "Program" not in rule_defs:
    raise SystemExit("Root rule 'Program' not found in EBNF definitions")

# Breadth-first walk over the rule references, starting at 'Program'.
# `order` receives each defined rule once, in the order it is first reached.
visited = set()
order = []
queue = deque(["Program"])

while queue:
    rule = queue.popleft()
    # Skip names handled already and identifiers that are not rules at all.
    if rule in visited or rule not in rule_defs:
        continue
    visited.add(rule)
    order.append(rule)
    # Dependencies are queued alphabetically.
    queue.extend(dep for dep in sorted(rule_deps[rule]) if dep not in visited)
|
|
|
|
# Rules that are defined but were not reached from 'Program'; they are
# listed alphabetically after the reachable ones.
unused_rules = sorted(name for name in rule_defs if name not in visited)

if unused_rules:
    print(
        f"Appending {len(unused_rules)} unused rules to the end: {', '.join(unused_rules)}"
    )

order += unused_rules
|
|
|
|
# One markdown section per rule: a heading followed by the rule's definition
# inside an "ebnf" code fence.
sections = [f"## {rule}\n\n```ebnf\n{rule_defs[rule]}\n```\n\n" for rule in order]

with open("grammar.md", "w", encoding="utf-8") as out:
    out.write("# Grammar of the Shulkerscript language\n\n")
    out.writelines(sections)

print(f"Wrote grammar.md with {len(order)} rules.")
|
|
# Report how the rule set differs from the one read from the previous
# grammar.md. The names are sorted so the messages are identical between
# runs; iterating a set of strings directly has no stable order.
current_rules = set(rule_defs)

added_rules = current_rules - previous_rules
if added_rules:
    print(f"Added rules for: {', '.join(sorted(added_rules))}")

removed_rules = previous_rules - current_rules
if removed_rules:
    print(f"Removed rules for: {', '.join(sorted(removed_rules))}")
|