blob: a4ce9c120298ace33af8589dd288da73a19fa21a [file] [log] [blame] [edit]
//! Simple hand-written ungrammar parser.
use std::collections::HashMap;
use crate::{
error::{bail, format_err, Result},
lexer::{self, TokenKind},
Grammar, Node, NodeData, Rule, Token, TokenData,
};
macro_rules! bail {
($loc:expr, $($tt:tt)*) => {{
let err = $crate::error::format_err!($($tt)*)
.with_location($loc);
return Err(err);
}};
}
pub(crate) fn parse(tokens: Vec<lexer::Token>) -> Result<Grammar> {
let mut p = Parser::new(tokens);
while !p.is_eof() {
node(&mut p)?;
}
p.finish()
}
#[derive(Default)]
struct Parser {
grammar: Grammar,
tokens: Vec<lexer::Token>,
node_table: HashMap<String, Node>,
token_table: HashMap<String, Token>,
}
const DUMMY_RULE: Rule = Rule::Node(Node(!0));
impl Parser {
fn new(mut tokens: Vec<lexer::Token>) -> Parser {
tokens.reverse();
Parser {
tokens,
..Parser::default()
}
}
fn peek(&self) -> Option<&lexer::Token> {
self.peek_n(0)
}
fn peek_n(&self, n: usize) -> Option<&lexer::Token> {
self.tokens.iter().nth_back(n)
}
fn bump(&mut self) -> Result<lexer::Token> {
self.tokens
.pop()
.ok_or_else(|| format_err!("unexpected EOF"))
}
fn expect(&mut self, kind: TokenKind, what: &str) -> Result<()> {
let token = self.bump()?;
if token.kind != kind {
bail!(token.loc, "unexpected token, expected `{}`", what);
}
Ok(())
}
fn is_eof(&self) -> bool {
self.tokens.is_empty()
}
fn finish(self) -> Result<Grammar> {
for node_data in &self.grammar.nodes {
if matches!(node_data.rule, DUMMY_RULE) {
crate::error::bail!("Undefined node: {}", node_data.name)
}
}
Ok(self.grammar)
}
fn intern_node(&mut self, name: String) -> Node {
let len = self.node_table.len();
let grammar = &mut self.grammar;
*self.node_table.entry(name.clone()).or_insert_with(|| {
grammar.nodes.push(NodeData {
name,
rule: DUMMY_RULE,
});
Node(len)
})
}
fn intern_token(&mut self, name: String) -> Token {
let len = self.token_table.len();
let grammar = &mut self.grammar;
*self.token_table.entry(name.clone()).or_insert_with(|| {
grammar.tokens.push(TokenData { name });
Token(len)
})
}
}
fn node(p: &mut Parser) -> Result<()> {
let token = p.bump()?;
let node = match token.kind {
TokenKind::Node(it) => p.intern_node(it),
_ => bail!(token.loc, "expected ident"),
};
p.expect(TokenKind::Eq, "=")?;
if !matches!(p.grammar[node].rule, DUMMY_RULE) {
bail!(token.loc, "duplicate rule: `{}`", p.grammar[node].name)
}
let rule = rule(p)?;
p.grammar.nodes[node.0].rule = rule;
Ok(())
}
fn rule(p: &mut Parser) -> Result<Rule> {
if let Some(lexer::Token { kind: TokenKind::Pipe, loc }) = p.peek() {
bail!(
*loc,
"The first element in a sequence of productions or alternatives \
must not have a leading pipe (`|`)"
);
}
let lhs = seq_rule(p)?;
let mut alt = vec![lhs];
while let Some(token) = p.peek() {
if token.kind != TokenKind::Pipe {
break;
}
p.bump()?;
let rule = seq_rule(p)?;
alt.push(rule)
}
let res = if alt.len() == 1 {
alt.pop().unwrap()
} else {
Rule::Alt(alt)
};
Ok(res)
}
fn seq_rule(p: &mut Parser) -> Result<Rule> {
let lhs = atom_rule(p)?;
let mut seq = vec![lhs];
while let Some(rule) = opt_atom_rule(p)? {
seq.push(rule)
}
let res = if seq.len() == 1 {
seq.pop().unwrap()
} else {
Rule::Seq(seq)
};
Ok(res)
}
fn atom_rule(p: &mut Parser) -> Result<Rule> {
match opt_atom_rule(p)? {
Some(it) => Ok(it),
None => {
let token = p.bump()?;
bail!(token.loc, "unexpected token")
}
}
}
fn opt_atom_rule(p: &mut Parser) -> Result<Option<Rule>> {
let token = match p.peek() {
Some(it) => it,
None => return Ok(None),
};
let mut res = match &token.kind {
TokenKind::Node(name) => {
if let Some(lookahead) = p.peek_n(1) {
match lookahead.kind {
TokenKind::Eq => return Ok(None),
TokenKind::Colon => {
let label = name.clone();
p.bump()?;
p.bump()?;
let rule = atom_rule(p)?;
let res = Rule::Labeled {
label,
rule: Box::new(rule),
};
return Ok(Some(res));
}
_ => (),
}
}
match p.peek_n(1) {
Some(token) if token.kind == TokenKind::Eq => return Ok(None),
_ => (),
}
let name = name.clone();
p.bump()?;
let node = p.intern_node(name);
Rule::Node(node)
}
TokenKind::Token(name) => {
let name = name.clone();
p.bump()?;
let token = p.intern_token(name);
Rule::Token(token)
}
TokenKind::LParen => {
p.bump()?;
let rule = rule(p)?;
p.expect(TokenKind::RParen, ")")?;
rule
}
_ => return Ok(None),
};
if let Some(token) = p.peek() {
match &token.kind {
TokenKind::QMark => {
p.bump()?;
res = Rule::Opt(Box::new(res));
}
TokenKind::Star => {
p.bump()?;
res = Rule::Rep(Box::new(res));
}
_ => (),
}
}
Ok(Some(res))
}