// blob: 04a839777f6abc05f730ba69c89d84bdb67d7f0e [file] [log] [blame]
//! Parser for .ninja files.
//!
//! See design notes on parsing in doc/design_notes.md.
//!
//! To avoid allocations parsing frequently uses references into the input
//! text, marked with the lifetime `'text`.
use crate::{
eval::{EvalPart, EvalString, Vars},
scanner::{ParseError, ParseResult, Scanner},
smallmap::SmallMap,
};
use std::path::Path;
/// A list of variable bindings, as expressed with syntax like:
///   key = $val
///
/// Each entry maps a variable name to its still-unevaluated value; both the
/// name and the pieces of the value borrow from the input text.
pub type VarList<'text> = SmallMap<&'text str, EvalString<&'text str>>;
/// A parsed `rule` statement.
pub struct Rule<'text> {
/// The identifier that followed the `rule` keyword.
pub name: &'text str,
/// The indented bindings that followed the rule line, left unevaluated.
/// The parser only accepts the recognized rule variable names here.
pub vars: VarList<'text>,
}
/// A parsed `build` statement, with all paths and bindings left unevaluated.
pub struct Build<'text> {
/// Name of the rule, read after the `:`.
pub rule: &'text str,
/// The scanner's line number when parsing of this statement began.
pub line: usize,
/// All outputs: those listed before `|` first, then those listed after it.
pub outs: Vec<EvalString<&'text str>>,
/// Number of leading entries of `outs` that were listed before `|`.
pub explicit_outs: usize,
/// All inputs, in order: explicit, implicit (after `|`), order-only
/// (after `||`), then validation (after `|@`).
pub ins: Vec<EvalString<&'text str>>,
/// Number of entries of `ins` read before any `|` separator.
pub explicit_ins: usize,
/// Number of entries of `ins` read after a single `|`.
pub implicit_ins: usize,
/// Number of entries of `ins` read after `||`.
pub order_only_ins: usize,
/// Number of entries of `ins` read after `|@`.
pub validation_ins: usize,
/// The indented bindings that followed the build line; any variable name
/// is accepted here.
pub vars: VarList<'text>,
}
/// A parsed `pool` statement.
#[derive(Debug)]
pub struct Pool<'text> {
/// The identifier that followed the `pool` keyword.
pub name: &'text str,
/// Value of the pool's `depth` binding, or 0 when the pool has no bindings.
pub depth: usize,
}
/// One statement returned by `Parser::read`, one variant per leading keyword.
///
/// Top-level `name = value` assignments are not represented here: the parser
/// evaluates them itself and stores the result in `Parser::vars`.
pub enum Statement<'text> {
/// `rule NAME` plus its indented bindings.
Rule(Rule<'text>),
/// `build OUTS: RULE INS` plus its indented bindings.
Build(Build<'text>),
/// `default PATH...`; holds the unevaluated paths (at least one).
Default(Vec<EvalString<&'text str>>),
/// `include PATH`; holds the unevaluated remainder of the line.
Include(EvalString<&'text str>),
/// `subninja PATH`; holds the unevaluated remainder of the line.
Subninja(EvalString<&'text str>),
/// `pool NAME` plus its optional `depth` binding.
Pool(Pool<'text>),
}
/// Parser state for a single .ninja input buffer.
pub struct Parser<'text> {
/// Cursor over the input bytes; also the source of parse errors.
scanner: Scanner<'text>,
/// Top-level variables, each evaluated and inserted as `read` encounters
/// its definition.
pub vars: Vars<'text>,
/// Reading EvalStrings is very hot when parsing, so we always read into
/// this buffer and then clone it afterwards.
eval_buf: Vec<EvalPart<&'text str>>,
}
impl<'text> Parser<'text> {
pub fn new(buf: &'text [u8]) -> Parser<'text> {
Parser {
scanner: Scanner::new(buf),
vars: Vars::default(),
eval_buf: Vec::with_capacity(16),
}
}
/// Render `err` as a message string by delegating to the scanner, passing
/// `filename` along for it to use in the message.
pub fn format_parse_error(&self, filename: &Path, err: ParseError) -> String {
self.scanner.format_parse_error(filename, err)
}
/// Read the next statement from the input.
///
/// Blank lines and `#` comments are skipped.  A top-level `name = value`
/// assignment is evaluated against the variables defined so far and stored
/// in `self.vars`; it is not returned, and reading continues with the next
/// line.  Returns `Ok(None)` once the terminating `'\0'` is reached.
///
/// # Errors
/// Fails on a line that starts with a space or tab, and on any error raised
/// while reading the statement itself.
pub fn read(&mut self) -> ParseResult<Option<Statement<'text>>> {
    loop {
        // Phase 1: deal with anything that is not the start of a statement.
        match self.scanner.peek() {
            '\0' => return Ok(None),
            '\n' | '\r' => {
                self.scanner.next();
                continue;
            }
            '#' => {
                self.skip_comment()?;
                continue;
            }
            ' ' | '\t' => return self.scanner.parse_error("unexpected whitespace"),
            _ => {}
        }

        // Phase 2: the line starts with a keyword or a variable name.
        let keyword = self.read_ident()?;
        self.skip_spaces();
        let statement = match keyword {
            "rule" => Statement::Rule(self.read_rule()?),
            "build" => Statement::Build(self.read_build()?),
            "default" => Statement::Default(self.read_default()?),
            "include" => Statement::Include(self.read_eval(false)?),
            "subninja" => Statement::Subninja(self.read_eval(false)?),
            "pool" => Statement::Pool(self.read_pool()?),
            name => {
                // TODO: The evaluation of global variables should
                // be moved out of the parser, so that we can run
                // multiple parsers in parallel and then evaluate
                // all the variables in series at the end.
                let value = self.read_vardef()?.evaluate(&[&self.vars]);
                self.vars.insert(name, value);
                continue;
            }
        };
        return Ok(Some(statement));
    }
}
/// Read the `= ...` part of a variable definition, through the end of the
/// line.
///
/// Returns the unevaluated right-hand side.  An empty right-hand side yields
/// an empty `EvalString`.
///
/// # Errors
/// Fails if the `=` is missing, if the value cannot be read, or if the value
/// is not followed by a newline.
fn read_vardef(&mut self) -> ParseResult<EvalString<&'text str>> {
    self.skip_spaces();
    self.scanner.expect('=')?;
    self.skip_spaces();
    let value = if self.scanner.peek_newline() {
        // read_eval will error out if there's nothing to read, so an empty
        // value is produced directly.
        EvalString::new(Vec::new())
    } else {
        // Propagate a failed read right away.  Continuing to scan for the
        // line ending after a failure would consume more input and replace
        // the specific error with a generic "expected newline" one.
        self.read_eval(false)?
    };
    self.scanner.skip('\r');
    self.scanner.expect('\n')?;
    Ok(value)
}
/// Read a collection of `  foo = bar` variables, with leading indent.
///
/// Keeps reading bindings for as long as the next line starts with a space.
/// `variable_name_validator` is called with each variable name; a name it
/// rejects is reported as a parse error.
fn read_scoped_vars(
    &mut self,
    variable_name_validator: fn(var: &str) -> bool,
) -> ParseResult<VarList<'text>> {
    let mut bindings = VarList::default();
    loop {
        // An unindented line ends the scope.
        if self.scanner.peek() != ' ' {
            return Ok(bindings);
        }
        self.scanner.skip_spaces();
        let key = self.read_ident()?;
        if !variable_name_validator(key) {
            self.scanner
                .parse_error(format!("unexpected variable {:?}", key))?;
        }
        self.skip_spaces();
        let value = self.read_vardef()?;
        bindings.insert(key, value);
    }
}
/// Read the rest of a `rule` statement (the `rule` keyword has already been
/// consumed): the rule name, the end of the line, and the indented bindings.
///
/// Only the recognized rule variable names are accepted as bindings.
fn read_rule(&mut self) -> ParseResult<Rule<'text>> {
    /// Whether `var` is a variable name permitted inside a `rule` block.
    fn is_rule_var(var: &str) -> bool {
        [
            "command",
            "depfile",
            "dyndep",
            "description",
            "deps",
            "generator",
            "pool",
            "restat",
            "rspfile",
            "rspfile_content",
            "msvc_deps_prefix",
        ]
        .contains(&var)
    }

    let name = self.read_ident()?;
    self.scanner.skip('\r');
    self.scanner.expect('\n')?;
    let vars = self.read_scoped_vars(is_rule_var)?;
    Ok(Rule { name, vars })
}
/// Read the rest of a `pool` statement (the `pool` keyword has already been
/// consumed): the pool name, the end of the line, and an optional indented
/// `depth` binding.
///
/// The depth defaults to 0 when no binding is present.  A depth that does
/// not parse as `usize` is a parse error.
fn read_pool(&mut self) -> ParseResult<Pool<'text>> {
    let name = self.read_ident()?;
    self.scanner.skip('\r');
    self.scanner.expect('\n')?;
    let bindings = self.read_scoped_vars(|var| var == "depth")?;
    // Only `depth` passes the validator, so the first binding (if any) is it.
    let depth = match bindings.into_iter().next() {
        None => 0,
        Some((_, raw)) => match raw.evaluate(&[]).parse::<usize>() {
            Ok(parsed) => parsed,
            Err(err) => return self.scanner.parse_error(format!("pool depth: {}", err)),
        },
    };
    Ok(Pool { name, depth })
}
/// Append space-separated, unevaluated paths to `v` until the next character
/// is `:`, `|`, or the start of a newline.  The delimiter is not consumed.
fn read_unevaluated_paths_to(
    &mut self,
    v: &mut Vec<EvalString<&'text str>>,
) -> ParseResult<()> {
    self.skip_spaces();
    loop {
        let at_delimiter =
            matches!(self.scanner.peek(), ':' | '|') || self.scanner.peek_newline();
        if at_delimiter {
            return Ok(());
        }
        let path = self.read_eval(true)?;
        v.push(path);
        self.skip_spaces();
    }
}
/// Read the rest of a `build` statement (the `build` keyword has already
/// been consumed).
///
/// The expected shape is
/// `OUTS [| IMPLICIT_OUTS] : RULE INS [| IMPLICIT] [|| ORDER_ONLY] [|@ VALIDATION]`
/// followed by a newline and optional indented bindings.  All inputs are
/// stored in one vector, with a separate count for each category.
fn read_build(&mut self) -> ParseResult<Build<'text>> {
    let line = self.scanner.line;

    // Outputs: the explicit ones, then any listed after a `|`.
    let mut outs = Vec::new();
    self.read_unevaluated_paths_to(&mut outs)?;
    let explicit_outs = outs.len();
    if self.scanner.peek() == '|' {
        self.scanner.next();
        self.read_unevaluated_paths_to(&mut outs)?;
    }

    self.scanner.expect(':')?;
    self.skip_spaces();
    let rule = self.read_ident()?;

    // Explicit inputs.
    let mut ins = Vec::new();
    self.read_unevaluated_paths_to(&mut ins)?;
    let explicit_ins = ins.len();

    // Implicit inputs follow a lone `|`.  If the `|` turns out to begin `||`
    // or `|@`, step back so the later sections see the whole separator.
    let before_implicit = ins.len();
    if self.scanner.peek() == '|' {
        self.scanner.next();
        match self.scanner.peek() {
            '|' | '@' => {
                self.scanner.back();
            }
            _ => self.read_unevaluated_paths_to(&mut ins)?,
        }
    }
    let implicit_ins = ins.len() - before_implicit;

    // Order-only inputs follow `||`; a `|@` is left for the next section.
    let before_order_only = ins.len();
    if self.scanner.peek() == '|' {
        self.scanner.next();
        match self.scanner.peek() {
            '@' => {
                self.scanner.back();
            }
            _ => {
                self.scanner.expect('|')?;
                self.read_unevaluated_paths_to(&mut ins)?;
            }
        }
    }
    let order_only_ins = ins.len() - before_order_only;

    // Validation inputs follow `|@`.
    let before_validation = ins.len();
    if self.scanner.peek() == '|' {
        self.scanner.next();
        self.scanner.expect('@')?;
        self.read_unevaluated_paths_to(&mut ins)?;
    }
    let validation_ins = ins.len() - before_validation;

    self.scanner.skip('\r');
    self.scanner.expect('\n')?;
    // Build statements accept bindings with any variable name.
    let vars = self.read_scoped_vars(|_| true)?;

    Ok(Build {
        rule,
        line,
        outs,
        explicit_outs,
        ins,
        explicit_ins,
        implicit_ins,
        order_only_ins,
        validation_ins,
        vars,
    })
}
/// Read the rest of a `default` statement (the `default` keyword has already
/// been consumed): one or more unevaluated paths, then the end of the line.
///
/// # Errors
/// Fails with "expected path" when no path is given.
fn read_default(&mut self) -> ParseResult<Vec<EvalString<&'text str>>> {
    let mut targets = Vec::new();
    self.read_unevaluated_paths_to(&mut targets)?;
    match targets.len() {
        0 => self.scanner.parse_error("expected path"),
        _ => {
            self.scanner.skip('\r');
            self.scanner.expect('\n')?;
            Ok(targets)
        }
    }
}
/// Consume a comment through its terminating newline.
///
/// If the input ends first, the `'\0'` terminator is put back so the caller
/// still sees it.
fn skip_comment(&mut self) -> ParseResult<()> {
    loop {
        let ch = self.scanner.read();
        if ch == '\n' {
            break;
        }
        if ch == '\0' {
            self.scanner.back();
            break;
        }
    }
    Ok(())
}
/// Read an identifier -- rule name, pool name, variable name, etc.
///
/// An identifier is a non-empty run of ASCII letters, digits, `_`, `-` and
/// `.`.  The character that ends it is not consumed.
fn read_ident(&mut self) -> ParseResult<&'text str> {
    let start = self.scanner.ofs;
    loop {
        match self.scanner.read() {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.' => {}
            _ => break,
        }
    }
    // The loop consumed the first non-identifier character; give it back.
    self.scanner.back();
    let end = self.scanner.ofs;
    if start == end {
        return self.scanner.parse_error("failed to scan ident");
    }
    Ok(self.scanner.slice(start, end))
}
/// Reads an EvalString.  Stops at either a newline, or ' ', ':', '|' if
/// stop_at_path_separators is set, without consuming the character that
/// caused it to stop.
///
/// Literal text between `$` escapes becomes `EvalPart::Literal` slices of the
/// input; each `$` escape is interpreted by `read_escape`.  The parts are
/// accumulated in the shared `eval_buf` and cloned into the result.
///
/// # Errors
/// Fails with "unexpected EOF" if the `'\0'` terminator is reached, with
/// "Expected a string" if no parts were read, and with any error from
/// `read_escape`.
fn read_eval(&mut self, stop_at_path_separators: bool) -> ParseResult<EvalString<&'text str>> {
    self.eval_buf.clear();
    // Start of the literal run currently being scanned.
    let mut ofs = self.scanner.ofs;
    // This match block is copied twice, with the only difference being the check for
    // spaces, colons, and pipes in the stop_at_path_separators version. We could remove the
    // duplication by adding a match branch like `' ' | ':' | '|' if stop_at_path_separators =>`
    // or even moving the `if stop_at_path_separators` inside of the match body, but both of
    // those options are ~10% slower on a benchmark test of running the loader on llvm-cmake
    // ninja files.
    let end = if stop_at_path_separators {
        loop {
            match self.scanner.read() {
                '\0' => return self.scanner.parse_error("unexpected EOF"),
                ' ' | ':' | '|' | '\n' => {
                    self.scanner.back();
                    break self.scanner.ofs;
                }
                '\r' if self.scanner.peek() == '\n' => {
                    self.scanner.back();
                    break self.scanner.ofs;
                }
                '$' => {
                    // Flush the literal text that preceded the '$'.
                    let end = self.scanner.ofs - 1;
                    if end > ofs {
                        self.eval_buf
                            .push(EvalPart::Literal(self.scanner.slice(ofs, end)));
                    }
                    let escape = self.read_escape()?;
                    self.eval_buf.push(escape);
                    ofs = self.scanner.ofs;
                }
                _ => {}
            }
        }
    } else {
        loop {
            match self.scanner.read() {
                '\0' => return self.scanner.parse_error("unexpected EOF"),
                '\n' => {
                    self.scanner.back();
                    break self.scanner.ofs;
                }
                '\r' if self.scanner.peek() == '\n' => {
                    self.scanner.back();
                    break self.scanner.ofs;
                }
                '$' => {
                    // Flush the literal text that preceded the '$'.
                    let end = self.scanner.ofs - 1;
                    if end > ofs {
                        self.eval_buf
                            .push(EvalPart::Literal(self.scanner.slice(ofs, end)));
                    }
                    let escape = self.read_escape()?;
                    self.eval_buf.push(escape);
                    ofs = self.scanner.ofs;
                }
                _ => {}
            }
        }
    };
    // Flush the trailing literal run, if any.
    if end > ofs {
        self.eval_buf
            .push(EvalPart::Literal(self.scanner.slice(ofs, end)));
    }
    if self.eval_buf.is_empty() {
        // A plain literal suffices; there is nothing to format.
        return self.scanner.parse_error("Expected a string");
    }
    Ok(EvalString::new(self.eval_buf.clone()))
}
/// Read a variable name as found after a '$' in an eval.
/// Ninja calls this a "simple" varname and it is the same as read_ident without
/// period allowed(!), I guess because we expect things like
///   foo = $bar.d
/// to parse as a reference to $bar.
///
/// The character that ends the name is not consumed; an empty name is a
/// parse error.
fn read_simple_varname(&mut self) -> ParseResult<&'text str> {
    let start = self.scanner.ofs;
    loop {
        let c = self.scanner.read();
        if !(c.is_ascii_alphanumeric() || c == '_' || c == '-') {
            break;
        }
    }
    // Un-read the character that terminated the name.
    self.scanner.back();
    let end = self.scanner.ofs;
    if start == end {
        return self.scanner.parse_error("failed to scan variable name");
    }
    Ok(self.scanner.slice(start, end))
}
/// Read and interpret the text following a '$' escape character.
///
/// * `$` + line ending: a line continuation.  The line ending and the next
///   line's leading spaces are skipped, and an empty literal is returned.
/// * `$ `, `$$`, `$:`: the escaped character as a literal.
/// * `${name}`: a variable reference to everything between the braces.
/// * `$name`: a variable reference to a simple variable name.
///
/// # Errors
/// Fails with "unexpected EOF" if a `${` is never closed, and with
/// "failed to scan variable name" if `$` is followed by anything else that
/// is not a simple variable name.
fn read_escape(&mut self) -> ParseResult<EvalPart<&'text str>> {
    Ok(match self.scanner.read() {
        c @ ('\n' | '\r') => {
            // For a CRLF line ending the '\n' must be consumed too; left in
            // place it would terminate the surrounding eval instead of
            // continuing it onto the next line.
            if c == '\r' {
                self.scanner.skip('\n');
            }
            self.scanner.skip_spaces();
            EvalPart::Literal(self.scanner.slice(0, 0))
        }
        ' ' | '$' | ':' => {
            EvalPart::Literal(self.scanner.slice(self.scanner.ofs - 1, self.scanner.ofs))
        }
        '{' => {
            let start = self.scanner.ofs;
            loop {
                match self.scanner.read() {
                    '\0' => return self.scanner.parse_error("unexpected EOF"),
                    '}' => break,
                    _ => {}
                }
            }
            // Exclude the closing brace that was just consumed.
            let end = self.scanner.ofs - 1;
            EvalPart::VarRef(self.scanner.slice(start, end))
        }
        _ => {
            // '$' followed by some other text.
            self.scanner.back();
            let var = self.read_simple_varname()?;
            EvalPart::VarRef(var)
        }
    })
}
/// Skip spaces and `$`-escaped line endings (line continuations).
///
/// Stops at the first character that is neither, without consuming it.  A
/// `$` that is not followed by a line ending is left in place for the caller.
fn skip_spaces(&mut self) {
    loop {
        match self.scanner.read() {
            ' ' => {}
            '$' => {
                // Accept both "$\n" and "$\r\n" as a continuation, using the
                // same peek_newline / skip('\r') sequence the rest of the
                // parser uses for line endings.
                if !self.scanner.peek_newline() {
                    self.scanner.back();
                    return;
                }
                self.scanner.skip('\r');
                self.scanner.next();
            }
            _ => {
                self.scanner.back();
                return;
            }
        }
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Copy `test_case` into a byte buffer and append the 0 byte that the
    /// parser treats as end of input.
    fn test_case_buffer(test_case: &str) -> Vec<u8> {
        let mut buf = Vec::with_capacity(test_case.len() + 1);
        buf.extend_from_slice(test_case.as_bytes());
        buf.push(0);
        buf
    }

    /// Run `test` on `input` joined with LF, then again joined with CRLF.
    fn test_for_line_endings(input: &[&str], test: fn(&str)) {
        for line_ending in ["\n", "\r\n"] {
            test(&input.join(line_ending));
        }
    }

    /// A `default` statement yields its unevaluated paths; the variable
    /// definition on the line before it is consumed by the same `read` call.
    #[test]
    fn parse_defaults() {
        test_for_line_endings(&["var = 3", "default a b$var c", ""], |test_case| {
            let buf = test_case_buffer(test_case);
            let mut parser = Parser::new(&buf);
            let statement = parser.read().unwrap().unwrap();
            let default = match statement {
                Statement::Default(d) => d,
                _ => panic!("expected default"),
            };
            let expected = vec![
                EvalString::new(vec![EvalPart::Literal("a")]),
                EvalString::new(vec![EvalPart::Literal("b"), EvalPart::VarRef("var")]),
                EvalString::new(vec![EvalPart::Literal("c")]),
            ];
            assert_eq!(default, expected);
        });
    }

    /// `$y.z` references `y` (undefined here), followed by the literal `.z`.
    #[test]
    fn parse_dot_in_eval() {
        let buf = test_case_buffer("x = $y.z\n");
        let mut parser = Parser::new(&buf);
        parser.read().unwrap();
        let x = parser.vars.get("x").unwrap();
        assert_eq!(x, ".z");
    }

    /// Rule names, unlike `$` variable references, may contain a period.
    #[test]
    fn parse_dot_in_rule() {
        let buf = test_case_buffer("rule x.y\n  command = x\n");
        let mut parser = Parser::new(&buf);
        let stmt = parser.read().unwrap().unwrap();
        assert!(matches!(
            stmt,
            Statement::Rule(Rule { name: "x.y", .. })
        ));
    }

    /// `$`-newline continuations are accepted between the tokens of a build
    /// line, including one right before the final newline.
    #[test]
    fn parse_trailing_newline() {
        let buf = test_case_buffer("build$\n foo$\n : $\n  touch $\n\n");
        let mut parser = Parser::new(&buf);
        let stmt = parser.read().unwrap().unwrap();
        assert!(matches!(
            stmt,
            Statement::Build(Build { rule: "touch", .. })
        ));
    }
}