| //! DIMCAS CNF parser and writer for the Varisat SAT solver. |
| |
| use std::{borrow::Borrow, io, mem::replace}; |
| |
| use varisat_formula::{CnfFormula, ExtendFormula, Lit, Var}; |
| |
| use anyhow::Error; |
| use thiserror::Error; |
| |
| /// Possible errors while parsing a DIMACS CNF formula. |
| #[derive(Debug, Error)] |
| pub enum ParserError { |
| #[error( |
| "line {}: Unexpected character in DIMACS CNF input: '{}'", |
| line, |
| unexpected |
| )] |
| UnexpectedInput { line: usize, unexpected: char }, |
| #[error( |
| "line {}: Literal index is too large: {}{}...", |
| line, |
| index, |
| final_digit |
| )] |
| LiteralTooLarge { |
| line: usize, |
| index: usize, |
| final_digit: usize, |
| }, |
| #[error("line {}: Invalid header syntax: {}", line, header)] |
| InvalidHeader { line: usize, header: String }, |
| #[error("line {}: Unterminated clause", line)] |
| UnterminatedClause { line: usize }, |
| #[error( |
| "Formula has {} variables while the header specifies {} variables", |
| var_count, |
| header_var_count |
| )] |
| VarCount { |
| var_count: usize, |
| header_var_count: usize, |
| }, |
| #[error( |
| "Formula has {} clauses while the header specifies {} clauses", |
| clause_count, |
| header_clause_count |
| )] |
| ClauseCount { |
| clause_count: usize, |
| header_clause_count: usize, |
| }, |
| #[error("Parser invoked after a previous error")] |
| PreviousError, |
| } |
| |
| /// Variable and clause count present in a DIMACS CNF header. |
| #[derive(Copy, Clone, Debug)] |
| pub struct DimacsHeader { |
| pub var_count: usize, |
| pub clause_count: usize, |
| } |
| |
| /// Parser for DIMACS CNF files. |
| /// |
| /// This parser can consume the input in chunks while also producing the parsed result in chunks. |
| #[derive(Default)] |
| pub struct DimacsParser { |
| formula: CnfFormula, |
| partial_clause: Vec<Lit>, |
| header: Option<DimacsHeader>, |
| |
| line_number: usize, |
| clause_count: usize, |
| partial_lit: usize, |
| negate_next_lit: bool, |
| |
| in_lit: bool, |
| in_comment_or_header: bool, |
| in_header: bool, |
| start_of_line: bool, |
| error: bool, |
| |
| header_line: Vec<u8>, |
| } |
| |
| impl DimacsParser { |
| /// Create a new DIMACS CNF parser. |
| pub fn new() -> DimacsParser { |
| DimacsParser { |
| formula: CnfFormula::new(), |
| partial_clause: vec![], |
| header: None, |
| |
| line_number: 1, |
| clause_count: 0, |
| partial_lit: 0, |
| negate_next_lit: false, |
| |
| in_lit: false, |
| in_comment_or_header: false, |
| in_header: false, |
| start_of_line: true, |
| error: false, |
| |
| header_line: vec![], |
| } |
| } |
| |
| /// Parse the given input and check the header if present. |
| /// |
| /// This parses the whole input into a single [`CnfFormula`](varisat_formula::CnfFormula). |
| /// Incremental parsing is possible using [`parse_incremental`](DimacsParser::parse_incremental) |
| /// or the [`parse_chunk`](DimacsParser::parse_chunk) method. |
| pub fn parse(input: impl io::Read) -> Result<CnfFormula, Error> { |
| Ok(Self::parse_incremental(input, |_| Ok(()))?.take_formula()) |
| } |
| |
| /// Parse the given input incrementally and check the header if present. |
| /// |
| /// The callback is invoked repeatedly with a reference to the parser. The callback can process |
| /// the formula incrementally by calling [`take_formula`](DimacsParser::take_formula) on the |
| /// passed argument. |
| pub fn parse_incremental( |
| input: impl io::Read, |
| mut callback: impl FnMut(&mut DimacsParser) -> Result<(), Error>, |
| ) -> Result<DimacsParser, Error> { |
| use io::BufRead; |
| |
| let mut buffer = io::BufReader::new(input); |
| let mut parser = Self::new(); |
| |
| loop { |
| let data = buffer.fill_buf()?; |
| if data.is_empty() { |
| break; |
| } |
| parser.parse_chunk(data)?; |
| let len = data.len(); |
| buffer.consume(len); |
| |
| callback(&mut parser)?; |
| } |
| parser.eof()?; |
| callback(&mut parser)?; |
| parser.check_header()?; |
| |
| Ok(parser) |
| } |
| |
| /// Parse a chunk of input. |
| /// |
| /// After parsing the last chunk call the [`eof`](DimacsParser::eof) method. |
| /// |
| /// If this method returns an error, the parser is in an invalid state and cannot parse further |
| /// chunks. |
| pub fn parse_chunk(&mut self, chunk: &[u8]) -> Result<(), ParserError> { |
| if self.error { |
| return Err(ParserError::PreviousError); |
| } |
| for &byte in chunk.iter() { |
| if byte == b'\n' { |
| self.line_number += 1; |
| } |
| match byte { |
| b'\n' | b'\r' if self.in_comment_or_header => { |
| if self.in_header { |
| self.in_header = false; |
| self.parse_header_line()?; |
| } |
| self.in_comment_or_header = false; |
| self.start_of_line = true |
| } |
| _ if self.in_comment_or_header => { |
| if self.in_header { |
| self.header_line.push(byte); |
| } |
| } |
| b'0'..=b'9' => { |
| self.in_lit = true; |
| let digit = (byte - b'0') as usize; |
| |
| const CAN_OVERFLOW: usize = Var::max_count() / 10; |
| const OVERFLOW_DIGIT: usize = Var::max_count() % 10; |
| |
| // Overflow check that is fast but still works if LitIdx has the same size as |
| // usize |
| if CAN_OVERFLOW <= self.partial_lit { |
| let carry = (digit <= OVERFLOW_DIGIT) as usize; |
| |
| if CAN_OVERFLOW + carry <= self.partial_lit { |
| self.error = true; |
| return Err(ParserError::LiteralTooLarge { |
| line: self.line_number, |
| index: self.partial_lit, |
| final_digit: digit, |
| }); |
| } |
| } |
| |
| self.partial_lit = self.partial_lit * 10 + digit; |
| |
| self.start_of_line = false |
| } |
| b'-' if !self.negate_next_lit && !self.in_lit => { |
| self.negate_next_lit = true; |
| self.start_of_line = false |
| } |
| b' ' | b'\n' | b'\r' if !self.negate_next_lit || self.in_lit => { |
| self.finish_literal(); |
| self.negate_next_lit = false; |
| self.in_lit = false; |
| self.partial_lit = 0; |
| self.start_of_line = byte != b' '; |
| } |
| b'c' if self.start_of_line => { |
| self.in_comment_or_header = true; |
| } |
| b'p' if self.start_of_line && self.header.is_none() => { |
| self.in_comment_or_header = true; |
| self.in_header = true; |
| self.header_line.push(b'p'); |
| } |
| _ => { |
| self.error = true; |
| return Err(ParserError::UnexpectedInput { |
| line: self.line_number, |
| unexpected: byte as char, |
| }); |
| } |
| } |
| } |
| |
| Ok(()) |
| } |
| |
| /// Finish parsing the input. |
| /// |
| /// This does not check whether the header information was correct, call |
| /// [`check_header`](DimacsParser::check_header) for this. |
| pub fn eof(&mut self) -> Result<(), ParserError> { |
| if self.in_header { |
| self.parse_header_line()?; |
| } |
| |
| self.finish_literal(); |
| |
| if !self.partial_clause.is_empty() { |
| return Err(ParserError::UnterminatedClause { |
| line: self.line_number, |
| }); |
| } |
| |
| Ok(()) |
| } |
| |
| /// Verifies the header information when present. |
| /// |
| /// Does nothing when the input doesn't contain a header. |
| pub fn check_header(&self) -> Result<(), ParserError> { |
| if let Some(header) = self.header { |
| let var_count = self.formula.var_count(); |
| if var_count != header.var_count { |
| return Err(ParserError::VarCount { |
| var_count, |
| header_var_count: header.var_count, |
| }); |
| } |
| |
| if self.clause_count != header.clause_count { |
| return Err(ParserError::ClauseCount { |
| clause_count: self.clause_count, |
| header_clause_count: header.clause_count, |
| }); |
| } |
| } |
| |
| Ok(()) |
| } |
| |
| /// Returns the subformula of everything parsed since the last call to this method. |
| /// |
| /// To parse the whole input into a single [`CnfFormula`](varisat_formula::CnfFormula), simply |
| /// call this method once after calling [`eof`](DimacsParser::eof). For incremental parsing this |
| /// method can be invoked after each call of [`parse_chunk`](DimacsParser::parse_chunk). |
| /// |
| /// The variable count of the returned formula will be the maximum of the variable count so far |
| /// and the variable count of the header if present. |
| pub fn take_formula(&mut self) -> CnfFormula { |
| let mut new_formula = CnfFormula::new(); |
| new_formula.set_var_count(self.formula.var_count()); |
| replace(&mut self.formula, new_formula) |
| } |
| |
| /// Return the DIMACS CNF header data if present. |
| pub fn header(&self) -> Option<DimacsHeader> { |
| self.header |
| } |
| |
| /// Number of clauses parsed. |
| pub fn clause_count(&self) -> usize { |
| self.clause_count |
| } |
| |
| /// Number of variables in the parsed formula. |
| pub fn var_count(&self) -> usize { |
| self.formula.var_count() |
| } |
| |
| fn finish_literal(&mut self) { |
| if self.in_lit { |
| if self.partial_lit == 0 { |
| self.formula.add_clause(&self.partial_clause); |
| self.partial_clause.clear(); |
| self.clause_count += 1; |
| } else { |
| self.partial_clause |
| .push(Var::from_dimacs(self.partial_lit as isize).lit(!self.negate_next_lit)); |
| } |
| } |
| } |
| |
| fn parse_header_line(&mut self) -> Result<(), ParserError> { |
| let header_line = String::from_utf8_lossy(&self.header_line).into_owned(); |
| |
| if !header_line.starts_with("p ") { |
| return self.invalid_header(header_line); |
| } |
| |
| let mut header_values = header_line[2..].split_whitespace(); |
| |
| if header_values.next() != Some("cnf") { |
| return self.invalid_header(header_line); |
| } |
| |
| let var_count: usize = match header_values |
| .next() |
| .and_then(|value| str::parse(value).ok()) |
| { |
| None => return self.invalid_header(header_line), |
| Some(value) => value, |
| }; |
| |
| if var_count > Var::max_count() { |
| self.error = true; |
| return Err(ParserError::LiteralTooLarge { |
| line: self.line_number, |
| index: var_count / 10, |
| final_digit: var_count % 10, |
| }); |
| } |
| |
| let clause_count: usize = match header_values |
| .next() |
| .and_then(|value| str::parse(value).ok()) |
| { |
| None => return self.invalid_header(header_line), |
| Some(value) => value, |
| }; |
| |
| if header_values.next().is_some() { |
| return self.invalid_header(header_line); |
| } |
| |
| self.header = Some(DimacsHeader { |
| var_count, |
| clause_count, |
| }); |
| |
| self.formula.set_var_count(var_count); |
| |
| Ok(()) |
| } |
| |
| fn invalid_header(&mut self, header_line: String) -> Result<(), ParserError> { |
| self.error = true; |
| Err(ParserError::InvalidHeader { |
| line: self.line_number, |
| header: header_line, |
| }) |
| } |
| } |
| |
| /// Write a DIMACS CNF header. |
| /// |
| /// Can be used with [`write_dimacs_clauses`] to implement incremental writing. |
| pub fn write_dimacs_header(target: &mut impl io::Write, header: DimacsHeader) -> io::Result<()> { |
| writeln!( |
| target, |
| "p cnf {var_count} {clause_count}", |
| var_count = header.var_count, |
| clause_count = header.clause_count |
| ) |
| } |
| |
| /// Write an iterator of clauses as headerless DIMACS CNF. |
| /// |
| /// Can be used with [`write_dimacs_header`] to implement incremental writing. |
| pub fn write_dimacs_clauses( |
| target: &mut impl io::Write, |
| clauses: impl IntoIterator<Item = impl IntoIterator<Item = impl Borrow<Lit>>>, |
| ) -> io::Result<()> { |
| for clause in clauses.into_iter() { |
| for lit in clause.into_iter() { |
| itoa::write(&mut *target, lit.borrow().to_dimacs())?; |
| target.write_all(b" ")?; |
| } |
| target.write_all(b"0\n")?; |
| } |
| Ok(()) |
| } |
| |
| /// Write a formula as DIMACS CNF. |
| /// |
| /// Use [`write_dimacs_header`] and [`write_dimacs_clauses`] to implement incremental writing. |
| pub fn write_dimacs(target: &mut impl io::Write, formula: &CnfFormula) -> io::Result<()> { |
| write_dimacs_header( |
| &mut *target, |
| DimacsHeader { |
| var_count: formula.var_count(), |
| clause_count: formula.len(), |
| }, |
| )?; |
| write_dimacs_clauses(&mut *target, formula.iter()) |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| use anyhow::Error; |
| use proptest::{test_runner::TestCaseError, *}; |
| |
| use varisat_formula::{cnf::strategy::*, cnf_formula}; |
| |
| #[test] |
| fn odd_whitespace() -> Result<(), Error> { |
| let parsed = DimacsParser::parse( |
| b"p cnf 4 3 \n 1 \n 2 3\n0 -4 0 2\nccomment \n\n0\n\n" as &[_], |
| )?; |
| |
| let expected = cnf_formula![ |
| 1, 2, 3; |
| -4; |
| 2; |
| ]; |
| |
| assert_eq!(parsed, expected); |
| |
| Ok(()) |
| } |
| |
| macro_rules! expect_error { |
| ( $input:expr, $( $cases:tt )* ) => { |
| match DimacsParser::parse($input as &[_]) { |
| Ok(parsed) => panic!("Expexcted errror but got {:?}", parsed), |
| Err(err) => match err.downcast_ref() { |
| Some(casted_err) => match casted_err { |
| $( $cases )*, |
| _ => panic!("Unexpected error {:?}", casted_err), |
| }, |
| None => panic!("Unexpected error type {:?}", err), |
| } |
| } |
| }; |
| } |
| |
| #[test] |
| fn invalid_headers() { |
| expect_error!(b"pcnf 1 3", ParserError::InvalidHeader { .. } => ()); |
| expect_error!(b"p notcnf 1 3", ParserError::InvalidHeader { .. } => ()); |
| expect_error!(b"p cnf 1", ParserError::InvalidHeader { .. } => ()); |
| expect_error!(b"p cnf 1 2 3", ParserError::InvalidHeader { .. } => ()); |
| expect_error!(b"p cnf foo bar", ParserError::InvalidHeader { .. } => ()); |
| expect_error!(b"p cnf -3 -6", ParserError::InvalidHeader { .. } => ()); |
| |
| expect_error!( |
| format!("p cnf {} 4", Var::max_var().to_dimacs() + 1).as_bytes(), |
| ParserError::LiteralTooLarge { .. } => () |
| ); |
| DimacsParser::parse(format!("p cnf {} 0", Var::max_var().to_dimacs()).as_bytes()).unwrap(); |
| |
| expect_error!(b"p cnf 4 18446744073709551616", ParserError::InvalidHeader { .. } => ()); |
| |
| expect_error!( |
| b"p cnf 1 2\np cnf 1 2\n", |
| ParserError::UnexpectedInput { unexpected: 'p', .. } => () |
| ); |
| } |
| |
| #[test] |
| fn invalid_header_data() { |
| expect_error!( |
| b"p cnf 1 1\n 2 0", |
| ParserError::VarCount { var_count: 2, header_var_count: 1 } => () |
| ); |
| |
| expect_error!( |
| b"p cnf 10 1\n 1 0 0", |
| ParserError::ClauseCount { clause_count: 2, header_clause_count: 1 } => () |
| ); |
| |
| expect_error!( |
| b"p cnf 10 4\n 1 0", |
| ParserError::ClauseCount { clause_count: 1, header_clause_count: 4 } => () |
| ); |
| } |
| |
| #[test] |
| fn syntax_errors() { |
| expect_error!( |
| b"1 2 ?foo", |
| ParserError::UnexpectedInput { unexpected: '?', .. } => () |
| ); |
| |
| expect_error!( |
| b"1 2 - 3 0", |
| ParserError::UnexpectedInput { unexpected: ' ', .. } => () |
| ); |
| |
| expect_error!( |
| b"1 2 -\n3 0", |
| ParserError::UnexpectedInput { unexpected: '\n', .. } => () |
| ); |
| |
| expect_error!( |
| b"1 2 --3 0", |
| ParserError::UnexpectedInput { unexpected: '-', .. } => () |
| ); |
| |
| expect_error!( |
| b"1 2-3 0", |
| ParserError::UnexpectedInput { unexpected: '-', .. } => () |
| ); |
| } |
| |
| #[test] |
| fn unterminated_clause() { |
| expect_error!( |
| b"1 2 3", |
| ParserError::UnterminatedClause { .. } => () |
| ); |
| } |
| |
| #[test] |
| fn literal_too_large() { |
| expect_error!( |
| format!("1 {} 2 0", Var::max_var().to_dimacs() + 1).as_bytes(), |
| ParserError::LiteralTooLarge { .. } => () |
| ); |
| |
| assert_eq!( |
| DimacsParser::parse(format!("1 {} 2 0", Var::max_var().to_dimacs()).as_bytes()) |
| .unwrap(), |
| cnf_formula![ |
| 1, Var::max_var().to_dimacs(), 2; |
| ] |
| ); |
| } |
| |
| proptest! { |
| |
| #[test] |
| fn roundtrip(input in cnf_formula(1..100usize, 0..1000, 0..10)) { |
| let mut buf = vec![]; |
| |
| write_dimacs(&mut buf, &input)?; |
| |
| let parsed = DimacsParser::parse(&buf[..]).map_err(|e| TestCaseError::fail(e.to_string()))?; |
| |
| prop_assert_eq!(parsed, input); |
| } |
| } |
| } |