blob: d50b2d996aac9c054b07586b91dcba2c10c7f8c1 [file] [log] [blame]
#![forbid(unsafe_code)]
use std::cmp;
use std::collections::HashSet;
use std::env;
use std::fs::File;
use std::io::{self, BufReader, Read};
use xml::reader::XmlEvent;
use xml::ParserConfig;
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut file;
let mut stdin;
let source: &mut dyn Read = if let Some(file_name) = env::args().nth(1) {
file = File::open(file_name).map_err(|e| format!("Cannot open input file: {e}"))?;
&mut file
} else {
stdin = io::stdin();
&mut stdin
};
let reader = ParserConfig::new()
.whitespace_to_characters(true)
.ignore_comments(false)
.create_reader(BufReader::new(source));
let mut processing_instructions = 0;
let mut elements = 0;
let mut character_blocks = 0;
let mut cdata_blocks = 0;
let mut characters = 0;
let mut comment_blocks = 0;
let mut comment_characters = 0;
let mut namespaces = HashSet::new();
let mut depth = 0;
let mut max_depth = 0;
for e in reader {
let e = e.map_err(|e| format!("Error parsing XML document: {e}"))?;
match e {
XmlEvent::StartDocument { version, encoding, standalone } =>
println!(
"XML document version {}, encoded in {}, {}standalone",
version, encoding, if standalone.unwrap_or(false) { "" } else { "not " }
),
XmlEvent::EndDocument => println!("Document finished"),
XmlEvent::ProcessingInstruction { .. } => processing_instructions += 1,
XmlEvent::Whitespace(_) => {} // can't happen due to configuration
XmlEvent::Characters(s) => {
character_blocks += 1;
characters += s.len();
}
XmlEvent::CData(s) => {
cdata_blocks += 1;
characters += s.len();
}
XmlEvent::Comment(s) => {
comment_blocks += 1;
comment_characters += s.len();
}
XmlEvent::StartElement { namespace, .. } => {
depth += 1;
max_depth = cmp::max(max_depth, depth);
elements += 1;
namespaces.extend(namespace.0.into_values());
}
XmlEvent::EndElement { .. } => {
depth -= 1;
}
};
}
namespaces.remove(xml::namespace::NS_EMPTY_URI);
namespaces.remove(xml::namespace::NS_XMLNS_URI);
namespaces.remove(xml::namespace::NS_XML_URI);
println!("Elements: {elements}, maximum depth: {max_depth}");
println!("Namespaces (excluding built-in): {}", namespaces.len());
println!("Characters: {characters}, characters blocks: {character_blocks}, CDATA blocks: {cdata_blocks}");
println!("Comment blocks: {comment_blocks}, comment characters: {comment_characters}");
println!("Processing instructions (excluding built-in): {processing_instructions}");
Ok(())
}