vendor/syntect-5.2.0/examples/synstats.rs - toolchain/rustc - Git at Google

 //! An example of using syntect for code analysis.
 //! Basically a fancy lines of code count program that works
 //! for all languages Sublime Text supports and also counts things
 //! like number of functions and number of types defined.
 //!
 //! Another thing it does that other line count programs can't always
 //! do is properly count comments in embedded syntaxes. For example
 //! JS, CSS and Ruby comments embedded in ERB files.
 use syntect::parsing::{SyntaxSet, ParseState, ScopeStackOp, ScopeStack};
 use syntect::highlighting::{ScopeSelector, ScopeSelectors};
 use syntect::easy::{ScopeRegionIterator};

 use std::path::Path;
 use std::io::{BufRead, BufReader};
 use std::fs::File;
 use walkdir::{DirEntry, WalkDir};
 use std::str::FromStr;

 /// The scope selectors used to classify each parsed region while counting.
 ///
 /// The concrete selector strings are supplied by the `Default` implementation.
 #[derive(Debug)]
 struct Selectors {
     /// Regions matching this selector are counted as comment text
     /// (`Default` uses `comment - comment.block.attribute`).
     comment: ScopeSelector,
     /// Comment regions that also match this are counted as documentation comments.
     doc_comment: ScopeSelectors,
     /// Regions matching this selector increment the function counter.
     function: ScopeSelector,
     /// Regions matching any of these selectors increment the type counter.
     types: ScopeSelectors,
 }

 impl Default for Selectors {
     /// Builds the fixed set of selectors the statistics are based on.
     ///
     /// The selector strings are hard-coded; each `unwrap` panics if its string fails to parse.
     fn default() -> Self {
         let comment = ScopeSelector::from_str("comment - comment.block.attribute").unwrap();
         let doc_comment = ScopeSelectors::from_str(
             "comment.line.documentation, comment.block.documentation",
         )
         .unwrap();
         let function = ScopeSelector::from_str("entity.name.function").unwrap();
         let types = ScopeSelectors::from_str(
             "entity.name.class, entity.name.struct, entity.name.enum, entity.name.type",
         )
         .unwrap();

         Self {
             comment,
             doc_comment,
             function,
             types,
         }
     }
 }

 /// Running totals accumulated over every file that gets counted.
 ///
 /// `count` and `count_line` update the fields; `print_stats` reports them.
 #[derive(Debug, Default)]
 struct Stats {
     /// Selectors used to classify parsed regions.
     selectors: Selectors,
     /// Number of files for which a syntax was found.
     files: usize,
     /// Number of non-empty regions matching the function selector.
     functions: usize,
     /// Number of non-empty regions matching one of the type selectors.
     types: usize,
     /// Total number of lines processed.
     lines: usize,
     /// Accumulated size of all lines read, as added up by `count`.
     chars: usize,
     /// Lines containing at least one non-whitespace region that is not a comment.
     code_lines: usize,
     /// Lines containing comment text but no code.
     comment_lines: usize,
     /// Accumulated size of all comment regions, as added up by `count_line`.
     comment_chars: usize,
     /// Words found in comment regions (documentation comments included).
     comment_words: usize,
     /// Lines containing at least one documentation-comment region.
     doc_comment_lines: usize,
     /// Words found in documentation-comment regions.
     doc_comment_words: usize,
 }

 /// Prints the collected statistics to stdout as an aligned two-column table.
 ///
 /// Blank lines are not tracked separately; the value shown is derived as
 /// `lines - code_lines - comment_lines`, which is what the label now states.
 fn print_stats(stats: &Stats) {
     // Labels are left-aligned in a 38-column field, values right-aligned in 6 columns,
     // so the alignment does not depend on hand-padded string literals.
     let row = |label: &str, value: usize| println!("{:<38}{:>6}", label, value);

     println!();
     println!("################## Stats ###################");
     row("File count:", stats.files);
     row("Total characters:", stats.chars);
     println!();
     row("Function count:", stats.functions);
     row("Type count (structs, enums, classes):", stats.types);
     println!();
     row("Code lines (traditional SLOC):", stats.code_lines);
     row("Total lines (w/ comments & blanks):", stats.lines);
     row("Comment lines (comment but no code):", stats.comment_lines);
     row(
         "Blank lines (lines-code-comment):",
         stats.lines - stats.code_lines - stats.comment_lines,
     );
     println!();
     row("Lines with a documentation comment:", stats.doc_comment_lines);
     row("Total words written in doc comments:", stats.doc_comment_words);
     row("Total words written in all comments:", stats.comment_words);
     row("Characters of comment:", stats.comment_chars);
 }

 /// Decides whether the directory walk should skip `entry`.
 ///
 /// Entries whose name starts with `.` (and is longer than one character) or ends
 /// with `.md` are skipped. Names that are not valid UTF-8 are never skipped.
 ///
 /// The root of the walk (depth 0) is never skipped: it was requested explicitly,
 /// and filtering it would prune the entire walk for roots such as `..` or a
 /// hidden directory like `.config`.
 fn is_ignored(entry: &DirEntry) -> bool {
     if entry.depth() == 0 {
         return false;
     }
     entry
         .file_name()
         .to_str()
         .map_or(false, |name| {
             (name.starts_with('.') && name.len() > 1) || name.ends_with(".md")
         })
 }

 /// Updates `stats` with the contents of one parsed line.
 ///
 /// `ops` are the scope operations for `line`. Each one is applied to `stack`, and
 /// the text region that follows it is classified against the selectors in
 /// `stats.selectors`:
 ///
 /// * a region matching the comment selector adds to the comment counters, and to
 ///   the doc-comment counters if the doc-comment selector matches as well;
 /// * any other region containing non-whitespace text marks the line as code;
 /// * independently, a region matching the function or type selectors increments
 ///   the corresponding counter (once per region).
 ///
 /// A "word" is a whitespace-separated piece made only of alphanumerics, `.` and `'`.
 ///
 /// # Panics
 ///
 /// Panics if applying an operation to `stack` fails.
 fn count_line(
     ops: &[(usize, ScopeStackOp)],
     line: &str,
     stack: &mut ScopeStack,
     stats: &mut Stats,
 ) {
     stats.lines += 1;

     let mut line_has_comment = false;
     let mut line_has_doc_comment = false;
     let mut line_has_code = false;
     for (s, op) in ScopeRegionIterator::new(ops, line) {
         stack.apply(op).unwrap();
         if s.is_empty() {
             // in this case we don't care about blank tokens
             continue;
         }
         let scopes = stack.as_slice();
         if stats.selectors.comment.does_match(scopes).is_some() {
             let words = s
                 .split_whitespace()
                 .filter(|w| w.chars().all(|c| c.is_alphanumeric() || c == '.' || c == '\''))
                 .count();
             if stats.selectors.doc_comment.does_match(scopes).is_some() {
                 line_has_doc_comment = true;
                 stats.doc_comment_words += words;
             }
             // Count characters, not bytes, so non-ASCII comments are not over-counted.
             stats.comment_chars += s.chars().count();
             stats.comment_words += words;
             line_has_comment = true;
         } else if !s.chars().all(char::is_whitespace) {
             line_has_code = true;
         }
         if stats.selectors.function.does_match(scopes).is_some() {
             stats.functions += 1;
         }
         if stats.selectors.types.does_match(scopes).is_some() {
             stats.types += 1;
         }
     }
     // A line counts as a comment line only when it carries no code at all.
     if line_has_comment && !line_has_code {
         stats.comment_lines += 1;
     }
     if line_has_doc_comment {
         stats.doc_comment_lines += 1;
     }
     if line_has_code {
         stats.code_lines += 1;
     }
 }

 /// Parses the file at `path` line by line and adds its numbers to `stats`.
 ///
 /// Files for which no syntax is found (or whose syntax lookup fails) are skipped
 /// silently. A file that cannot be opened is reported on stderr and skipped. If
 /// reading or parsing fails part-way through (for example on invalid UTF-8), the
 /// problem is reported on stderr and the rest of the file is skipped; lines
 /// counted up to that point stay in `stats`.
 fn count(ss: &SyntaxSet, path: &Path, stats: &mut Stats) {
     // Lookup errors are deliberately treated the same as "no syntax found".
     let syntax = match ss.find_syntax_for_file(path).unwrap_or(None) {
         Some(syntax) => syntax,
         None => return,
     };

     let file = match File::open(path) {
         Ok(file) => file,
         Err(err) => {
             eprintln!("skipping {}: {}", path.display(), err);
             return;
         }
     };
     stats.files += 1;

     let mut state = ParseState::new(syntax);
     let mut reader = BufReader::new(file);
     let mut line = String::new();
     let mut stack = ScopeStack::new();
     loop {
         // The buffer is reused for every line to avoid a fresh allocation each time.
         line.clear();
         match reader.read_line(&mut line) {
             Ok(0) => break,
             Ok(_) => {}
             Err(err) => {
                 eprintln!("stopped reading {}: {}", path.display(), err);
                 break;
             }
         }
         let ops = match state.parse_line(&line, ss) {
             Ok(ops) => ops,
             Err(err) => {
                 eprintln!("stopped parsing {}: {}", path.display(), err);
                 break;
             }
         };
         // Count characters, not bytes, to match the "Total characters" label.
         stats.chars += line.chars().count();
         count_line(&ops, &line, &mut stack, stats);
     }
 }

 /// Walks the directory named by the first command-line argument (`.` when absent),
 /// prints the path of every regular file that is not ignored, feeds each one to
 /// `count`, and finally prints the accumulated statistics.
 ///
 /// Entries the walk cannot read are reported on stderr and skipped.
 fn main() {
     let ss = SyntaxSet::load_defaults_newlines(); // note we load the version with newlines

     let path = std::env::args().nth(1).unwrap_or_else(|| String::from("."));

     println!("################## Files ###################");
     let mut stats = Stats::default();
     let walker = WalkDir::new(&path).into_iter();
     for entry in walker.filter_entry(|e| !is_ignored(e)) {
         let entry = match entry {
             Ok(entry) => entry,
             Err(err) => {
                 // e.g. permission denied: keep going with the rest of the tree
                 eprintln!("skipping unreadable entry: {}", err);
                 continue;
             }
         };
         if entry.file_type().is_file() {
             println!("{}", entry.path().display());
             count(&ss, entry.path(), &mut stats);
         }
     }

     print_stats(&stats);
 }
	//! An example of using syntect for code analysis.
	//! Basically a fancy lines of code count program that works
	//! for all languages Sublime Text supports and also counts things
	//! like number of functions and number of types defined.
	//!
	//! Another thing it does that other line count programs can't always
	//! do is properly count comments in embedded syntaxes. For example
	//! JS, CSS and Ruby comments embedded in ERB files.
	use syntect::parsing::{SyntaxSet, ParseState, ScopeStackOp, ScopeStack};
	use syntect::highlighting::{ScopeSelector, ScopeSelectors};
	use syntect::easy::{ScopeRegionIterator};

	use std::path::Path;
	use std::io::{BufRead, BufReader};
	use std::fs::File;
	use walkdir::{DirEntry, WalkDir};
	use std::str::FromStr;

	/// The scope selectors used to classify each parsed region while counting.
	///
	/// The concrete selector strings are supplied by the `Default` implementation.
	#[derive(Debug)]
	struct Selectors {
	/// Regions matching this selector are counted as comment text
	/// (`Default` uses `comment - comment.block.attribute`).
	comment: ScopeSelector,
	/// Comment regions that also match this are counted as documentation comments.
	doc_comment: ScopeSelectors,
	/// Regions matching this selector increment the function counter.
	function: ScopeSelector,
	/// Regions matching any of these selectors increment the type counter.
	types: ScopeSelectors,
	}

	impl Default for Selectors {
	    /// Builds the fixed set of selectors the statistics are based on.
	    ///
	    /// The selector strings are hard-coded; each `unwrap` panics if its string fails to parse.
	    fn default() -> Self {
	        let comment = ScopeSelector::from_str("comment - comment.block.attribute").unwrap();
	        let doc_comment = ScopeSelectors::from_str(
	            "comment.line.documentation, comment.block.documentation",
	        )
	        .unwrap();
	        let function = ScopeSelector::from_str("entity.name.function").unwrap();
	        let types = ScopeSelectors::from_str(
	            "entity.name.class, entity.name.struct, entity.name.enum, entity.name.type",
	        )
	        .unwrap();

	        Self {
	            comment,
	            doc_comment,
	            function,
	            types,
	        }
	    }
	}

	/// Running totals accumulated over every file that gets counted.
	///
	/// `count` and `count_line` update the fields; `print_stats` reports them.
	#[derive(Debug, Default)]
	struct Stats {
	/// Selectors used to classify parsed regions.
	selectors: Selectors,
	/// Number of files for which a syntax was found.
	files: usize,
	/// Number of non-empty regions matching the function selector.
	functions: usize,
	/// Number of non-empty regions matching one of the type selectors.
	types: usize,
	/// Total number of lines processed.
	lines: usize,
	/// Accumulated size of all lines read, as added up by `count`.
	chars: usize,
	/// Lines containing at least one non-whitespace region that is not a comment.
	code_lines: usize,
	/// Lines containing comment text but no code.
	comment_lines: usize,
	/// Accumulated size of all comment regions, as added up by `count_line`.
	comment_chars: usize,
	/// Words found in comment regions (documentation comments included).
	comment_words: usize,
	/// Lines containing at least one documentation-comment region.
	doc_comment_lines: usize,
	/// Words found in documentation-comment regions.
	doc_comment_words: usize,
	}

	/// Prints the collected statistics to stdout as an aligned two-column table.
	///
	/// Blank lines are not tracked separately; the value shown is derived as
	/// `lines - code_lines - comment_lines`, which is what the label now states.
	fn print_stats(stats: &Stats) {
	    // Labels are left-aligned in a 38-column field, values right-aligned in 6 columns,
	    // so the columns line up without relying on padding inside the string literals.
	    let row = |label: &str, value: usize| println!("{:<38}{:>6}", label, value);

	    println!();
	    println!("################## Stats ###################");
	    row("File count:", stats.files);
	    row("Total characters:", stats.chars);
	    println!();
	    row("Function count:", stats.functions);
	    row("Type count (structs, enums, classes):", stats.types);
	    println!();
	    row("Code lines (traditional SLOC):", stats.code_lines);
	    row("Total lines (w/ comments & blanks):", stats.lines);
	    row("Comment lines (comment but no code):", stats.comment_lines);
	    row(
	        "Blank lines (lines-code-comment):",
	        stats.lines - stats.code_lines - stats.comment_lines,
	    );
	    println!();
	    row("Lines with a documentation comment:", stats.doc_comment_lines);
	    row("Total words written in doc comments:", stats.doc_comment_words);
	    row("Total words written in all comments:", stats.comment_words);
	    row("Characters of comment:", stats.comment_chars);
	}

	fn is_ignored(entry: &DirEntry) -> bool {
	entry.file_name()
	.to_str()
	.map(\|s\| s.starts_with('.') && s.len() > 1 \|\| s.ends_with(".md"))
	.unwrap_or(false)
	}

	/// Updates `stats` with the contents of one parsed line.
	///
	/// `ops` are the scope operations for `line`. Each one is applied to `stack`, and
	/// the text region that follows it is classified against the selectors in
	/// `stats.selectors`:
	///
	/// * a region matching the comment selector adds to the comment counters, and to
	///   the doc-comment counters if the doc-comment selector matches as well;
	/// * any other region containing non-whitespace text marks the line as code;
	/// * independently, a region matching the function or type selectors increments
	///   the corresponding counter (once per region).
	///
	/// A "word" is a whitespace-separated piece made only of alphanumerics, `.` and `'`.
	///
	/// # Panics
	///
	/// Panics if applying an operation to `stack` fails.
	fn count_line(
	    ops: &[(usize, ScopeStackOp)],
	    line: &str,
	    stack: &mut ScopeStack,
	    stats: &mut Stats,
	) {
	    stats.lines += 1;

	    let mut line_has_comment = false;
	    let mut line_has_doc_comment = false;
	    let mut line_has_code = false;
	    for (s, op) in ScopeRegionIterator::new(ops, line) {
	        stack.apply(op).unwrap();
	        if s.is_empty() {
	            // in this case we don't care about blank tokens
	            continue;
	        }
	        let scopes = stack.as_slice();
	        if stats.selectors.comment.does_match(scopes).is_some() {
	            let words = s
	                .split_whitespace()
	                .filter(|w| w.chars().all(|c| c.is_alphanumeric() || c == '.' || c == '\''))
	                .count();
	            if stats.selectors.doc_comment.does_match(scopes).is_some() {
	                line_has_doc_comment = true;
	                stats.doc_comment_words += words;
	            }
	            // Count characters, not bytes, so non-ASCII comments are not over-counted.
	            stats.comment_chars += s.chars().count();
	            stats.comment_words += words;
	            line_has_comment = true;
	        } else if !s.chars().all(char::is_whitespace) {
	            line_has_code = true;
	        }
	        if stats.selectors.function.does_match(scopes).is_some() {
	            stats.functions += 1;
	        }
	        if stats.selectors.types.does_match(scopes).is_some() {
	            stats.types += 1;
	        }
	    }
	    // A line counts as a comment line only when it carries no code at all.
	    if line_has_comment && !line_has_code {
	        stats.comment_lines += 1;
	    }
	    if line_has_doc_comment {
	        stats.doc_comment_lines += 1;
	    }
	    if line_has_code {
	        stats.code_lines += 1;
	    }
	}

	/// Parses the file at `path` line by line and adds its numbers to `stats`.
	///
	/// Files for which no syntax is found (or whose syntax lookup fails) are skipped
	/// silently. A file that cannot be opened is reported on stderr and skipped. If
	/// reading or parsing fails part-way through (for example on invalid UTF-8), the
	/// problem is reported on stderr and the rest of the file is skipped; lines
	/// counted up to that point stay in `stats`.
	fn count(ss: &SyntaxSet, path: &Path, stats: &mut Stats) {
	    // Lookup errors are deliberately treated the same as "no syntax found".
	    let syntax = match ss.find_syntax_for_file(path).unwrap_or(None) {
	        Some(syntax) => syntax,
	        None => return,
	    };

	    let file = match File::open(path) {
	        Ok(file) => file,
	        Err(err) => {
	            eprintln!("skipping {}: {}", path.display(), err);
	            return;
	        }
	    };
	    stats.files += 1;

	    let mut state = ParseState::new(syntax);
	    let mut reader = BufReader::new(file);
	    let mut line = String::new();
	    let mut stack = ScopeStack::new();
	    loop {
	        // The buffer is reused for every line to avoid a fresh allocation each time.
	        line.clear();
	        match reader.read_line(&mut line) {
	            Ok(0) => break,
	            Ok(_) => {}
	            Err(err) => {
	                eprintln!("stopped reading {}: {}", path.display(), err);
	                break;
	            }
	        }
	        let ops = match state.parse_line(&line, ss) {
	            Ok(ops) => ops,
	            Err(err) => {
	                eprintln!("stopped parsing {}: {}", path.display(), err);
	                break;
	            }
	        };
	        // Count characters, not bytes, to match the "Total characters" label.
	        stats.chars += line.chars().count();
	        count_line(&ops, &line, &mut stack, stats);
	    }
	}

	/// Walks the directory named by the first command-line argument (`.` when absent),
	/// prints the path of every regular file that is not ignored, feeds each one to
	/// `count`, and finally prints the accumulated statistics.
	///
	/// Entries the walk cannot read are reported on stderr and skipped.
	fn main() {
	    let ss = SyntaxSet::load_defaults_newlines(); // note we load the version with newlines

	    let path = std::env::args().nth(1).unwrap_or_else(|| String::from("."));

	    println!("################## Files ###################");
	    let mut stats = Stats::default();
	    let walker = WalkDir::new(&path).into_iter();
	    for entry in walker.filter_entry(|e| !is_ignored(e)) {
	        let entry = match entry {
	            Ok(entry) => entry,
	            Err(err) => {
	                // e.g. permission denied: keep going with the rest of the tree
	                eprintln!("skipping unreadable entry: {}", err);
	                continue;
	            }
	        };
	        if entry.file_type().is_file() {
	            println!("{}", entry.path().display());
	            count(&ss, entry.path(), &mut stats);
	        }
	    }

	    print_stats(&stats);
	}