// vendor/minifier-0.3.1/src/html.rs - toolchain/rustc - Git at Google

 // Take a look at the license at the top of the repository in the LICENSE file.

 use regex::{Captures, Regex};

 /// Collapses whitespace in `source`.
 ///
 /// If the lowercased input contains `<textarea` or `<pre`, the input is only
 /// trimmed and otherwise returned unchanged. In every other case whitespace
 /// between `>` and `<` becomes a single space, and every run of two or more
 /// whitespace characters, as well as every single `\r` or `\n`, becomes a
 /// single space.
 fn condense_whitespace(source: &str) -> String {
     let lowered = source.to_lowercase();
     if lowered.contains("<textarea") || lowered.contains("<pre") {
         return source.trim().to_owned();
     }

     // Both patterns are compiled on every call; caching them might be worthwhile.
     let between_tags = Regex::new(r">\s+<").unwrap();
     let spaced = between_tags.replace_all(source, "> <");
     let blank_runs = Regex::new(r"\s{2,}|[\r\n]").unwrap();
     blank_runs.replace_all(&spaced, " ").into_owned()
 }

 /// Removes `type="..."` attributes from `<style ...>` and `<script ...>` opening tags.
 ///
 /// Only tags that have at least one character between the tag name and the
 /// closing `>` are matched; inside each matched tag every occurrence of the
 /// `type` pattern (including whitespace directly in front of it) is deleted.
 fn condense(source: &str) -> String {
     let opening_tag = Regex::new(r"<(style|script)[\w|\s].*?>").unwrap();
     let type_attr = Regex::new(r#"\s*?type="[\w|\s].*?""#).unwrap();

     let condensed = opening_tag.replace_all(source, |found: &Captures| {
         let whole_tag = &found[0];
         type_attr.replace_all(whole_tag, "").into_owned()
     });
     condensed.into_owned()
 }

 /// Deletes every literal occurrence of the tags this minifier considers
 /// unnecessary (mostly optional end tags, plus the bare `<html>`, `<head>`,
 /// `<body>` and `<tbody>` start tags).
 ///
 /// Matching is an exact, case-sensitive substring match: `<BODY>` or a
 /// `<body class="x">` start tag with attributes is left untouched.
 /// Tags are removed one after another in list order.
 fn clean_unneeded_tags(source: &str) -> String {
     // Each tag appears exactly once; a repeated entry would only cost an
     // extra pass over the whole string.
     const USELESS_TAGS: &[&str] = &[
         "</area>",
         "</base>",
         "<body>",
         "</body>",
         "</br>",
         "</col>",
         "</colgroup>",
         "</dd>",
         "</dt>",
         "<head>",
         "</head>",
         "</hr>",
         "<html>",
         "</html>",
         "</img>",
         "</input>",
         "</li>",
         "</link>",
         "</meta>",
         "</option>",
         "</param>",
         "<tbody>",
         "</tbody>",
         "</td>",
         "</tfoot>",
         "</th>",
         "</thead>",
         "</tr>",
         "</basefont>",
         "</isindex>",
     ];

     USELESS_TAGS
         .iter()
         .fold(source.to_owned(), |acc, tag| acc.replace(*tag, ""))
 }

 /// Replaces HTML comments (`<!-- ... -->`, possibly spanning several lines)
 /// with a single space.
 ///
 /// A comment is kept verbatim when its text, after blanking out every `<!--`
 /// marker and trimming, starts with `[` (as in `<!--[if lte IE 8]>`).
 fn remove_comments(source: &str) -> String {
     let comment = Regex::new("<!--(.|\n)*?-->").unwrap();

     let stripped = comment.replace_all(source, |found: &Captures| {
         let whole = &found[0];
         let inner = whole.replace("<!--", " ");
         if !inner.trim().starts_with("[") {
             return " ".to_owned();
         }
         whole.to_owned()
     });
     stripped.into_owned()
 }

 /// Tidies whitespace inside opening tags and drops the quotes around simple
 /// `width` / `height` values.
 ///
 /// Every `<\w.*?>` match that neither starts with `<!` nor contains `</` is
 /// rewritten in four steps:
 ///
 /// 1. whitespace between a closing quote (`"` or `'`) and `>` is removed;
 /// 2. inside each "word char, whitespace, word char" and "digit, whitespace,
 ///    `>`" match, whitespace matched by ` \s+|\s +` is collapsed to one space;
 /// 3. two or more whitespace characters between a quote and a following
 ///    `name=` are reduced to a single space;
 /// 4. `width="v"` / `height="v"` become `width=v` / `height=v` when `v` only
 ///    uses characters from the class `[a-zA-Z0-9-_\.]`; other attributes keep
 ///    their quotes.
 ///
 /// The final string is trimmed before it is returned.
 fn unquote_attributes(source: &str) -> String {
     // Some attributes like width, height, etc... don't need quotes.
     let any_tag = Regex::new(r"<\w.*?>").unwrap();
     let extra_spaces = Regex::new(r" \s+|\s +").unwrap();
     let between_words = Regex::new(r"\w\s+\w").unwrap();
     let spaces_before_close = Regex::new(r##""\s+>"##).unwrap();
     let spaces_before_close2 = Regex::new(r"'\s+>").unwrap();
     let extra_spaces2 = Regex::new(r##""\s\s+\w+="|'\s\s+\w+='|"\s\s+\w+=|'\s\s+\w+="##).unwrap();
     let extra_spaces3 = Regex::new(r"\d\s+>").unwrap();
     let quotes_in_tag = Regex::new(r##"([a-zA-Z]+)="([a-zA-Z0-9-_\.]+)""##).unwrap();

     any_tag
         .replace_all(source, |caps: &Captures| {
             let cap = &caps[0];
             if cap.starts_with("<!") || cap.contains("</") {
                 return cap.to_owned();
             }

             let tag = spaces_before_close.replace_all(cap, "\">");
             let tag = spaces_before_close2.replace_all(&tag, "'>").into_owned();

             // Collapse each match independently. Pairing the n-th match of
             // one pattern with the n-th match of the other and searching for
             // their concatenation would miss both whenever the two matches
             // are not adjacent in the tag.
             let mut collapsed = tag.clone();
             for found in between_words
                 .find_iter(&tag)
                 .chain(extra_spaces3.find_iter(&tag))
             {
                 let piece = found.as_str();
                 collapsed = collapsed.replace(piece, &extra_spaces.replace_all(piece, " "));
             }

             let mut output = collapsed.clone();
             for found in extra_spaces2.find_iter(&collapsed) {
                 let piece = found.as_str();
                 // Every alternative starts with an ASCII quote, so byte
                 // index 1 is always a char boundary.
                 let (quote, rest) = piece.split_at(1);
                 output = output.replace(piece, &format!("{} {}", quote, rest.trim_start()));
             }

             quotes_in_tag
                 .replace_all(&output, |attr: &Captures| match &attr[1] {
                     "width" | "height" => format!("{}={}", &attr[1], &attr[2]),
                     name => format!("{}=\"{}\"", name, &attr[2]),
                 })
                 .into_owned()
         })
         .trim()
         .to_owned()
 }

 /// Returns a minified version of the provided HTML source.
 ///
 /// The passes run in a fixed order: comment removal, `type` attribute
 /// stripping on `<style>`/`<script>`, removal of unneeded tags, whitespace
 /// condensing and finally attribute unquoting. The result is trimmed.
 pub fn minify(source: &str) -> String {
     let without_comments = remove_comments(source);
     let without_types = condense(&without_comments);
     let pruned = clean_unneeded_tags(&without_types);
     let compact = condense_whitespace(&pruned);
     let minified = unquote_attributes(&compact);
     minified.trim().to_owned()
 }

 // End-to-end check of `minify` on a small document: the expected string has
 // no comments, no `<head>`/`<body>` wrappers and no `</li>` end tags, keeps
 // `type` on `<link>` but not on `<style>`/`<script>`, and has `width` and
 // `height` values unquoted.
 #[test]
 fn html_minify_test() {
     let source = r##"<head>
     <title>Some huge title</title>
     <link rel="stylesheet" type="text/css"   href="something.css"   >
     <style type="text/css">
         .some_class {
             color: red;
         }
     </style>
 </head>
 <body>
     <header>
         <div>
             <i>    <b><a href="www.somewhere.com" class="some_class">Narnia</a> </b>    </i>
             <h1    style="width:100%;text-align:center;"   >Big header</h1>
         </div>
     <!-- commeeeeeeeents !!! -->
     </header>
     <div id="some_id">
         <!-- another comment
         on
 multi
 lines -->
         <div id="another_id" class="another_class" width="100">
             <h2>A little sub title</h2>
             <ul>
                 <li>A list!</li>
                 <li>Who doesn't like lists?</li>
                 <li height="12" class="fooool">Well, who cares...</li>
             </ul>
         </div>
     </div>
     <script type="text/javascript"    >
         console.log("foo");
     </script>
     <style type="text/css" src="../foo.css">
     <script src="../foo.js">
 </body>
 "##;

     // The trailing `\` in the literal below skips the line break and the
     // indentation that follows it.
     let expected_result = "<title>Some huge title</title> <link rel=\"stylesheet\" \
                            type=\"text/css\" href=\"something.css\"> <style> .some_class \
                            { color: red; } </style> <header> <div> <i> <b><a \
                            href=\"www.somewhere.com\" class=\"some_class\">Narnia</a> </b> </i> \
                            <h1 style=\"width:100%;text-align:center;\">Big header</h1> </div> \
                            </header> <div id=\"some_id\"> <div id=\"another_id\" \
                            class=\"another_class\" width=100> <h2>A little sub \
                            title</h2> <ul> <li>A list! <li>Who doesn't like lists? \
                            <li height=12 class=\"fooool\">Well, who cares... </ul> </div> \
                            </div> <script > console.log(\"foo\"); </script> <style \
                            src=\"../foo.css\"> <script src=\"../foo.js\">";
     assert_eq!(minify(source), expected_result);
 }

 // Checks that `minify` drops an ordinary comment but keeps a conditional
 // comment (`<!--[if ...]> ... <![endif]-->`) in the output.
 #[test]
 fn html_keep_important_comments() {
     let source = r#"
 <div>
     <!-- normal comment -->
     <div>content</div>
     <!--[if lte IE 8]>
     <div class="warning">This old browser is unsupported and will most likely display funky things.
     </div>
     <![endif]-->
 </div>
 "#;

     let expected_result =
         "<div> <div>content</div> <!--[if lte IE 8]> <div class=\"warning\">This \
                            old browser is unsupported and will most likely display funky things. \
                            </div> <![endif]--> </div>";
     assert_eq!(minify(source), expected_result);
 }
	// Take a look at the license at the top of the repository in the LICENSE file.

	use regex::{Captures, Regex};

	/// Collapses whitespace in `source`.
	///
	/// If the lowercased input contains `<textarea` or `<pre`, the input is only
	/// trimmed. Otherwise whitespace between `>` and `<` becomes a single space,
	/// and every run of two or more whitespace characters, as well as every
	/// single `\r` or `\n`, becomes a single space.
	fn condense_whitespace(source: &str) -> String {
	    let lower_source = source.to_lowercase();
	    if lower_source.contains("<textarea") || lower_source.contains("<pre") {
	        return source.trim().to_owned();
	    }

	    // maybe should be better not to recreate Regex every time?
	    let re = Regex::new(r">\s+<").unwrap();
	    let source = re.replace_all(source, "> <").into_owned();
	    // The `|` must be an unescaped alternation: whitespace runs OR line breaks.
	    let re = Regex::new(r"\s{2,}|[\r\n]").unwrap();
	    re.replace_all(&source, " ").into_owned()
	}

	/// Removes `type="..."` attributes from `<style ...>` and `<script ...>` opening tags.
	///
	/// Only tags that have at least one character between the tag name and the
	/// closing `>` are matched; inside each matched tag every occurrence of the
	/// `type` pattern (including whitespace directly in front of it) is deleted.
	fn condense(source: &str) -> String {
	    // `(style|script)` is an alternation; the lazy `*?` quantifiers let the
	    // `type` pattern cover values of any length such as `text/css`.
	    let re = Regex::new(r"<(style|script)[\w|\s].*?>").unwrap();
	    let type_reg = Regex::new(r#"\s*?type="[\w|\s].*?""#).unwrap();
	    re.replace_all(source, |caps: &Captures| {
	        type_reg.replace_all(&caps[0], "").into_owned()
	    })
	    .into_owned()
	}

	/// Deletes every literal occurrence of the tags this minifier considers
	/// unnecessary (mostly optional end tags, plus the bare `<html>`, `<head>`,
	/// `<body>` and `<tbody>` start tags).
	///
	/// Matching is an exact, case-sensitive substring match, and tags are
	/// removed one after another in list order.
	fn clean_unneeded_tags(source: &str) -> String {
	    // Each tag appears exactly once; a repeated entry would only cost an
	    // extra pass over the whole string.
	    let useless_tags = [
	        "</area>",
	        "</base>",
	        "<body>",
	        "</body>",
	        "</br>",
	        "</col>",
	        "</colgroup>",
	        "</dd>",
	        "</dt>",
	        "<head>",
	        "</head>",
	        "</hr>",
	        "<html>",
	        "</html>",
	        "</img>",
	        "</input>",
	        "</li>",
	        "</link>",
	        "</meta>",
	        "</option>",
	        "</param>",
	        "<tbody>",
	        "</tbody>",
	        "</td>",
	        "</tfoot>",
	        "</th>",
	        "</thead>",
	        "</tr>",
	        "</basefont>",
	        "</isindex>",
	    ];
	    let mut res = source.to_owned();
	    for useless_tag in &useless_tags {
	        res = res.replace(useless_tag, "");
	    }
	    res
	}

	/// Replaces HTML comments (`<!-- ... -->`, possibly spanning several lines)
	/// with a single space.
	///
	/// A comment is kept verbatim when its text, after blanking out every `<!--`
	/// marker and trimming, starts with `[` (as in `<!--[if lte IE 8]>`).
	fn remove_comments(source: &str) -> String {
	    // `(.|\n)` lets the lazy body span line breaks, which `.` alone does not match.
	    let re = Regex::new("<!--(.|\n)*?-->").unwrap();
	    re.replace_all(source, |caps: &Captures| {
	        if caps[0].replace("<!--", " ").trim().starts_with('[') {
	            caps[0].to_owned()
	        } else {
	            " ".to_owned()
	        }
	    })
	    .into_owned()
	}

	/// Tidies whitespace inside opening tags and drops the quotes around simple
	/// `width` / `height` values.
	///
	/// Every `<\w.*?>` match that neither starts with `<!` nor contains `</` is
	/// rewritten in four steps:
	///
	/// 1. whitespace between a closing quote (`"` or `'`) and `>` is removed;
	/// 2. inside each "word char, whitespace, word char" and "digit, whitespace,
	///    `>`" match, whitespace matched by ` \s+|\s +` is collapsed to one space;
	/// 3. two or more whitespace characters between a quote and a following
	///    `name=` are reduced to a single space;
	/// 4. `width="v"` / `height="v"` become `width=v` / `height=v` when `v` only
	///    uses characters from the class `[a-zA-Z0-9-_\.]`; other attributes keep
	///    their quotes.
	///
	/// The final string is trimmed before it is returned.
	fn unquote_attributes(source: &str) -> String {
	    // Some attributes like width, height, etc... don't need quotes.
	    let any_tag = Regex::new(r"<\w.*?>").unwrap();
	    let extra_spaces = Regex::new(r" \s+|\s +").unwrap();
	    let between_words = Regex::new(r"\w\s+\w").unwrap();
	    let spaces_before_close = Regex::new(r##""\s+>"##).unwrap();
	    let spaces_before_close2 = Regex::new(r"'\s+>").unwrap();
	    let extra_spaces2 = Regex::new(r##""\s\s+\w+="|'\s\s+\w+='|"\s\s+\w+=|'\s\s+\w+="##).unwrap();
	    let extra_spaces3 = Regex::new(r"\d\s+>").unwrap();
	    let quotes_in_tag = Regex::new(r##"([a-zA-Z]+)="([a-zA-Z0-9-_\.]+)""##).unwrap();

	    any_tag
	        .replace_all(source, |caps: &Captures| {
	            let cap = &caps[0];
	            if cap.starts_with("<!") || cap.contains("</") {
	                return cap.to_owned();
	            }

	            let tag = spaces_before_close.replace_all(cap, "\">");
	            let tag = spaces_before_close2.replace_all(&tag, "'>").into_owned();

	            // Collapse each match independently. Pairing the n-th match of
	            // one pattern with the n-th match of the other and searching for
	            // their concatenation would miss both whenever the two matches
	            // are not adjacent in the tag.
	            let mut collapsed = tag.clone();
	            for found in between_words
	                .find_iter(&tag)
	                .chain(extra_spaces3.find_iter(&tag))
	            {
	                let piece = found.as_str();
	                collapsed = collapsed.replace(piece, &extra_spaces.replace_all(piece, " "));
	            }

	            let mut output = collapsed.clone();
	            for found in extra_spaces2.find_iter(&collapsed) {
	                let piece = found.as_str();
	                // Every alternative starts with an ASCII quote, so byte
	                // index 1 is always a char boundary.
	                let (quote, rest) = piece.split_at(1);
	                output = output.replace(piece, &format!("{} {}", quote, rest.trim_start()));
	            }

	            quotes_in_tag
	                .replace_all(&output, |attr: &Captures| match &attr[1] {
	                    "width" | "height" => format!("{}={}", &attr[1], &attr[2]),
	                    name => format!("{}=\"{}\"", name, &attr[2]),
	                })
	                .into_owned()
	        })
	        .trim()
	        .to_owned()
	}

	/// Returns a minified version of the provided HTML source.
	///
	/// The passes run in a fixed order: comment removal, `type` attribute
	/// stripping on `<style>`/`<script>`, removal of unneeded tags, whitespace
	/// condensing and finally attribute unquoting. The result is trimmed.
	pub fn minify(source: &str) -> String {
	    let without_comments = remove_comments(source);
	    let without_types = condense(&without_comments);
	    let pruned = clean_unneeded_tags(&without_types);
	    let compact = condense_whitespace(&pruned);
	    let minified = unquote_attributes(&compact);
	    minified.trim().to_owned()
	}

	// End-to-end check of `minify` on a small document: the expected string has
	// no comments, no `<head>`/`<body>` wrappers and no `</li>` end tags, keeps
	// `type` on `<link>` but not on `<style>`/`<script>`, and has `width` and
	// `height` values unquoted.
	#[test]
	fn html_minify_test() {
	let source = r##"<head>
	<title>Some huge title</title>
	<link rel="stylesheet" type="text/css" href="something.css" >
	<style type="text/css">
	.some_class {
	color: red;
	}
	</style>
	</head>
	<body>
	<header>
	<div>
	<i> <b><a href="www.somewhere.com" class="some_class">Narnia</a> </b> </i>
	<h1 style="width:100%;text-align:center;" >Big header</h1>
	</div>
	<!-- commeeeeeeeents !!! -->
	</header>
	<div id="some_id">
	<!-- another comment
	on
	multi
	lines -->
	<div id="another_id" class="another_class" width="100">
	<h2>A little sub title</h2>
	<ul>
	<li>A list!</li>
	<li>Who doesn't like lists?</li>
	<li height="12" class="fooool">Well, who cares...</li>
	</ul>
	</div>
	</div>
	<script type="text/javascript" >
	console.log("foo");
	</script>
	<style type="text/css" src="../foo.css">
	<script src="../foo.js">
	</body>
	"##;

	// The trailing `\` in the literal below skips the line break and the
	// indentation that follows it.
	let expected_result = "<title>Some huge title</title> <link rel=\"stylesheet\" \
	type=\"text/css\" href=\"something.css\"> <style> .some_class \
	{ color: red; } </style> <header> <div> <i> <b><a \
	href=\"www.somewhere.com\" class=\"some_class\">Narnia</a> </b> </i> \
	<h1 style=\"width:100%;text-align:center;\">Big header</h1> </div> \
	</header> <div id=\"some_id\"> <div id=\"another_id\" \
	class=\"another_class\" width=100> <h2>A little sub \
	title</h2> <ul> <li>A list! <li>Who doesn't like lists? \
	<li height=12 class=\"fooool\">Well, who cares... </ul> </div> \
	</div> <script > console.log(\"foo\"); </script> <style \
	src=\"../foo.css\"> <script src=\"../foo.js\">";
	assert_eq!(minify(source), expected_result);
	}

	// Checks that `minify` drops an ordinary comment but keeps a conditional
	// comment (`<!--[if ...]> ... <![endif]-->`) in the output.
	#[test]
	fn html_keep_important_comments() {
	let source = r#"
	<div>
	<!-- normal comment -->
	<div>content</div>
	<!--[if lte IE 8]>
	<div class="warning">This old browser is unsupported and will most likely display funky things.
	</div>
	<![endif]-->
	</div>
	"#;

	let expected_result =
	"<div> <div>content</div> <!--[if lte IE 8]> <div class=\"warning\">This \
	old browser is unsupported and will most likely display funky things. \
	</div> <![endif]--> </div>";
	assert_eq!(minify(source), expected_result);
	}