blob: b6fa99362a43a65f7d92383d40046c482795ebdd [file] [log] [blame] [edit]
// Take a look at the license at the top of the repository in the LICENSE file.
use regex::{Captures, Regex};
/// Collapses whitespace in `source`, unless the document contains a
/// `<textarea` or `<pre` opening (matched case-insensitively).
///
/// * When neither marker is present, any whitespace run between `>` and `<`
///   becomes a single space, then every run of two or more whitespace
///   characters, and every lone `\r` or `\n`, becomes a single space.
/// * When either marker is present anywhere in the input, the text is only
///   trimmed at both ends and otherwise returned unchanged.
///
/// Both regular expressions are compiled on every call.
fn condense_whitespace(source: &str) -> String {
    let lowered = source.to_lowercase();

    // Guard: leave the interior of the document alone as soon as one of the
    // two markers shows up.
    if lowered.contains("<textarea") || lowered.contains("<pre") {
        return source.trim().to_owned();
    }

    let between_tags = Regex::new(r">\s+<").unwrap();
    let blank_runs = Regex::new(r"\s{2,}|[\r\n]").unwrap();

    let tightened = between_tags.replace_all(source, "> <");
    blank_runs.replace_all(&tightened, " ").into_owned()
}
/// Removes `type="..."` attributes from `<style ...>` and `<script ...>`
/// opening tags found in `source`.
///
/// Only tags that have at least one character after the tag name (a word
/// character, whitespace or `|`) are considered, so a bare `<style>` is left
/// as is. Everything outside the matched tags is copied through unchanged.
fn condense(source: &str) -> String {
    let type_attr = Regex::new(r#"\s*?type="[\w|\s].*?""#).unwrap();
    let opening_tag = Regex::new(r"<(style|script)[\w|\s].*?>").unwrap();

    let stripped = opening_tag.replace_all(source, |found: &Captures| {
        // `found[0]` is the whole opening tag; drop the attribute from it.
        let whole_tag = &found[0];
        type_attr.replace_all(whole_tag, "").into_owned()
    });
    stripped.into_owned()
}
/// Removes every literal occurrence of the tags this minifier treats as
/// unneeded (for example `<html>`, `<body>`, `</li>`, `</td>`) from `source`.
///
/// Matching is an exact, case-sensitive substring match: `<body class="x">`
/// or `</LI>` are left untouched. Tags are removed one after another in table
/// order, and the input is only re-allocated for tags that actually occur.
fn clean_unneeded_tags(source: &str) -> String {
    // Each entry appears exactly once; the table is scanned in this order.
    const USELESS_TAGS: &[&str] = &[
        "</area>",
        "</base>",
        "<body>",
        "</body>",
        "</br>",
        "</col>",
        "</colgroup>",
        "</dd>",
        "</dt>",
        "<head>",
        "</head>",
        "</hr>",
        "<html>",
        "</html>",
        "</img>",
        "</input>",
        "</li>",
        "</link>",
        "</meta>",
        "</option>",
        "</param>",
        "<tbody>",
        "</tbody>",
        "</td>",
        "</tfoot>",
        "</th>",
        "</thead>",
        "</tr>",
        "</basefont>",
        "</isindex>",
    ];

    USELESS_TAGS.iter().fold(source.to_owned(), |html, &tag| {
        // Skip the copy entirely when the tag does not occur.
        if html.contains(tag) {
            html.replace(tag, "")
        } else {
            html
        }
    })
}
/// Replaces HTML comments (`<!-- ... -->`, possibly spanning several lines)
/// in `source` with a single space.
///
/// A comment is kept verbatim when, after turning every `<!--` inside it into
/// a space and trimming, its text starts with `[` (for example
/// `<!--[if lte IE 8]> ... <![endif]-->`).
///
/// NOTE(review): an older comment here mentioned matching "build" and
/// "endbuild" case-insensitively; nothing in this function does that.
fn remove_comments(source: &str) -> String {
    let comment = Regex::new("<!--(.|\n)*?-->").unwrap();

    let cleaned = comment.replace_all(source, |found: &Captures| {
        let whole = &found[0];
        let starts_with_bracket = whole.replace("<!--", " ").trim().starts_with("[");
        if starts_with_bracket {
            whole.to_owned()
        } else {
            String::from(" ")
        }
    });
    cleaned.into_owned()
}
/// Tidies whitespace inside opening tags and removes the quotes around
/// `width` and `height` attribute values.
///
/// Every match of `<\w.*?>` in `source` is rewritten; matches that start with
/// `<!` or contain `</` are copied through unchanged. For the remaining tags:
///
/// 1. whitespace between a closing quote (`"` or `'`) and `>` is removed;
/// 2. whitespace runs between two word characters, or between a digit and
///    `>`, are collapsed to a single space;
/// 3. two or more whitespace characters between a quote and the next
///    `name=` are collapsed to a single space;
/// 4. `width="v"` / `height="v"` become `width=v` / `height=v` when `v`
///    consists only of ASCII letters, digits, `-`, `_` and `.`.
///
/// The final string is trimmed at both ends.
fn unquote_attributes(source: &str) -> String {
    // Some attributes like width, height, etc... don't need quotes.
    let any_tag = Regex::new(r"<\w.*?>").unwrap();
    let extra_spaces = Regex::new(r" \s+|\s +").unwrap();
    let between_words = Regex::new(r"\w\s+\w").unwrap();
    let spaces_before_close = Regex::new(r##""\s+>"##).unwrap();
    let spaces_before_close2 = Regex::new(r"'\s+>").unwrap();
    let extra_spaces2 = Regex::new(r##""\s\s+\w+="|'\s\s+\w+='|"\s\s+\w+=|'\s\s+\w+="##).unwrap();
    let extra_spaces3 = Regex::new(r"\d\s+>").unwrap();
    let quotes_in_tag = Regex::new(r##"([a-zA-Z]+)="([a-zA-Z0-9-_\.]+)""##).unwrap();

    any_tag
        .replace_all(source, |caps: &Captures| {
            let cap = &caps[0];
            if cap.starts_with("<!") || cap.contains("</") {
                return cap.to_owned();
            }

            // Step 1: `"   >` -> `">` and `'   >` -> `'>`.
            let tag = spaces_before_close.replace_all(cap, "\">");
            let mut tag = spaces_before_close2.replace_all(&tag, "'>").into_owned();

            // Step 2: gather the snippets to collapse from the tag as it is
            // now, then rewrite them one by one. Each snippet is handled on
            // its own: gluing a "between words" snippet to a "digit before >"
            // snippet produces a string that normally does not occur in the
            // tag, which would leave both untouched.
            let snippets: Vec<String> = between_words
                .find_iter(&tag)
                .chain(extra_spaces3.find_iter(&tag))
                .map(|m| m.as_str().to_owned())
                .collect();
            for snippet in &snippets {
                let collapsed = extra_spaces.replace_all(snippet, " ");
                tag = tag.replace(snippet.as_str(), &collapsed);
            }

            // Step 3: `"   name=` -> `" name=`. Every alternative of
            // `extra_spaces2` starts with a one-byte quote character, so
            // splitting after the first byte is always on a char boundary.
            let mut output = tag.clone();
            for m in extra_spaces2.find_iter(&tag) {
                let found = m.as_str();
                let (quote, rest) = found.split_at(1);
                output = output.replace(found, &format!("{} {}", quote, rest.trim_start()));
            }

            // Step 4: drop the quotes for width/height, re-emit the others.
            quotes_in_tag
                .replace_all(&output, |attr: &Captures| match &attr[1] {
                    "width" | "height" => format!("{}={}", &attr[1], &attr[2]),
                    name => format!("{}=\"{}\"", name, &attr[2]),
                })
                .into_owned()
        })
        .trim()
        .to_owned()
}
/// Returns a minified version of the provided HTML source.
///
/// The passes run in a fixed order: comments are removed, `type` attributes
/// are stripped from `<style>`/`<script>` tags, unneeded tags are dropped,
/// whitespace is condensed, and finally attributes are unquoted. The result
/// is trimmed at both ends.
pub fn minify(source: &str) -> String {
    let without_comments = remove_comments(source);
    let without_type_attrs = condense(&without_comments);
    let without_unneeded_tags = clean_unneeded_tags(&without_type_attrs);
    let condensed = condense_whitespace(&without_unneeded_tags);
    let unquoted = unquote_attributes(&condensed);
    unquoted.trim().to_owned()
}
// End-to-end check of `minify` on a small document.
//
// The expected string pins the following behaviour:
// - both comments (single-line and multi-line) disappear;
// - `<head>`, `</head>`, `<body>`, `</body>` and every `</li>` are removed;
// - `type="..."` is removed from the `<style>` and `<script>` tags but kept
//   on `<link>`;
// - whitespace before a closing `>` that follows a quote is removed, while
//   `<script >` keeps its single space;
// - `width="100"` and `height="12"` lose their quotes, other attributes keep
//   them;
// - line breaks and runs of whitespace end up as single spaces.
#[test]
fn html_minify_test() {
let source = r##"<head>
<title>Some huge title</title>
<link rel="stylesheet" type="text/css" href="something.css" >
<style type="text/css">
.some_class {
color: red;
}
</style>
</head>
<body>
<header>
<div>
<i> <b><a href="www.somewhere.com" class="some_class">Narnia</a> </b> </i>
<h1 style="width:100%;text-align:center;" >Big header</h1>
</div>
<!-- commeeeeeeeents !!! -->
</header>
<div id="some_id">
<!-- another comment
on
multi
lines -->
<div id="another_id" class="another_class" width="100">
<h2>A little sub title</h2>
<ul>
<li>A list!</li>
<li>Who doesn't like lists?</li>
<li height="12" class="fooool">Well, who cares...</li>
</ul>
</div>
</div>
<script type="text/javascript" >
console.log("foo");
</script>
<style type="text/css" src="../foo.css">
<script src="../foo.js">
</body>
"##;
// Trailing `\` continues the literal and skips the next line's leading
// whitespace, so the expected value is a single line.
let expected_result = "<title>Some huge title</title> <link rel=\"stylesheet\" \
type=\"text/css\" href=\"something.css\"> <style> .some_class \
{ color: red; } </style> <header> <div> <i> <b><a \
href=\"www.somewhere.com\" class=\"some_class\">Narnia</a> </b> </i> \
<h1 style=\"width:100%;text-align:center;\">Big header</h1> </div> \
</header> <div id=\"some_id\"> <div id=\"another_id\" \
class=\"another_class\" width=100> <h2>A little sub \
title</h2> <ul> <li>A list! <li>Who doesn't like lists? \
<li height=12 class=\"fooool\">Well, who cares... </ul> </div> \
</div> <script > console.log(\"foo\"); </script> <style \
src=\"../foo.css\"> <script src=\"../foo.js\">";
assert_eq!(minify(source), expected_result);
}
// Checks that `minify` drops an ordinary comment but keeps a comment whose
// text starts with `[` (here an `<!--[if lte IE 8]> ... <![endif]-->` block),
// including the markup inside it.
#[test]
fn html_keep_important_comments() {
let source = r#"
<div>
<!-- normal comment -->
<div>content</div>
<!--[if lte IE 8]>
<div class="warning">This old browser is unsupported and will most likely display funky things.
</div>
<![endif]-->
</div>
"#;
// The kept comment still has its inner whitespace condensed to single spaces.
let expected_result =
"<div> <div>content</div> <!--[if lte IE 8]> <div class=\"warning\">This \
old browser is unsupported and will most likely display funky things. \
</div> <![endif]--> </div>";
assert_eq!(minify(source), expected_result);
}