| // |
| // HTML |
| // |
| // Top-level rule: the entire input from start (SOI) to end (EOI). |
| // Comment nodes may come first, then an optional doctype, then any nodes. |
| html = _{ SOI ~ node_comment* ~ doctype? ~ node* ~ EOI } |
| |
| // |
| // DOCTYPE |
| // |
| // Matches `<!doctype ...>`. The keyword is case-insensitive, leading |
| // whitespace is accepted, and anything after the keyword must parse as attr. |
| doctype = { |
| WSP* |
| ~ chevron_left_bang |
| ~ ^"doctype" |
| ~ WSP* |
| ~ attr* |
| ~ WSP* |
| ~ chevron_right_normal |
| } |
| |
| // |
| // NODES |
| // |
| // A node is a comment, an element or a run of text, tried in that order. |
| node = _{ |
| node_comment |
| | node_element |
| | node_text |
| } |
| // Either comment flavour, with surrounding whitespace absorbed. |
| // comment_if is tried before comment_normal; both begin with comment_tag_start. |
| node_comment = { |
| WSP* |
| ~ (comment_if | comment_normal) |
| ~ WSP* |
| } |
| // Text: one or more characters, stopping where an element, a comment |
| // opener or any "<!" sequence begins. |
| node_text = { |
| ( |
| !(node_element | comment_tag_start | chevron_left_bang) |
| ~ ANY |
| )+ |
| } |
| // Element kinds in priority order (the first alternative that matches wins). |
| node_element = { |
| el_void |
| | el_void_xml |
| | el_process_instruct |
| | el_raw_text |
| | el_normal |
| | el_dangling |
| } |
| |
| // |
| // COMMENTS |
| // |
| // A regular comment: opener, captured body, closer. |
| comment_normal = _{ |
| comment_tag_start |
| ~ comment_body |
| ~ comment_tag_end |
| } |
| // Everything up to (not including) the first comment_tag_end. |
| comment_body = { |
| (!comment_tag_end ~ ANY)* |
| } |
| // "<!--" plus any whitespace that follows it. |
| comment_tag_start = _{ |
| chevron_left_bang ~ "--" ~ WSP* |
| } |
| // "-->" plus any whitespace that precedes it. |
| comment_tag_end = _{ |
| WSP* ~ "--" ~ chevron_right_normal |
| } |
| |
| // Compatability with old IE browsers... This is not necessary for newer browsers |
| // A conditional comment: opener, captured body, closer. |
| comment_if = _{ |
| comment_if_start |
| ~ comment_if_body |
| ~ comment_if_end |
| } |
| // Everything up to (not including) the first comment_if_end. |
| comment_if_body = { |
| (!comment_if_end ~ ANY)* |
| } |
| // A comment opener directly followed by "[" and a case-insensitive "if". |
| comment_if_start = _{ |
| comment_tag_start ~ "[" ~ ^"if" |
| } |
| // "<![endif]" (case-insensitive keyword) followed by the comment closer. |
| comment_if_end = _{ |
| chevron_left_bang |
| ~ "[" |
| ~ ^"endif" |
| ~ "]" |
| ~ comment_tag_end |
| } |
| |
| // |
| // ATTRIBUTES |
| // |
| // One attribute: a key, optionally followed by "=" and a value. |
| attr = { |
| attr_key |
| ~ ( |
| equal |
| ~ WSP* |
| ~ (attr_non_quoted | attr_quoted) |
| )? |
| } |
| // Quoted value: the opening quote is pushed on the stack and POP |
| // requires the same quote character to close the value. |
| attr_quoted = ${ |
| PUSH(quote) |
| ~ attr_value |
| ~ POP |
| } |
| // Unquoted value: must not start with a quote; runs until whitespace |
| // or any closing chevron. May be empty. |
| attr_non_quoted = @{ |
| !quote |
| ~ (!(WSP | chevron_right) ~ ANY)* |
| } |
| // Attribute name: an ASCII letter then text_chars; whitespace on either |
| // side is consumed as part of this rule. |
| attr_key = { |
| WSP* |
| ~ ASCII_ALPHA |
| ~ text_chars* |
| ~ WSP* |
| } |
| // Body of a quoted value: everything up to the quote on top of the stack. |
| attr_value = { |
| WSP* |
| ~ (!PEEK ~ ANY)* |
| ~ WSP* |
| } |
| |
| // |
| // ELEMENTS |
| // |
| // A tag name: one ASCII letter followed by any number of text_chars. |
| el_name = @{ |
| ASCII_ALPHA ~ text_chars* |
| } |
| |
| // Void element aka self-closing element |
| // Ex: <hr> |
| // Names of the HTML elements treated as void (no end tag, no children). |
| // Matched case-insensitively; alternatives are tried top to bottom. |
| // Each name is listed exactly once: a repeated alternative in an ordered |
| // choice can never be reached. |
| el_void_name_html = @{ |
| ^"area" |
| | ^"base" |
| | ^"br" |
| | ^"col" |
| | ^"command" |
| | ^"embed" |
| | ^"hr" |
| | ^"img" |
| | ^"input" |
| | ^"keygen" |
| | ^"link" |
| | ^"meta" |
| | ^"param" |
| | ^"source" |
| | ^"track" |
| | ^"wbr" |
| } |
| // NOTE: This should not have to be a rule, but people doesn't know what void elements are... |
| // SVG element names that are also accepted as void elements (case-insensitive). |
| el_void_name_svg = @{ ^"path" | ^"polygon" | ^"rect" | ^"circle" } |
| // Any void element name. The negative lookahead rejects a match that is |
| // only a prefix of a longer tag name: without it `<colgroup>` would be |
| // read as the void element `col` carrying an attribute `group`, and |
| // `<metadata>` as `meta` with attribute `data`. Lookahead consumes no |
| // input, so the captured name is unchanged. |
| el_void_name = @{ (el_void_name_html | el_void_name_svg) ~ !text_chars } |
| // A void element by name: `<name attrs>` or `<name attrs/>`. |
| el_void = _{ |
| chevron_left_normal |
| ~ WSP* |
| ~ el_void_name |
| ~ WSP* |
| ~ attr* |
| ~ WSP* |
| ~ (chevron_right_normal | chevron_right_closed) |
| } |
| // Any element written in self-closing form: `<name attrs/>`. |
| el_void_xml = _{ |
| chevron_left_normal |
| ~ WSP* |
| ~ el_name |
| ~ WSP* |
| ~ attr* |
| ~ WSP* |
| ~ chevron_right_closed |
| } |
| |
| // Open elements are default element that can take children |
| // and have both a start tag and an end tag |
| // Ex: <html lang="en"></html> |
| // Start tag, then child nodes until the matching end tag, then that end tag. |
| el_normal = _{ |
| el_normal_start |
| ~ (!el_normal_end ~ node)* |
| ~ el_normal_end |
| } |
| // `<name attrs>`; the tag name is pushed on the stack for the end tag to match. |
| el_normal_start = _{ |
| chevron_left_normal |
| ~ WSP* |
| ~ PUSH(el_name) |
| ~ WSP* |
| ~ attr* |
| ~ WSP* |
| ~ chevron_right_normal |
| } |
| // `</name>`; POP requires exactly the text that the start tag pushed. |
| el_normal_end = { |
| chevron_left_closed |
| ~ WSP* |
| ~ POP |
| ~ WSP* |
| ~ chevron_right_normal |
| } |
| |
| // Raw text elements are elements with text/script content that |
| // might interfere with the normal html syntax |
| // Tag names whose content is captured as raw text (case-insensitive). |
| // The negative lookahead requires the name to end here, so a longer tag |
| // name that merely starts with one of these (e.g. `<titlebar>`) is not |
| // treated as a raw text element. The rule is atomic so that no implicit |
| // whitespace can be skipped between the name and the lookahead; the |
| // captured text is still just the name. |
| el_raw_text_name = @{ |
| ( |
| ^"style" |
| | ^"script" |
| | ^"title" |
| | ^"textarea" |
| ) |
| ~ !text_chars |
| } |
| // Raw content: everything up to (not including) the matching end tag. |
| el_raw_text_content = { |
| (!el_raw_text_end ~ ANY)* |
| } |
| // A raw text element: start tag, uninterpreted content, end tag. |
| el_raw_text = _{ |
| el_raw_text_start |
| ~ el_raw_text_content |
| ~ el_raw_text_end |
| } |
| // `<name attrs>` plus trailing whitespace; the name is pushed on the stack. |
| el_raw_text_start = _{ |
| chevron_left_normal |
| ~ WSP* |
| ~ PUSH(el_raw_text_name) |
| ~ WSP* |
| ~ attr* |
| ~ WSP* |
| ~ chevron_right_normal |
| ~ WSP* |
| } |
| // Leading whitespace plus `</name>`; POP requires exactly the pushed name. |
| el_raw_text_end = { |
| WSP* |
| ~ chevron_left_closed |
| ~ WSP* |
| ~ POP |
| ~ WSP* |
| ~ chevron_right_normal |
| } |
| |
| // XML processing instruction |
| // Ex: <?xml version="1.0" ?> |
| // `<?` ... `?>` with an optional name and any number of attributes. |
| el_process_instruct = { |
| chevron_left_question |
| ~ WSP* |
| ~ el_name? |
| ~ WSP* |
| ~ attr* |
| ~ WSP* |
| ~ chevron_right_question |
| } |
| |
| // Catch dangling elements |
| // Ex: <div/></div> |
| // A stand-alone end tag `</name>`. It is the last alternative of |
| // node_element, so it only applies when no other element form matched. |
| el_dangling = { |
| chevron_left_closed |
| ~ WSP* |
| ~ el_name |
| ~ WSP* |
| ~ chevron_right_normal |
| } |
| |
| // |
| // SYMBOLS / CHARACTERS |
| // |
| // One character allowed after the first letter of a name: |
| // an ASCII letter or digit, underscore, hyphen or colon. |
| text_chars = _{ ASCII_ALPHANUMERIC | "_" | "-" | ":" } |
| |
| // Opening delimiters. |
| chevron_left_normal = _{ "<" } |
| chevron_left_closed = _{ "</" } |
| chevron_left_bang = _{ "<!" } |
| chevron_left_question = _{ "<?" } |
| |
| // Closing delimiters. |
| chevron_right_normal = _{ ">" } |
| chevron_right_closed = _{ "/>" } |
| chevron_right_question = _{ "?>" } |
| // Any of the three closing delimiters. |
| chevron_right = _{ chevron_right_normal | chevron_right_closed | chevron_right_question } |
| |
| // Separator between an attribute key and its value. |
| equal = _{ "=" } |
| // Quote characters; `quote` accepts either kind. |
| quote_dubble = _{ "\"" } |
| quote_single = _{ "'" } |
| quote = _{ |
| quote_dubble |
| | quote_single |
| } |
| // A single whitespace character: space, tab, carriage return or newline. |
| WSP = _{ |
| " " |
| | "\t" |
| | "\r" |
| | "\n" |
| } |