//! Basic syntax highlighting functionality.
//!
//! This module uses the `rustc_lexer` tokenizer to provide token-based highlighting for
//! the HTML documentation generated by rustdoc.
//!
//! Use `render_example_with_highlighting` or `render_item_decl_with_highlighting` to highlight
//! some Rust code.
| |
| use std::collections::VecDeque; |
| use std::fmt::{Display, Write}; |
| |
| use rustc_data_structures::fx::FxIndexMap; |
| use rustc_lexer::{Cursor, LiteralKind, TokenKind}; |
| use rustc_span::edition::Edition; |
| use rustc_span::symbol::Symbol; |
| use rustc_span::{BytePos, DUMMY_SP, Span}; |
| |
| use super::format::{self, Buffer}; |
| use crate::clean::PrimitiveType; |
| use crate::html::escape::EscapeBodyText; |
| use crate::html::render::{Context, LinkFromSrc}; |
| |
| /// This type is needed in case we want to render links on items to allow to go to their definition. |
| pub(crate) struct HrefContext<'a, 'tcx> { |
| pub(crate) context: &'a Context<'tcx>, |
| /// This span contains the current file we're going through. |
| pub(crate) file_span: Span, |
| /// This field is used to know "how far" from the top of the directory we are to link to either |
| /// documentation pages or other source pages. |
| pub(crate) root_path: &'a str, |
| /// This field is used to calculate precise local URLs. |
| pub(crate) current_href: String, |
| } |
| |
| /// Decorations are represented as a map from CSS class to vector of character ranges. |
| /// Each range will be wrapped in a span with that class. |
| #[derive(Default)] |
| pub(crate) struct DecorationInfo(pub(crate) FxIndexMap<&'static str, Vec<(u32, u32)>>); |
| |
| #[derive(Eq, PartialEq, Clone, Copy)] |
| pub(crate) enum Tooltip { |
| Ignore, |
| CompileFail, |
| ShouldPanic, |
| Edition(Edition), |
| None, |
| } |
| |
| /// Highlights `src` as an inline example, returning the HTML output. |
| pub(crate) fn render_example_with_highlighting( |
| src: &str, |
| out: &mut Buffer, |
| tooltip: Tooltip, |
| playground_button: Option<&str>, |
| extra_classes: &[String], |
| ) { |
| write_header(out, "rust-example-rendered", None, tooltip, extra_classes); |
| write_code(out, src, None, None); |
| write_footer(out, playground_button); |
| } |
| |
| /// Highlights `src` as an item-decl, returning the HTML output. |
| pub(crate) fn render_item_decl_with_highlighting(src: &str, out: &mut Buffer) { |
| write!(out, "<pre class=\"rust item-decl\">"); |
| write_code(out, src, None, None); |
| write!(out, "</pre>"); |
| } |
| |
| fn write_header( |
| out: &mut Buffer, |
| class: &str, |
| extra_content: Option<Buffer>, |
| tooltip: Tooltip, |
| extra_classes: &[String], |
| ) { |
| write!(out, "<div class=\"example-wrap{}\">", match tooltip { |
| Tooltip::Ignore => " ignore", |
| Tooltip::CompileFail => " compile_fail", |
| Tooltip::ShouldPanic => " should_panic", |
| Tooltip::Edition(_) => " edition", |
| Tooltip::None => "", |
| },); |
| |
| if tooltip != Tooltip::None { |
| let edition_code; |
| write!(out, "<a href=\"#\" class=\"tooltip\" title=\"{}\">ⓘ</a>", match tooltip { |
| Tooltip::Ignore => "This example is not tested", |
| Tooltip::CompileFail => "This example deliberately fails to compile", |
| Tooltip::ShouldPanic => "This example panics", |
| Tooltip::Edition(edition) => { |
| edition_code = format!("This example runs with edition {edition}"); |
| &edition_code |
| } |
| Tooltip::None => unreachable!(), |
| },); |
| } |
| |
| if let Some(extra) = extra_content { |
| out.push_buffer(extra); |
| } |
| if class.is_empty() { |
| write!( |
| out, |
| "<pre class=\"rust{}{}\">", |
| if extra_classes.is_empty() { "" } else { " " }, |
| extra_classes.join(" "), |
| ); |
| } else { |
| write!( |
| out, |
| "<pre class=\"rust {class}{}{}\">", |
| if extra_classes.is_empty() { "" } else { " " }, |
| extra_classes.join(" "), |
| ); |
| } |
| write!(out, "<code>"); |
| } |
| |
| /// Check if two `Class` can be merged together. In the following rules, "unclassified" means `None` |
| /// basically (since it's `Option<Class>`). The following rules apply: |
| /// |
| /// * If two `Class` have the same variant, then they can be merged. |
| /// * If the other `Class` is unclassified and only contains white characters (backline, |
| /// whitespace, etc), it can be merged. |
| /// * `Class::Ident` is considered the same as unclassified (because it doesn't have an associated |
| /// CSS class). |
| fn can_merge(class1: Option<Class>, class2: Option<Class>, text: &str) -> bool { |
| match (class1, class2) { |
| (Some(c1), Some(c2)) => c1.is_equal_to(c2), |
| (Some(Class::Ident(_)), None) | (None, Some(Class::Ident(_))) => true, |
| (Some(Class::Macro(_)), _) => false, |
| (Some(_), None) | (None, Some(_)) => text.trim().is_empty(), |
| (None, None) => true, |
| } |
| } |
| |
| /// This type is used as a conveniency to prevent having to pass all its fields as arguments into |
| /// the various functions (which became its methods). |
| struct TokenHandler<'a, 'tcx, F: Write> { |
| out: &'a mut F, |
| /// It contains the closing tag and the associated `Class`. |
| closing_tags: Vec<(&'static str, Class)>, |
| /// This is used because we don't automatically generate the closing tag on `ExitSpan` in |
| /// case an `EnterSpan` event with the same class follows. |
| pending_exit_span: Option<Class>, |
| /// `current_class` and `pending_elems` are used to group HTML elements with same `class` |
| /// attributes to reduce the DOM size. |
| current_class: Option<Class>, |
| /// We need to keep the `Class` for each element because it could contain a `Span` which is |
| /// used to generate links. |
| pending_elems: Vec<(&'a str, Option<Class>)>, |
| href_context: Option<HrefContext<'a, 'tcx>>, |
| } |
| |
| impl<'a, 'tcx, F: Write> TokenHandler<'a, 'tcx, F> { |
| fn handle_exit_span(&mut self) { |
| // We can't get the last `closing_tags` element using `pop()` because `closing_tags` is |
| // being used in `write_pending_elems`. |
| let class = self.closing_tags.last().expect("ExitSpan without EnterSpan").1; |
| // We flush everything just in case... |
| self.write_pending_elems(Some(class)); |
| |
| exit_span(self.out, self.closing_tags.pop().expect("ExitSpan without EnterSpan").0); |
| self.pending_exit_span = None; |
| } |
| |
| /// Write all the pending elements sharing a same (or at mergeable) `Class`. |
| /// |
| /// If there is a "parent" (if a `EnterSpan` event was encountered) and the parent can be merged |
| /// with the elements' class, then we simply write the elements since the `ExitSpan` event will |
| /// close the tag. |
| /// |
| /// Otherwise, if there is only one pending element, we let the `string` function handle both |
| /// opening and closing the tag, otherwise we do it into this function. |
| /// |
| /// It returns `true` if `current_class` must be set to `None` afterwards. |
| fn write_pending_elems(&mut self, current_class: Option<Class>) -> bool { |
| if self.pending_elems.is_empty() { |
| return false; |
| } |
| if let Some((_, parent_class)) = self.closing_tags.last() |
| && can_merge(current_class, Some(*parent_class), "") |
| { |
| for (text, class) in self.pending_elems.iter() { |
| string(self.out, EscapeBodyText(text), *class, &self.href_context, false); |
| } |
| } else { |
| // We only want to "open" the tag ourselves if we have more than one pending and if the |
| // current parent tag is not the same as our pending content. |
| let close_tag = if self.pending_elems.len() > 1 |
| && let Some(current_class) = current_class |
| { |
| Some(enter_span(self.out, current_class, &self.href_context)) |
| } else { |
| None |
| }; |
| for (text, class) in self.pending_elems.iter() { |
| string( |
| self.out, |
| EscapeBodyText(text), |
| *class, |
| &self.href_context, |
| close_tag.is_none(), |
| ); |
| } |
| if let Some(close_tag) = close_tag { |
| exit_span(self.out, close_tag); |
| } |
| } |
| self.pending_elems.clear(); |
| true |
| } |
| } |
| |
| impl<'a, 'tcx, F: Write> Drop for TokenHandler<'a, 'tcx, F> { |
| /// When leaving, we need to flush all pending data to not have missing content. |
| fn drop(&mut self) { |
| if self.pending_exit_span.is_some() { |
| self.handle_exit_span(); |
| } else { |
| self.write_pending_elems(self.current_class); |
| } |
| } |
| } |
| |
| /// Convert the given `src` source code into HTML by adding classes for highlighting. |
| /// |
| /// This code is used to render code blocks (in the documentation) as well as the source code pages. |
| /// |
| /// Some explanations on the last arguments: |
| /// |
| /// In case we are rendering a code block and not a source code file, `href_context` will be `None`. |
| /// To put it more simply: if `href_context` is `None`, the code won't try to generate links to an |
| /// item definition. |
| /// |
| /// More explanations about spans and how we use them here are provided in the |
| pub(super) fn write_code( |
| out: &mut impl Write, |
| src: &str, |
| href_context: Option<HrefContext<'_, '_>>, |
| decoration_info: Option<DecorationInfo>, |
| ) { |
| // This replace allows to fix how the code source with DOS backline characters is displayed. |
| let src = src.replace("\r\n", "\n"); |
| let mut token_handler = TokenHandler { |
| out, |
| closing_tags: Vec::new(), |
| pending_exit_span: None, |
| current_class: None, |
| pending_elems: Vec::new(), |
| href_context, |
| }; |
| |
| Classifier::new( |
| &src, |
| token_handler.href_context.as_ref().map(|c| c.file_span).unwrap_or(DUMMY_SP), |
| decoration_info, |
| ) |
| .highlight(&mut |highlight| { |
| match highlight { |
| Highlight::Token { text, class } => { |
| // If we received a `ExitSpan` event and then have a non-compatible `Class`, we |
| // need to close the `<span>`. |
| let need_current_class_update = if let Some(pending) = |
| token_handler.pending_exit_span |
| && !can_merge(Some(pending), class, text) |
| { |
| token_handler.handle_exit_span(); |
| true |
| // If the two `Class` are different, time to flush the current content and start |
| // a new one. |
| } else if !can_merge(token_handler.current_class, class, text) { |
| token_handler.write_pending_elems(token_handler.current_class); |
| true |
| } else { |
| token_handler.current_class.is_none() |
| }; |
| |
| if need_current_class_update { |
| token_handler.current_class = class.map(Class::dummy); |
| } |
| token_handler.pending_elems.push((text, class)); |
| } |
| Highlight::EnterSpan { class } => { |
| let mut should_add = true; |
| if let Some(pending_exit_span) = token_handler.pending_exit_span { |
| if class.is_equal_to(pending_exit_span) { |
| should_add = false; |
| } else { |
| token_handler.handle_exit_span(); |
| } |
| } else { |
| // We flush everything just in case... |
| if token_handler.write_pending_elems(token_handler.current_class) { |
| token_handler.current_class = None; |
| } |
| } |
| if should_add { |
| let closing_tag = |
| enter_span(token_handler.out, class, &token_handler.href_context); |
| token_handler.closing_tags.push((closing_tag, class)); |
| } |
| |
| token_handler.current_class = None; |
| token_handler.pending_exit_span = None; |
| } |
| Highlight::ExitSpan => { |
| token_handler.current_class = None; |
| token_handler.pending_exit_span = Some( |
| token_handler |
| .closing_tags |
| .last() |
| .as_ref() |
| .expect("ExitSpan without EnterSpan") |
| .1, |
| ); |
| } |
| }; |
| }); |
| } |
| |
| fn write_footer(out: &mut Buffer, playground_button: Option<&str>) { |
| writeln!(out, "</code></pre>{}</div>", playground_button.unwrap_or_default()); |
| } |
| |
| /// How a span of text is classified. Mostly corresponds to token kinds. |
| #[derive(Clone, Copy, Debug, Eq, PartialEq)] |
| enum Class { |
| Comment, |
| DocComment, |
| Attribute, |
| KeyWord, |
| /// Keywords that do pointer/reference stuff. |
| RefKeyWord, |
| Self_(Span), |
| Macro(Span), |
| MacroNonTerminal, |
| String, |
| Number, |
| Bool, |
| /// `Ident` isn't rendered in the HTML but we still need it for the `Span` it contains. |
| Ident(Span), |
| Lifetime, |
| PreludeTy, |
| PreludeVal, |
| QuestionMark, |
| Decoration(&'static str), |
| } |
| |
| impl Class { |
| /// It is only looking at the variant, not the variant content. |
| /// |
| /// It is used mostly to group multiple similar HTML elements into one `<span>` instead of |
| /// multiple ones. |
| fn is_equal_to(self, other: Self) -> bool { |
| match (self, other) { |
| (Self::Self_(_), Self::Self_(_)) |
| | (Self::Macro(_), Self::Macro(_)) |
| | (Self::Ident(_), Self::Ident(_)) => true, |
| (Self::Decoration(c1), Self::Decoration(c2)) => c1 == c2, |
| (x, y) => x == y, |
| } |
| } |
| |
| /// If `self` contains a `Span`, it'll be replaced with `DUMMY_SP` to prevent creating links |
| /// on "empty content" (because of the attributes merge). |
| fn dummy(self) -> Self { |
| match self { |
| Self::Self_(_) => Self::Self_(DUMMY_SP), |
| Self::Macro(_) => Self::Macro(DUMMY_SP), |
| Self::Ident(_) => Self::Ident(DUMMY_SP), |
| s => s, |
| } |
| } |
| |
| /// Returns the css class expected by rustdoc for each `Class`. |
| fn as_html(self) -> &'static str { |
| match self { |
| Class::Comment => "comment", |
| Class::DocComment => "doccomment", |
| Class::Attribute => "attr", |
| Class::KeyWord => "kw", |
| Class::RefKeyWord => "kw-2", |
| Class::Self_(_) => "self", |
| Class::Macro(_) => "macro", |
| Class::MacroNonTerminal => "macro-nonterminal", |
| Class::String => "string", |
| Class::Number => "number", |
| Class::Bool => "bool-val", |
| Class::Ident(_) => "", |
| Class::Lifetime => "lifetime", |
| Class::PreludeTy => "prelude-ty", |
| Class::PreludeVal => "prelude-val", |
| Class::QuestionMark => "question-mark", |
| Class::Decoration(kind) => kind, |
| } |
| } |
| |
| /// In case this is an item which can be converted into a link to a definition, it'll contain |
| /// a "span" (a tuple representing `(lo, hi)` equivalent of `Span`). |
| fn get_span(self) -> Option<Span> { |
| match self { |
| Self::Ident(sp) | Self::Self_(sp) | Self::Macro(sp) => Some(sp), |
| Self::Comment |
| | Self::DocComment |
| | Self::Attribute |
| | Self::KeyWord |
| | Self::RefKeyWord |
| | Self::MacroNonTerminal |
| | Self::String |
| | Self::Number |
| | Self::Bool |
| | Self::Lifetime |
| | Self::PreludeTy |
| | Self::PreludeVal |
| | Self::QuestionMark |
| | Self::Decoration(_) => None, |
| } |
| } |
| } |
| |
| enum Highlight<'a> { |
| Token { text: &'a str, class: Option<Class> }, |
| EnterSpan { class: Class }, |
| ExitSpan, |
| } |
| |
| struct TokenIter<'a> { |
| src: &'a str, |
| cursor: Cursor<'a>, |
| } |
| |
| impl<'a> Iterator for TokenIter<'a> { |
| type Item = (TokenKind, &'a str); |
| fn next(&mut self) -> Option<(TokenKind, &'a str)> { |
| let token = self.cursor.advance_token(); |
| if token.kind == TokenKind::Eof { |
| return None; |
| } |
| let (text, rest) = self.src.split_at(token.len as usize); |
| self.src = rest; |
| Some((token.kind, text)) |
| } |
| } |
| |
| /// Classifies into identifier class; returns `None` if this is a non-keyword identifier. |
| fn get_real_ident_class(text: &str, allow_path_keywords: bool) -> Option<Class> { |
| let ignore: &[&str] = |
| if allow_path_keywords { &["self", "Self", "super", "crate"] } else { &["self", "Self"] }; |
| if ignore.iter().any(|k| *k == text) { |
| return None; |
| } |
| Some(match text { |
| "ref" | "mut" => Class::RefKeyWord, |
| "false" | "true" => Class::Bool, |
| _ if Symbol::intern(text).is_reserved(|| Edition::Edition2021) => Class::KeyWord, |
| _ => return None, |
| }) |
| } |
| |
| /// This iterator comes from the same idea than "Peekable" except that it allows to "peek" more than |
| /// just the next item by using `peek_next`. The `peek` method always returns the next item after |
| /// the current one whereas `peek_next` will return the next item after the last one peeked. |
| /// |
| /// You can use both `peek` and `peek_next` at the same time without problem. |
| struct PeekIter<'a> { |
| stored: VecDeque<(TokenKind, &'a str)>, |
| /// This position is reinitialized when using `next`. It is used in `peek_next`. |
| peek_pos: usize, |
| iter: TokenIter<'a>, |
| } |
| |
| impl<'a> PeekIter<'a> { |
| fn new(iter: TokenIter<'a>) -> Self { |
| Self { stored: VecDeque::new(), peek_pos: 0, iter } |
| } |
| /// Returns the next item after the current one. It doesn't interfere with `peek_next` output. |
| fn peek(&mut self) -> Option<&(TokenKind, &'a str)> { |
| if self.stored.is_empty() |
| && let Some(next) = self.iter.next() |
| { |
| self.stored.push_back(next); |
| } |
| self.stored.front() |
| } |
| /// Returns the next item after the last one peeked. It doesn't interfere with `peek` output. |
| fn peek_next(&mut self) -> Option<&(TokenKind, &'a str)> { |
| self.peek_pos += 1; |
| if self.peek_pos - 1 < self.stored.len() { |
| self.stored.get(self.peek_pos - 1) |
| } else if let Some(next) = self.iter.next() { |
| self.stored.push_back(next); |
| self.stored.back() |
| } else { |
| None |
| } |
| } |
| } |
| |
| impl<'a> Iterator for PeekIter<'a> { |
| type Item = (TokenKind, &'a str); |
| fn next(&mut self) -> Option<Self::Item> { |
| self.peek_pos = 0; |
| if let Some(first) = self.stored.pop_front() { Some(first) } else { self.iter.next() } |
| } |
| } |
| |
/// Custom spans inserted into the source. Eg --scrape-examples uses this to highlight function
/// calls.
struct Decorations {
    /// Start position and CSS class of each decoration, sorted by start position.
    starts: Vec<(u32, &'static str)>,
    /// Sorted end positions of the decorations.
    ends: Vec<u32>,
}
| |
| impl Decorations { |
| fn new(info: DecorationInfo) -> Self { |
| // Extract tuples (start, end, kind) into separate sequences of (start, kind) and (end). |
| let (mut starts, mut ends): (Vec<_>, Vec<_>) = info |
| .0 |
| .into_iter() |
| .flat_map(|(kind, ranges)| ranges.into_iter().map(move |(lo, hi)| ((lo, kind), hi))) |
| .unzip(); |
| |
| // Sort the sequences in document order. |
| starts.sort_by_key(|(lo, _)| *lo); |
| ends.sort(); |
| |
| Decorations { starts, ends } |
| } |
| } |
| |
| /// Processes program tokens, classifying strings of text by highlighting |
| /// category (`Class`). |
| struct Classifier<'src> { |
| tokens: PeekIter<'src>, |
| in_attribute: bool, |
| in_macro: bool, |
| in_macro_nonterminal: bool, |
| byte_pos: u32, |
| file_span: Span, |
| src: &'src str, |
| decorations: Option<Decorations>, |
| } |
| |
| impl<'src> Classifier<'src> { |
| /// Takes as argument the source code to HTML-ify, the rust edition to use and the source code |
| /// file span which will be used later on by the `span_correspondence_map`. |
| fn new(src: &str, file_span: Span, decoration_info: Option<DecorationInfo>) -> Classifier<'_> { |
| let tokens = PeekIter::new(TokenIter { src, cursor: Cursor::new(src) }); |
| let decorations = decoration_info.map(Decorations::new); |
| Classifier { |
| tokens, |
| in_attribute: false, |
| in_macro: false, |
| in_macro_nonterminal: false, |
| byte_pos: 0, |
| file_span, |
| src, |
| decorations, |
| } |
| } |
| |
| /// Convenient wrapper to create a [`Span`] from a position in the file. |
| fn new_span(&self, lo: u32, text: &str) -> Span { |
| let hi = lo + text.len() as u32; |
| let file_lo = self.file_span.lo(); |
| self.file_span.with_lo(file_lo + BytePos(lo)).with_hi(file_lo + BytePos(hi)) |
| } |
| |
| /// Concatenate colons and idents as one when possible. |
| fn get_full_ident_path(&mut self) -> Vec<(TokenKind, usize, usize)> { |
| let start = self.byte_pos as usize; |
| let mut pos = start; |
| let mut has_ident = false; |
| |
| loop { |
| let mut nb = 0; |
| while let Some((TokenKind::Colon, _)) = self.tokens.peek() { |
| self.tokens.next(); |
| nb += 1; |
| } |
| // Ident path can start with "::" but if we already have content in the ident path, |
| // the "::" is mandatory. |
| if has_ident && nb == 0 { |
| return vec![(TokenKind::Ident, start, pos)]; |
| } else if nb != 0 && nb != 2 { |
| if has_ident { |
| return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)]; |
| } else { |
| return vec![(TokenKind::Colon, start, pos + nb)]; |
| } |
| } |
| |
| if let Some((None, text)) = self.tokens.peek().map(|(token, text)| { |
| if *token == TokenKind::Ident { |
| let class = get_real_ident_class(text, true); |
| (class, text) |
| } else { |
| // Doesn't matter which Class we put in here... |
| (Some(Class::Comment), text) |
| } |
| }) { |
| // We only "add" the colon if there is an ident behind. |
| pos += text.len() + nb; |
| has_ident = true; |
| self.tokens.next(); |
| } else if nb > 0 && has_ident { |
| return vec![(TokenKind::Ident, start, pos), (TokenKind::Colon, pos, pos + nb)]; |
| } else if nb > 0 { |
| return vec![(TokenKind::Colon, start, start + nb)]; |
| } else if has_ident { |
| return vec![(TokenKind::Ident, start, pos)]; |
| } else { |
| return Vec::new(); |
| } |
| } |
| } |
| |
| /// Wraps the tokens iteration to ensure that the `byte_pos` is always correct. |
| /// |
| /// It returns the token's kind, the token as a string and its byte position in the source |
| /// string. |
| fn next(&mut self) -> Option<(TokenKind, &'src str, u32)> { |
| if let Some((kind, text)) = self.tokens.next() { |
| let before = self.byte_pos; |
| self.byte_pos += text.len() as u32; |
| Some((kind, text, before)) |
| } else { |
| None |
| } |
| } |
| |
| /// Exhausts the `Classifier` writing the output into `sink`. |
| /// |
| /// The general structure for this method is to iterate over each token, |
| /// possibly giving it an HTML span with a class specifying what flavor of |
| /// token is used. |
| fn highlight(mut self, sink: &mut dyn FnMut(Highlight<'src>)) { |
| loop { |
| if let Some(decs) = self.decorations.as_mut() { |
| let byte_pos = self.byte_pos; |
| let n_starts = decs.starts.iter().filter(|(i, _)| byte_pos >= *i).count(); |
| for (_, kind) in decs.starts.drain(0..n_starts) { |
| sink(Highlight::EnterSpan { class: Class::Decoration(kind) }); |
| } |
| |
| let n_ends = decs.ends.iter().filter(|i| byte_pos >= **i).count(); |
| for _ in decs.ends.drain(0..n_ends) { |
| sink(Highlight::ExitSpan); |
| } |
| } |
| |
| if self |
| .tokens |
| .peek() |
| .map(|t| matches!(t.0, TokenKind::Colon | TokenKind::Ident)) |
| .unwrap_or(false) |
| { |
| let tokens = self.get_full_ident_path(); |
| for (token, start, end) in &tokens { |
| let text = &self.src[*start..*end]; |
| self.advance(*token, text, sink, *start as u32); |
| self.byte_pos += text.len() as u32; |
| } |
| if !tokens.is_empty() { |
| continue; |
| } |
| } |
| if let Some((token, text, before)) = self.next() { |
| self.advance(token, text, sink, before); |
| } else { |
| break; |
| } |
| } |
| } |
| |
| /// Single step of highlighting. This will classify `token`, but maybe also a couple of |
| /// following ones as well. |
| /// |
| /// `before` is the position of the given token in the `source` string and is used as "lo" byte |
| /// in case we want to try to generate a link for this token using the |
| /// `span_correspondence_map`. |
| fn advance( |
| &mut self, |
| token: TokenKind, |
| text: &'src str, |
| sink: &mut dyn FnMut(Highlight<'src>), |
| before: u32, |
| ) { |
| let lookahead = self.peek(); |
| let no_highlight = |sink: &mut dyn FnMut(_)| sink(Highlight::Token { text, class: None }); |
| let class = match token { |
| TokenKind::Whitespace => return no_highlight(sink), |
| TokenKind::LineComment { doc_style } | TokenKind::BlockComment { doc_style, .. } => { |
| if doc_style.is_some() { |
| Class::DocComment |
| } else { |
| Class::Comment |
| } |
| } |
| // Consider this as part of a macro invocation if there was a |
| // leading identifier. |
| TokenKind::Bang if self.in_macro => { |
| self.in_macro = false; |
| sink(Highlight::Token { text, class: None }); |
| sink(Highlight::ExitSpan); |
| return; |
| } |
| |
| // Assume that '&' or '*' is the reference or dereference operator |
| // or a reference or pointer type. Unless, of course, it looks like |
| // a logical and or a multiplication operator: `&&` or `* `. |
| TokenKind::Star => match self.tokens.peek() { |
| Some((TokenKind::Whitespace, _)) => return no_highlight(sink), |
| Some((TokenKind::Ident, "mut")) => { |
| self.next(); |
| sink(Highlight::Token { text: "*mut", class: Some(Class::RefKeyWord) }); |
| return; |
| } |
| Some((TokenKind::Ident, "const")) => { |
| self.next(); |
| sink(Highlight::Token { text: "*const", class: Some(Class::RefKeyWord) }); |
| return; |
| } |
| _ => Class::RefKeyWord, |
| }, |
| TokenKind::And => match self.tokens.peek() { |
| Some((TokenKind::And, _)) => { |
| self.next(); |
| sink(Highlight::Token { text: "&&", class: None }); |
| return; |
| } |
| Some((TokenKind::Eq, _)) => { |
| self.next(); |
| sink(Highlight::Token { text: "&=", class: None }); |
| return; |
| } |
| Some((TokenKind::Whitespace, _)) => return no_highlight(sink), |
| Some((TokenKind::Ident, "mut")) => { |
| self.next(); |
| sink(Highlight::Token { text: "&mut", class: Some(Class::RefKeyWord) }); |
| return; |
| } |
| _ => Class::RefKeyWord, |
| }, |
| |
| // These can either be operators, or arrows. |
| TokenKind::Eq => match lookahead { |
| Some(TokenKind::Eq) => { |
| self.next(); |
| sink(Highlight::Token { text: "==", class: None }); |
| return; |
| } |
| Some(TokenKind::Gt) => { |
| self.next(); |
| sink(Highlight::Token { text: "=>", class: None }); |
| return; |
| } |
| _ => return no_highlight(sink), |
| }, |
| TokenKind::Minus if lookahead == Some(TokenKind::Gt) => { |
| self.next(); |
| sink(Highlight::Token { text: "->", class: None }); |
| return; |
| } |
| |
| // Other operators. |
| TokenKind::Minus |
| | TokenKind::Plus |
| | TokenKind::Or |
| | TokenKind::Slash |
| | TokenKind::Caret |
| | TokenKind::Percent |
| | TokenKind::Bang |
| | TokenKind::Lt |
| | TokenKind::Gt => return no_highlight(sink), |
| |
| // Miscellaneous, no highlighting. |
| TokenKind::Dot |
| | TokenKind::Semi |
| | TokenKind::Comma |
| | TokenKind::OpenParen |
| | TokenKind::CloseParen |
| | TokenKind::OpenBrace |
| | TokenKind::CloseBrace |
| | TokenKind::OpenBracket |
| | TokenKind::At |
| | TokenKind::Tilde |
| | TokenKind::Colon |
| | TokenKind::Unknown => return no_highlight(sink), |
| |
| TokenKind::Question => Class::QuestionMark, |
| |
| TokenKind::Dollar => match lookahead { |
| Some(TokenKind::Ident) => { |
| self.in_macro_nonterminal = true; |
| Class::MacroNonTerminal |
| } |
| _ => return no_highlight(sink), |
| }, |
| |
| // This might be the start of an attribute. We're going to want to |
| // continue highlighting it as an attribute until the ending ']' is |
| // seen, so skip out early. Down below we terminate the attribute |
| // span when we see the ']'. |
| TokenKind::Pound => { |
| match lookahead { |
| // Case 1: #![inner_attribute] |
| Some(TokenKind::Bang) => { |
| self.next(); |
| if let Some(TokenKind::OpenBracket) = self.peek() { |
| self.in_attribute = true; |
| sink(Highlight::EnterSpan { class: Class::Attribute }); |
| } |
| sink(Highlight::Token { text: "#", class: None }); |
| sink(Highlight::Token { text: "!", class: None }); |
| return; |
| } |
| // Case 2: #[outer_attribute] |
| Some(TokenKind::OpenBracket) => { |
| self.in_attribute = true; |
| sink(Highlight::EnterSpan { class: Class::Attribute }); |
| } |
| _ => (), |
| } |
| return no_highlight(sink); |
| } |
| TokenKind::CloseBracket => { |
| if self.in_attribute { |
| self.in_attribute = false; |
| sink(Highlight::Token { text: "]", class: None }); |
| sink(Highlight::ExitSpan); |
| return; |
| } |
| return no_highlight(sink); |
| } |
| TokenKind::Literal { kind, .. } => match kind { |
| // Text literals. |
| LiteralKind::Byte { .. } |
| | LiteralKind::Char { .. } |
| | LiteralKind::Str { .. } |
| | LiteralKind::ByteStr { .. } |
| | LiteralKind::RawStr { .. } |
| | LiteralKind::RawByteStr { .. } |
| | LiteralKind::CStr { .. } |
| | LiteralKind::RawCStr { .. } => Class::String, |
| // Number literals. |
| LiteralKind::Float { .. } | LiteralKind::Int { .. } => Class::Number, |
| }, |
| TokenKind::GuardedStrPrefix => return no_highlight(sink), |
| TokenKind::Ident | TokenKind::RawIdent if lookahead == Some(TokenKind::Bang) => { |
| self.in_macro = true; |
| sink(Highlight::EnterSpan { class: Class::Macro(self.new_span(before, text)) }); |
| sink(Highlight::Token { text, class: None }); |
| return; |
| } |
| TokenKind::Ident => match get_real_ident_class(text, false) { |
| None => match text { |
| "Option" | "Result" => Class::PreludeTy, |
| "Some" | "None" | "Ok" | "Err" => Class::PreludeVal, |
| // "union" is a weak keyword and is only considered as a keyword when declaring |
| // a union type. |
| "union" if self.check_if_is_union_keyword() => Class::KeyWord, |
| _ if self.in_macro_nonterminal => { |
| self.in_macro_nonterminal = false; |
| Class::MacroNonTerminal |
| } |
| "self" | "Self" => Class::Self_(self.new_span(before, text)), |
| _ => Class::Ident(self.new_span(before, text)), |
| }, |
| Some(c) => c, |
| }, |
| TokenKind::RawIdent |
| | TokenKind::UnknownPrefix |
| | TokenKind::InvalidPrefix |
| | TokenKind::InvalidIdent => Class::Ident(self.new_span(before, text)), |
| TokenKind::Lifetime { .. } |
| | TokenKind::RawLifetime |
| | TokenKind::UnknownPrefixLifetime => Class::Lifetime, |
| TokenKind::Eof => panic!("Eof in advance"), |
| }; |
| // Anything that didn't return above is the simple case where we the |
| // class just spans a single token, so we can use the `string` method. |
| sink(Highlight::Token { text, class: Some(class) }); |
| } |
| |
| fn peek(&mut self) -> Option<TokenKind> { |
| self.tokens.peek().map(|(token_kind, _text)| *token_kind) |
| } |
| |
| fn check_if_is_union_keyword(&mut self) -> bool { |
| while let Some(kind) = self.tokens.peek_next().map(|(token_kind, _text)| token_kind) { |
| if *kind == TokenKind::Whitespace { |
| continue; |
| } |
| return *kind == TokenKind::Ident; |
| } |
| false |
| } |
| } |
| |
| /// Called when we start processing a span of text that should be highlighted. |
| /// The `Class` argument specifies how it should be highlighted. |
| fn enter_span( |
| out: &mut impl Write, |
| klass: Class, |
| href_context: &Option<HrefContext<'_, '_>>, |
| ) -> &'static str { |
| string_without_closing_tag(out, "", Some(klass), href_context, true).expect( |
| "internal error: enter_span was called with Some(klass) but did not return a \ |
| closing HTML tag", |
| ) |
| } |
| |
/// Called at the end of a span of highlighted text: writes `closing_tag` into `out`.
///
/// Panics if writing into `out` fails.
fn exit_span(out: &mut impl Write, closing_tag: &str) {
    Write::write_str(out, closing_tag).unwrap();
}
| |
| /// Called for a span of text. If the text should be highlighted differently |
| /// from the surrounding text, then the `Class` argument will be a value other |
| /// than `None`. |
| /// |
| /// The following sequences of callbacks are equivalent: |
| /// ```plain |
| /// enter_span(Foo), string("text", None), exit_span() |
| /// string("text", Foo) |
| /// ``` |
| /// |
| /// The latter can be thought of as a shorthand for the former, which is more |
| /// flexible. |
| /// |
| /// Note that if `context` is not `None` and that the given `klass` contains a `Span`, the function |
| /// will then try to find this `span` in the `span_correspondence_map`. If found, it'll then |
| /// generate a link for this element (which corresponds to where its definition is located). |
| fn string<T: Display>( |
| out: &mut impl Write, |
| text: T, |
| klass: Option<Class>, |
| href_context: &Option<HrefContext<'_, '_>>, |
| open_tag: bool, |
| ) { |
| if let Some(closing_tag) = string_without_closing_tag(out, text, klass, href_context, open_tag) |
| { |
| out.write_str(closing_tag).unwrap(); |
| } |
| } |
| |
| /// This function writes `text` into `out` with some modifications depending on `klass`: |
| /// |
| /// * If `klass` is `None`, `text` is written into `out` with no modification. |
| /// * If `klass` is `Some` but `klass.get_span()` is `None`, it writes the text wrapped in a |
| /// `<span>` with the provided `klass`. |
| /// * If `klass` is `Some` and has a [`rustc_span::Span`], it then tries to generate a link (`<a>` |
| /// element) by retrieving the link information from the `span_correspondence_map` that was filled |
| /// in `span_map.rs::collect_spans_and_sources`. If it cannot retrieve the information, then it's |
| /// the same as the second point (`klass` is `Some` but doesn't have a [`rustc_span::Span`]). |
| fn string_without_closing_tag<T: Display>( |
| out: &mut impl Write, |
| text: T, |
| klass: Option<Class>, |
| href_context: &Option<HrefContext<'_, '_>>, |
| open_tag: bool, |
| ) -> Option<&'static str> { |
| let Some(klass) = klass else { |
| write!(out, "{text}").unwrap(); |
| return None; |
| }; |
| let Some(def_span) = klass.get_span() else { |
| if !open_tag { |
| write!(out, "{text}").unwrap(); |
| return None; |
| } |
| write!(out, "<span class=\"{klass}\">{text}", klass = klass.as_html()).unwrap(); |
| return Some("</span>"); |
| }; |
| |
| let mut text_s = text.to_string(); |
| if text_s.contains("::") { |
| text_s = text_s.split("::").intersperse("::").fold(String::new(), |mut path, t| { |
| match t { |
| "self" | "Self" => write!( |
| &mut path, |
| "<span class=\"{klass}\">{t}</span>", |
| klass = Class::Self_(DUMMY_SP).as_html(), |
| ), |
| "crate" | "super" => { |
| write!( |
| &mut path, |
| "<span class=\"{klass}\">{t}</span>", |
| klass = Class::KeyWord.as_html(), |
| ) |
| } |
| t => write!(&mut path, "{t}"), |
| } |
| .expect("Failed to build source HTML path"); |
| path |
| }); |
| } |
| |
| if let Some(href_context) = href_context { |
| if let Some(href) = |
| href_context.context.shared.span_correspondence_map.get(&def_span).and_then(|href| { |
| let context = href_context.context; |
| // FIXME: later on, it'd be nice to provide two links (if possible) for all items: |
| // one to the documentation page and one to the source definition. |
| // FIXME: currently, external items only generate a link to their documentation, |
| // a link to their definition can be generated using this: |
| // https://github.com/rust-lang/rust/blob/60f1a2fc4b535ead9c85ce085fdce49b1b097531/src/librustdoc/html/render/context.rs#L315-L338 |
| match href { |
| LinkFromSrc::Local(span) => { |
| context.href_from_span_relative(*span, &href_context.current_href) |
| } |
| LinkFromSrc::External(def_id) => { |
| format::href_with_root_path(*def_id, context, Some(href_context.root_path)) |
| .ok() |
| .map(|(url, _, _)| url) |
| } |
| LinkFromSrc::Primitive(prim) => format::href_with_root_path( |
| PrimitiveType::primitive_locations(context.tcx())[prim], |
| context, |
| Some(href_context.root_path), |
| ) |
| .ok() |
| .map(|(url, _, _)| url), |
| LinkFromSrc::Doc(def_id) => { |
| format::href_with_root_path(*def_id, context, Some(&href_context.root_path)) |
| .ok() |
| .map(|(doc_link, _, _)| doc_link) |
| } |
| } |
| }) |
| { |
| if !open_tag { |
| // We're already inside an element which has the same klass, no need to give it |
| // again. |
| write!(out, "<a href=\"{href}\">{text_s}").unwrap(); |
| } else { |
| let klass_s = klass.as_html(); |
| if klass_s.is_empty() { |
| write!(out, "<a href=\"{href}\">{text_s}").unwrap(); |
| } else { |
| write!(out, "<a class=\"{klass_s}\" href=\"{href}\">{text_s}").unwrap(); |
| } |
| } |
| return Some("</a>"); |
| } |
| } |
| if !open_tag { |
| write!(out, "{}", text_s).unwrap(); |
| return None; |
| } |
| let klass_s = klass.as_html(); |
| if klass_s.is_empty() { |
| out.write_str(&text_s).unwrap(); |
| Some("") |
| } else { |
| write!(out, "<span class=\"{klass_s}\">{text_s}").unwrap(); |
| Some("</span>") |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests; |