// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Helpers for translating `mdbook` projects.
//!
//! The functions here are used to implement a robust
//! internationalization (i18n) workflow for `mdbook`. This allows you
//! to translate your books into other languages while also making it
//! easy to keep the translations up to date as you edit the original
//! source text.
//!
//! See <https://github.com/google/mdbook-i18n-helpers> for details on
//! how to use the supplied `mdbook` plugins.
use polib::catalog::Catalog;
use pulldown_cmark::{
BrokenLinkCallback, CodeBlockKind, DefaultBrokenLinkCallback, Event, LinkType, Tag, TagEnd,
};
use pulldown_cmark_to_cmark::{
calculate_code_block_token_count, cmark_resume_with_options, Options, State,
};
use std::sync::OnceLock;
use syntect::easy::ScopeRangeIterator;
use syntect::parsing::{ParseState, Scope, ScopeStack, SyntaxSet};
pub mod directives;
pub mod gettext;
pub mod normalize;
pub mod preprocessors;
pub mod xgettext;
/// Re-wrap the sources field of a message.
///
/// This function tries to wrap the `file:lineno` pairs so they look
/// the same as what you get from `msgcat` or `msgmerge`.
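///
/// # Examples
///
/// A minimal usage sketch; the source references below are made up
/// for illustration:
///
/// ```
/// use mdbook_i18n_helpers::wrap_sources;
///
/// // A long run of `file:lineno` pairs is wrapped at 76 columns.
/// let sources = "guide/src/chapter.md:123 ".repeat(10);
/// let wrapped = wrap_sources(sources.trim());
/// assert!(wrapped.lines().count() > 1);
/// assert!(wrapped.lines().all(|line| line.len() <= 76));
/// ```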
pub fn wrap_sources(sources: &str) -> String {
let options = textwrap::Options::new(76)
.break_words(false)
.word_splitter(textwrap::WordSplitter::NoHyphenation);
textwrap::refill(sources, options)
}
/// Like `mdbook::utils::new_cmark_parser`, but also passes a
/// `BrokenLinkCallback`.
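///
/// # Examples
///
/// A minimal sketch of parsing without a broken-link callback; the
/// input string is only illustrative:
///
/// ```
/// use mdbook_i18n_helpers::new_cmark_parser;
/// use pulldown_cmark::{DefaultBrokenLinkCallback, Event, Tag};
///
/// let parser = new_cmark_parser::<DefaultBrokenLinkCallback>("~~old~~ *new*", None);
/// let events: Vec<_> = parser.collect();
/// // Strikethrough is enabled, so `~~old~~` produces dedicated events.
/// assert!(events
///     .iter()
///     .any(|event| matches!(event, Event::Start(Tag::Strikethrough))));
/// ```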
pub fn new_cmark_parser<'input, F: BrokenLinkCallback<'input>>(
text: &'input str,
broken_link_callback: Option<F>,
) -> pulldown_cmark::Parser<'input, F> {
let mut options = pulldown_cmark::Options::empty();
options.insert(pulldown_cmark::Options::ENABLE_TABLES);
options.insert(pulldown_cmark::Options::ENABLE_OLD_FOOTNOTES);
options.insert(pulldown_cmark::Options::ENABLE_STRIKETHROUGH);
options.insert(pulldown_cmark::Options::ENABLE_TASKLISTS);
options.insert(pulldown_cmark::Options::ENABLE_HEADING_ATTRIBUTES);
pulldown_cmark::Parser::new_with_broken_link_callback(text, options, broken_link_callback)
}
/// Extract Markdown events from `text`.
///
/// The `state` can be used to give the parsing context. In
/// particular, if a code block has started, the text should be parsed
/// without interpreting special Markdown characters.
///
/// The events are labeled with the line number where they start in
/// the document.
///
/// # Examples
///
/// ```
/// use mdbook_i18n_helpers::extract_events;
/// use pulldown_cmark::{Event, Tag, TagEnd};
///
/// assert_eq!(
/// extract_events("Hello,\nworld!", None),
/// vec![
/// (1, Event::Start(Tag::Paragraph)),
/// (1, Event::Text("Hello,".into())),
/// (1, Event::Text(" ".into())),
/// (2, Event::Text("world!".into())),
/// (1, Event::End(TagEnd::Paragraph)),
/// ]
/// );
/// ```
pub fn extract_events<'a>(text: &'a str, state: Option<State<'a>>) -> Vec<(usize, Event<'a>)> {
// Expand a `[foo]` style link into `[foo][foo]`.
fn expand_shortcut_link(tag: Tag<'_>) -> Tag<'_> {
match tag {
Tag::Link {
link_type: LinkType::Shortcut,
dest_url,
title,
id,
} => Tag::Link {
link_type: LinkType::Reference,
dest_url,
title,
id,
},
Tag::Image {
link_type: LinkType::Shortcut,
dest_url,
title,
id,
} => Tag::Image {
link_type: LinkType::Reference,
dest_url,
title,
id,
},
_ => tag,
}
}
// Offsets of each newline in the input, used to calculate line
// numbers from byte offsets.
let offsets = text
.match_indices('\n')
.map(|(offset, _)| offset)
.collect::<Vec<_>>();
match state {
// If we're in a code block, we disable the normal parsing and
// return lines of text. This matches the behavior of the
// parser in this case.
Some(state) if state.is_in_code_block => text
.split_inclusive('\n')
.enumerate()
.map(|(idx, line)| (idx + 1, Event::Text(line.into())))
.collect(),
        // Otherwise, we parse the text normally.
_ => new_cmark_parser::<'a, DefaultBrokenLinkCallback>(text, None)
.into_offset_iter()
.map(|(event, range)| {
let lineno = offsets.partition_point(|&o| o < range.start) + 1;
let event = match event {
Event::SoftBreak => Event::Text(" ".into()),
// Shortcut links like "[foo]" end up as "[foo]"
// in output. By changing them to a reference
// link, the link is expanded on the fly and the
// output becomes self-contained.
Event::Start(tag @ (Tag::Link { .. } | Tag::Image { .. })) => {
Event::Start(expand_shortcut_link(tag))
}
_ => event,
};
(lineno, event)
})
.collect(),
}
}
/// Markdown events grouped by type.
#[derive(Debug, Clone, PartialEq)]
pub enum Group<'a> {
/// Markdown events which should be translated.
///
/// This includes `[Text("foo")]` as well as sequences with text
    /// such as `[Start(Emphasis), Text("foo"), End(Emphasis)]`.
Translate {
events: Vec<(usize, Event<'a>)>,
/// A comment that may be associated with the translation text.
comment: String,
},
/// Markdown events which should be skipped when translating.
///
    /// This includes structural events such as
    /// `Start(Tag::Heading { .. })`.
Skip(Vec<(usize, Event<'a>)>),
}
#[derive(Debug, Default)]
struct GroupingContext {
skip_next_group: bool,
comments: Vec<String>,
}
impl GroupingContext {
fn clear_skip_next_group(self) -> Self {
Self {
skip_next_group: false,
..self
}
}
}
/// Group Markdown events into translatable and skipped events.
///
/// This function will partition the input events into groups of
/// events which should be translated or skipped. Concatenating the
/// events from all groups gives you back the original events.
///
/// # Examples
///
/// ```
/// use mdbook_i18n_helpers::{extract_events, group_events, Group};
/// use pulldown_cmark::{Event, Tag, TagEnd};
///
/// let events = extract_events("- A list item.", None);
/// assert_eq!(
/// events,
/// vec![
/// (1, Event::Start(Tag::List(None))),
/// (1, Event::Start(Tag::Item)),
/// (1, Event::Text("A list item.".into())),
/// (1, Event::End(TagEnd::Item)),
/// (1, Event::End(TagEnd::List(false))),
/// ],
/// );
///
/// let groups = group_events(&events);
/// assert_eq!(
/// groups,
/// vec![
/// Group::Skip(vec![
/// (1, Event::Start(Tag::List(None))),
/// (1, Event::Start(Tag::Item)),
/// ]),
/// Group::Translate {
/// events: vec![
/// (1, Event::Text("A list item.".into())),
/// ], comment: "".into()},
/// Group::Skip(vec![
/// (1, Event::End(TagEnd::Item)),
/// (1, Event::End(TagEnd::List(false))),
/// ]),
/// ]
/// );
/// ```
pub fn group_events<'a>(events: &'a [(usize, Event<'a>)]) -> Vec<Group<'a>> {
#[derive(Debug)]
enum State {
Translate(usize),
Skip(usize),
}
impl State {
/// Creates groups based on the capturing state and context.
fn into_groups<'a>(
self,
idx: usize,
events: &'a [(usize, Event<'a>)],
mut ctx: GroupingContext,
) -> (Vec<Group<'a>>, GroupingContext) {
match self {
State::Translate(start) => {
if ctx.skip_next_group {
(
vec![Group::Skip(events[start..idx].into())],
ctx.clear_skip_next_group(),
)
} else if is_codeblock_group(&events[start..idx]) {
parse_codeblock(&events[start..idx], ctx)
} else {
(
vec![Group::Translate {
events: events[start..idx].into(),
comment: std::mem::take(&mut ctx.comments).join(" "),
}],
ctx,
)
}
}
State::Skip(start) => (vec![Group::Skip(events[start..idx].into())], ctx),
}
}
}
let mut groups = Vec::new();
let mut state = State::Skip(0);
let mut ctx = GroupingContext::default();
for (idx, (_, event)) in events.iter().enumerate() {
match event {
// These block-level events force new groups. We do this
// because we want to include these events in the group to
// make the group self-contained.
Event::Start(Tag::Paragraph | Tag::CodeBlock(..)) => {
// A translatable group starts here.
let mut next_groups;
(next_groups, ctx) = state.into_groups(idx, events, ctx);
groups.append(&mut next_groups);
state = State::Translate(idx);
}
Event::End(TagEnd::Paragraph | TagEnd::CodeBlock) => {
// A translatable group ends after `idx`.
let idx = idx + 1;
let mut next_groups;
(next_groups, ctx) = state.into_groups(idx, events, ctx);
groups.append(&mut next_groups);
state = State::Skip(idx);
}
// Inline events start or continue a translating group.
Event::Start(
Tag::Emphasis
| Tag::Strong
| Tag::Strikethrough
| Tag::Link { .. }
| Tag::Image { .. },
)
| Event::End(
TagEnd::Emphasis
| TagEnd::Strong
| TagEnd::Strikethrough
| TagEnd::Link
| TagEnd::Image,
)
| Event::Text(_)
| Event::Code(_)
| Event::FootnoteReference(_)
| Event::SoftBreak
| Event::HardBreak => {
// If we're currently skipping, then a new
// translatable group starts here.
if let State::Skip(_) = state {
let mut next_groups;
(next_groups, ctx) = state.into_groups(idx, events, ctx);
groups.append(&mut next_groups);
state = State::Translate(idx);
}
}
Event::Html(s) | Event::InlineHtml(s) => {
match directives::find(s) {
Some(directives::Directive::Skip) => {
// If in the middle of translation, finish it.
if let State::Translate(_) = state {
let mut next_groups;
(next_groups, ctx) = state.into_groups(idx, events, ctx);
groups.append(&mut next_groups);
                        // Restart translation here: subtle, but needed
                        // so that the skip directive applies to the rest
                        // of the inline content.
state = State::Translate(idx);
}
ctx.skip_next_group = true;
}
Some(directives::Directive::Comment(comment)) => {
// If in the middle of translation, finish it.
if let State::Translate(_) = state {
let mut next_groups;
(next_groups, ctx) = state.into_groups(idx, events, ctx);
groups.append(&mut next_groups);
                        // Restart translation here: subtle, but needed
                        // so that the comment attaches to the rest of
                        // the inline content.
state = State::Translate(idx);
}
ctx.comments.push(comment);
}
_ => {
match event {
Event::Html(_) => {
                            // Otherwise, treat this as a skipping group since it is a block-level HTML tag.
if let State::Translate(_) = state {
let mut next_groups;
(next_groups, ctx) = state.into_groups(idx, events, ctx);
groups.append(&mut next_groups);
state = State::Skip(idx);
}
}
Event::InlineHtml(_) =>
// If we're currently skipping, then a new
// translatable group starts here.
{
if let State::Skip(_) = state {
let mut next_groups;
(next_groups, ctx) = state.into_groups(idx, events, ctx);
groups.append(&mut next_groups);
state = State::Translate(idx);
}
}
                        // We are inside a match on `Event::Html` and `Event::InlineHtml`; other variants are not possible.
_ => unreachable!(),
}
}
}
}
// All other block-level events start or continue a
// skipping group.
_ => {
if let State::Translate(_) = state {
let mut next_groups;
(next_groups, ctx) = state.into_groups(idx, events, ctx);
groups.append(&mut next_groups);
state = State::Skip(idx);
}
}
}
}
match state {
State::Translate(start) => groups.push(Group::Translate {
events: events[start..].into(),
comment: "".into(),
}),
State::Skip(start) => groups.push(Group::Skip(events[start..].into())),
}
groups
}
/// Returns true if the events appear to be a codeblock.
fn is_codeblock_group(events: &[(usize, Event<'_>)]) -> bool {
matches!(
events,
[
(_, Event::Start(Tag::CodeBlock(_))),
..,
(_, Event::End(TagEnd::CodeBlock))
]
)
}
/// Returns true if the scope should be translated.
fn is_translate_scope(x: Scope) -> bool {
static SCOPE_STRING: OnceLock<Scope> = OnceLock::new();
static SCOPE_COMMENT: OnceLock<Scope> = OnceLock::new();
let scope_string = SCOPE_STRING.get_or_init(|| Scope::new("string").unwrap());
let scope_comment = SCOPE_COMMENT.get_or_init(|| Scope::new("comment").unwrap());
scope_string.is_prefix_of(x) || scope_comment.is_prefix_of(x)
}
/// Creates groups by checking the code block with a heuristic.
fn heuristic_codeblock<'a>(
events: &'a [(usize, Event<'_>)],
mut ctx: GroupingContext,
) -> (Vec<Group<'a>>, GroupingContext) {
let is_translate = match events {
[(_, Event::Start(Tag::CodeBlock(_))), .., (_, Event::End(TagEnd::CodeBlock))] => {
let (codeblock_text, _) = reconstruct_markdown(events, None);
            // Heuristic to check whether the codeblock has a string
            // literal or a line comment; if it has neither, we skip
            // it. We may actually want to use a lexer here to make
            // this more robust.
codeblock_text.contains('"') || codeblock_text.contains("//")
}
_ => true,
};
if is_translate {
(
vec![Group::Translate {
events: events.into(),
comment: std::mem::take(&mut ctx.comments).join(" "),
}],
ctx,
)
} else {
(vec![Group::Skip(events.into())], ctx)
}
}
/// Creates groups by parsing the code block.
fn parse_codeblock<'a>(
events: &'a [(usize, Event<'_>)],
mut ctx: GroupingContext,
) -> (Vec<Group<'a>>, GroupingContext) {
    // Detect the language from the code block's language identifier.
static SYNTAX_SET: OnceLock<SyntaxSet> = OnceLock::new();
let ss = SYNTAX_SET.get_or_init(SyntaxSet::load_defaults_newlines);
let syntax = if let (_, Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(x)))) = &events[0] {
ss.find_syntax_by_token(x.split(',').next().unwrap())
} else {
None
};
let Some(syntax) = syntax else {
        // If there is no language specifier, fall back to the heuristic.
return heuristic_codeblock(events, ctx);
};
let mut ps = ParseState::new(syntax);
let mut ret = vec![];
for (idx, event) in events.iter().enumerate() {
match event {
(text_line, Event::Text(text)) => {
let mut stack = ScopeStack::new();
let mut stack_failure = false;
let Ok(ops) = ps.parse_line(text, ss) else {
                    // If parsing fails, the text event should be translated.
ret.push(Group::Translate {
events: events[idx..idx + 1].into(),
comment: std::mem::take(&mut ctx.comments).join(" "),
});
continue;
};
let mut translate_events = vec![];
let mut groups = vec![];
for (range, op) in ScopeRangeIterator::new(&ops, text) {
if stack.apply(op).is_err() {
stack_failure = true;
break;
}
if range.is_empty() {
continue;
}
                    // Calculate the line number where the range starts.
let range_line = if range.start == 0 {
*text_line
} else {
text_line + text[0..range.start].lines().count() - 1
};
let text = &text[range];
                    // Whitespace between translatable texts should be added to the
                    // translate group. So all whitespace is added to the translate
                    // events buffer temporarily, and trailing whitespace is removed
                    // at the end.
let is_whitespace = text.trim_matches(&[' ', '\t'] as &[_]).is_empty();
let is_translate = stack.scopes.iter().any(|x| is_translate_scope(*x));
if is_translate || (is_whitespace && !translate_events.is_empty()) {
translate_events.push((range_line, Event::Text(text.into())));
} else {
let whitespace_events = extract_trailing_whitespaces(&mut translate_events);
if !translate_events.is_empty() {
groups.push(Group::Translate {
events: std::mem::take(&mut translate_events),
comment: std::mem::take(&mut ctx.comments).join(" "),
});
}
if !whitespace_events.is_empty() {
groups.push(Group::Skip(whitespace_events));
}
groups.push(Group::Skip(vec![(range_line, Event::Text(text.into()))]));
}
}
let whitespace_events = extract_trailing_whitespaces(&mut translate_events);
if !translate_events.is_empty() {
groups.push(Group::Translate {
events: std::mem::take(&mut translate_events),
comment: std::mem::take(&mut ctx.comments).join(" "),
});
}
if !whitespace_events.is_empty() {
groups.push(Group::Skip(whitespace_events));
}
if stack_failure {
                    // If a stack operation failed, the text event should be translated.
ret.push(Group::Translate {
events: events[idx..idx + 1].into(),
comment: std::mem::take(&mut ctx.comments).join(" "),
});
} else {
ret.append(&mut groups);
}
}
_ => {
ret.push(Group::Skip(events[idx..idx + 1].into()));
}
}
}
(ret, ctx)
}
/// Extract trailing events which contain only whitespace.
fn extract_trailing_whitespaces<'a>(buf: &mut Vec<(usize, Event<'a>)>) -> Vec<(usize, Event<'a>)> {
let mut ret = vec![];
while let Some(last) = buf.last() {
match &last.1 {
Event::Text(text) if text.as_ref().trim_matches(&[' ', '\t'] as &[_]).is_empty() => {
let last = buf.pop().unwrap();
ret.push(last);
}
_ => break,
}
}
ret.reverse();
ret
}
/// Render a slice of Markdown events back to Markdown.
///
/// # Examples
///
/// ```
/// use mdbook_i18n_helpers::{extract_events, reconstruct_markdown};
/// use pulldown_cmark::{Event, Tag};
///
/// let group = extract_events("Hello *world!*", None);
/// let (reconstructed, _) = reconstruct_markdown(&group, None);
/// assert_eq!(reconstructed, "Hello _world!_");
/// ```
///
/// Notice how this will normalize the Markdown to use `_` for
/// emphasis and `**` for strong emphasis. The style is chosen to
/// match the [Google developer documentation style
/// guide](https://developers.google.com/style/text-formatting).
pub fn reconstruct_markdown<'a>(
group: &[(usize, Event<'a>)],
state: Option<State<'a>>,
) -> (String, State<'a>) {
let events = group.iter().map(|(_, event)| event);
let code_block_token_count = calculate_code_block_token_count(events.clone()).unwrap_or(3);
let mut markdown = String::new();
let options = Options {
code_block_token_count,
list_token: '-',
emphasis_token: '_',
strong_token: "**",
..Options::default()
};
// Advance the true state, but throw away the rendered Markdown
// since it can contain unwanted padding.
let new_state = cmark_resume_with_options(
events.clone(),
String::new(),
state.clone(),
options.clone(),
)
.unwrap();
// Block quotes and lists add padding to the state, which is
// reflected in the rendered Markdown. We want to capture the
// Markdown without the padding to remove the effect of these
// structural elements. Similarly, we don't want extra newlines at
// the start.
let simplified_state = state.map(|state| State {
newlines_before_start: 0,
padding: Vec::new(),
..state
});
cmark_resume_with_options(events, &mut markdown, simplified_state, options).unwrap();
// Even with `newlines_before_start` set to zero, we get a leading
// `\n` for code blocks (since they must start on a new line). We
// can safely trim this here since we know that we always
// reconstruct Markdown for a self-contained group of events.
(String::from(markdown.trim_start_matches('\n')), new_state)
}
#[derive(Debug, PartialEq)]
pub struct ExtractedMessage {
pub message: String,
pub comment: String,
}
impl From<&str> for ExtractedMessage {
fn from(s: &str) -> Self {
ExtractedMessage {
message: s.to_owned(),
comment: "".into(),
}
}
}
/// Extract translatable strings from `document`.
///
/// # Examples
///
/// Structural markup like headings and lists is removed from the
/// messages:
///
/// ```
/// use mdbook_i18n_helpers::extract_messages;
///
/// assert_eq!(
/// extract_messages("# A heading"),
/// vec![(1, "A heading".into())],
/// );
/// assert_eq!(
/// extract_messages(
/// "1. First item\n\
/// 2. Second item\n"
/// ),
/// vec![
/// (1, "First item".into()),
/// (2, "Second item".into()),
/// ],
/// );
/// ```
///
/// Indentation due to structural elements like block quotes and lists
/// is ignored:
///
/// ```
/// use mdbook_i18n_helpers::extract_messages;
///
/// let messages = extract_messages(
/// "> * Hello, this is a\n\
/// > list in a quote.\n\
/// >\n\
/// > This is the second\n\
/// > paragraph.\n"
/// );
/// assert_eq!(
/// messages,
/// vec![
/// (1, "Hello, this is a list in a quote.".into()),
/// (4, "This is the second paragraph.".into()),
/// ],
/// );
/// ```
pub fn extract_messages(document: &str) -> Vec<(usize, ExtractedMessage)> {
let events = extract_events(document, None);
let mut messages = Vec::new();
let mut state = None;
for group in group_events(&events) {
match group {
Group::Translate { events, comment } => {
if let Some((lineno, _)) = events.first() {
let (text, new_state) = reconstruct_markdown(&events, state);
                    // Skip empty messages since they are special:
                    // they contain the PO file metadata.
if !text.trim().is_empty() {
messages.push((
*lineno,
ExtractedMessage {
message: text,
comment,
},
));
}
state = Some(new_state);
}
}
Group::Skip(events) => {
let (_, new_state) = reconstruct_markdown(&events, state);
state = Some(new_state);
}
}
}
messages
}
/// Trim `new_events` if they're wrapped in an unwanted paragraph.
///
/// If `new_events` is wrapped in a paragraph and `old_events` isn't,
/// then the paragraph is removed. This is useful when a text event
/// has been wrapped in a paragraph:
///
/// ```
/// use pulldown_cmark::{Event, Tag, TagEnd};
/// use mdbook_i18n_helpers::{extract_events, reconstruct_markdown, trim_paragraph};
///
/// let old_events = vec![(1, Event::Text("A line of text".into()))];
/// let (markdown, _) = reconstruct_markdown(&old_events, None);
/// let new_events = extract_events(&markdown, None);
/// // The stand-alone text has been wrapped in an extra paragraph:
/// assert_eq!(
/// new_events,
/// &[
/// (1, Event::Start(Tag::Paragraph)),
/// (1, Event::Text("A line of text".into())),
/// (1, Event::End(TagEnd::Paragraph)),
/// ],
/// );
///
/// assert_eq!(
/// trim_paragraph(&new_events, &old_events),
/// &[(1, Event::Text("A line of text".into()))],
/// );
/// ```
pub fn trim_paragraph<'a, 'event>(
new_events: &'a [(usize, Event<'event>)],
old_events: &'a [(usize, Event<'event>)],
) -> &'a [(usize, Event<'event>)] {
use pulldown_cmark::Event::{End, Start};
use pulldown_cmark::Tag::Paragraph;
match new_events {
[(_, Start(Paragraph)), inner @ .., (_, End(TagEnd::Paragraph))] => match old_events {
[(_, Start(Paragraph)), .., (_, End(TagEnd::Paragraph))] => new_events,
[..] => inner,
},
[..] => new_events,
}
}
/// Translate `events` using `catalog`.
pub fn translate_events<'a>(
events: &'a [(usize, Event<'a>)],
catalog: &'a Catalog,
) -> Vec<(usize, Event<'a>)> {
let mut translated_events = Vec::new();
let mut state = None;
for group in group_events(events) {
match group {
Group::Translate { events, .. } => {
// Reconstruct the message.
let (msgid, new_state) = reconstruct_markdown(&events, state.clone());
let translated = catalog
.find_message(None, &msgid, None)
.filter(|msg| !msg.flags().is_fuzzy() && msg.is_translated())
.and_then(|msg| msg.msgstr().ok());
match translated {
Some(msgstr) => {
// Generate new events for `msgstr`, taking
// care to trim away unwanted paragraphs.
translated_events.extend_from_slice(trim_paragraph(
&extract_events(msgstr, state),
&events,
));
}
None => translated_events.extend_from_slice(&events),
}
// Advance the state.
state = Some(new_state);
}
Group::Skip(events) => {
// Copy the events unchanged to the output.
translated_events.extend_from_slice(&events);
// Advance the state.
let (_, new_state) = reconstruct_markdown(&events, state);
state = Some(new_state);
}
}
}
translated_events
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use pulldown_cmark::CodeBlockKind;
use pulldown_cmark::Event::*;
use pulldown_cmark::HeadingLevel::*;
use pulldown_cmark::Tag::*;
    /// Extract messages in `document` and assert that they match `expected`.
#[track_caller]
fn assert_extract_messages(document: &str, expected: &[(usize, &str)]) {
assert_eq!(
extract_messages(document)
.iter()
.map(|(lineno, msg)| (*lineno, &msg.message[..]))
.collect::<Vec<_>>(),
expected,
);
}
#[test]
fn extract_events_empty() {
assert_eq!(extract_events("", None), vec![]);
}
#[test]
fn extract_events_paragraph() {
assert_eq!(
extract_events("foo bar", None),
vec![
(1, Start(Paragraph)),
(1, Text("foo bar".into())),
(1, End(TagEnd::Paragraph)),
]
);
}
#[test]
fn extract_events_softbreak() {
assert_eq!(
extract_events("foo\nbar", None),
vec![
(1, Start(Paragraph)),
(1, Text("foo".into())),
(1, Text(" ".into())),
(2, Text("bar".into())),
(1, End(TagEnd::Paragraph)),
]
);
}
#[test]
fn extract_events_heading() {
assert_eq!(
extract_events("# Foo Bar", None),
vec![
(
1,
Start(Tag::Heading {
level: H1,
id: None,
classes: vec![],
attrs: vec![]
})
),
(1, Text("Foo Bar".into())),
(1, End(TagEnd::Heading(H1))),
]
);
}
#[test]
fn extract_events_list_item() {
assert_eq!(
extract_events("* foo bar", None),
vec![
(1, Start(List(None))),
(1, Start(Item)),
(1, Text("foo bar".into())),
(1, End(TagEnd::Item)),
(1, End(TagEnd::List(false))),
]
);
}
#[test]
fn extract_events_code_block() {
let (_, state) =
reconstruct_markdown(&[(1, Start(CodeBlock(CodeBlockKind::Indented)))], None);
assert_eq!(
extract_events("foo\nbar\nbaz", Some(state)),
vec![
(1, Text("foo\n".into())),
(2, Text("bar\n".into())),
(3, Text("baz".into())),
]
);
// Compare with extraction without state:
assert_eq!(
extract_events("foo\nbar\nbaz", None),
vec![
(1, Start(Paragraph)),
(1, Text("foo".into())),
(1, Text(" ".into())),
(2, Text("bar".into())),
(2, Text(" ".into())),
(3, Text("baz".into())),
(1, End(TagEnd::Paragraph)),
]
);
}
#[test]
fn extract_events_comments() {
assert_eq!(
extract_events("<!-- mdbook-xgettext:skip -->\nHello", None),
vec![
(1, Start(HtmlBlock)),
(1, Html("<!-- mdbook-xgettext:skip -->\n".into())),
(1, End(TagEnd::HtmlBlock)),
(2, Start(Paragraph)),
(2, Text("Hello".into())),
(2, End(TagEnd::Paragraph)),
]
);
}
#[test]
fn extract_messages_empty() {
assert_extract_messages("", &[]);
}
#[test]
fn extract_messages_keep_empty_inline_html() {
// Keep inline html tags
assert_extract_messages("<span></span>", &[(1, "<span></span>")]);
}
#[test]
fn extract_messages_keep_whitespace_inline_html() {
// span is an inline html tag so even whitespace is kept as is
assert_extract_messages("<span> </span>", &[(1, "<span> </span>")]);
}
#[test]
fn extract_messages_ignore_whitespace_only_block_html() {
// Whitespace in block level html tags is ignored
assert_extract_messages("<p> </p>", &[]);
}
#[test]
fn extract_messages_single_line() {
assert_extract_messages("This is a paragraph.", &[(1, "This is a paragraph.")]);
}
#[test]
fn extract_messages_simple() {
assert_extract_messages(
"This is\n\
the first\n\
paragraph.🦀\n\
\n\
Second paragraph.",
&[
(1, "This is the first paragraph.🦀"),
(5, "Second paragraph."),
],
);
}
#[test]
fn extract_messages_leading_newlines() {
assert_extract_messages(
"\n\
\n\
\n\
This is the\n\
first paragraph.",
&[(4, "This is the first paragraph.")],
);
}
#[test]
fn extract_messages_trailing_newlines() {
assert_extract_messages(
"This is\n\
a paragraph.\n\
\n\
\n",
&[(1, "This is a paragraph.")],
);
}
#[test]
fn extract_messages_styled_text() {
// The parser normalizes "*emphasis*" to "_emphasis_" and
// "__strong emphasis__" to "**strong emphasis**".
assert_extract_messages(
"**This** __~~message~~__ _has_ `code` *style*\n",
&[(1, "**This** **~~message~~** _has_ `code` _style_")],
);
}
#[test]
fn extract_messages_inline_html() {
// Inline HTML tag is kept as is in the translation.
assert_extract_messages(
"Hi from <span dir=\"ltr\">Rust</div>",
&[(1, "Hi from <span dir=\"ltr\">Rust</div>")],
);
}
#[test]
fn extract_messages_block_html() {
        // Block-level HTML tags are skipped, but the text inside is extracted.
assert_extract_messages(
"<div class=\"warning\">\n\
\n\
Beware of the dog!\n\
\n\
</div>",
&[(3, "Beware of the dog!")],
);
}
#[test]
fn extract_messages_mixed_html() {
        // Block-level HTML tags are skipped, but the text inside is extracted, with inline HTML kept as is.
assert_extract_messages(
"<div>\n\
\n\
Hi from <span dir=\"ltr\">Rust</span>\n\
\n\
</div>",
&[(3, "Hi from <span dir=\"ltr\">Rust</span>")],
);
}
#[test]
fn extract_messages_inline_link() {
assert_extract_messages(
"See [this page](https://example.com) for more info.",
&[(1, "See [this page](https://example.com) for more info.")],
);
}
#[test]
fn extract_messages_reference_link() {
assert_extract_messages(
"See [this page][1] for more info.\n\n\
[1]: https://example.com",
// The parser expands reference links on the fly.
&[(1, "See [this page](https://example.com) for more info.")],
);
}
#[test]
fn extract_messages_collapsed_link() {
// We make the parser expand collapsed links on the fly.
assert_extract_messages(
"Click [here][]!\n\n\
[here]: http://example.net/",
&[(1, "Click [here](http://example.net/)!")],
);
}
#[test]
fn extract_messages_shortcut_link() {
assert_extract_messages(
"Click [here]!\n\n\
[here]: http://example.net/",
&[(1, "Click [here](http://example.net/)!")],
);
}
#[test]
fn extract_messages_autolink() {
assert_extract_messages(
"Visit <http://example.net>!",
&[(1, "Visit <http://example.net>!")],
);
}
#[test]
fn extract_messages_email() {
assert_extract_messages(
"Contact <[email protected]>!",
&[(1, "Contact <[email protected]>!")],
);
}
#[test]
fn extract_messages_broken_reference_link() {
// A reference link without the corresponding link definition
// results in an escaped link.
//
// See `SourceMap::extract_messages` for a more complex
// approach which can work around this in some cases.
assert_extract_messages("[foo][unknown]", &[(1, r"\[foo\]\[unknown\]")]);
}
#[test]
fn extract_messages_footnotes() {
assert_extract_messages(
"
The document[^1] text.

[^1]: The footnote text.
",
&[
(2, "The document[^1] text."), //
(4, "The footnote text."),
],
);
}
#[test]
fn extract_messages_block_quote() {
assert_extract_messages(
r"One of my favorite quotes is:
> Don't believe everything you read on the Internet.
>
> I didn't say this second part, but I needed a paragraph for testing.
--Abraham Lincoln
",
&[
(1, "One of my favorite quotes is:"),
(3, "Don't believe everything you read on the Internet."),
(
5,
"I didn't say this second part, but I needed a paragraph for testing.",
),
(7, "\\--Abraham Lincoln"),
],
);
}
#[test]
fn extract_messages_table() {
let input = "\
| Module Type | Description\n\
|-------------------|-------------------------\n\
| `rust_binary` | Produces a Rust binary.\n\
| `rust_library` | Produces a Rust library.\n\
";
assert_extract_messages(
input,
&[
(1, "Module Type"),
(1, "Description"),
(3, "`rust_binary`"),
(3, "Produces a Rust binary."),
(4, "`rust_library`"),
(4, "Produces a Rust library."),
],
);
}
#[test]
fn extract_messages_code_block() {
assert_extract_messages(
"Preamble\n```rust\n// Example:\nfn hello() {\n some_code()\n\n todo!()\n}\n```\nPostamble",
&[
(1, "Preamble"),
(
3,
"// Example:\n",
),
(10, "Postamble"),
],
);
}
#[test]
fn extract_messages_two_code_blocks() {
assert_extract_messages(
"```\n\
\"First\" block\n\
```\n\
```\n\
\"Second\" block\n\
```\n\
",
&[
(1, "```\n\"First\" block\n```"), //
(4, "```\n\"Second\" block\n```"),
],
);
}
#[test]
fn extract_messages_quoted_code_block() {
assert_extract_messages(
"\
> Preamble\n\
> ```rust\n\
> fn hello() {\n\
> some_code()\n\
>\n\
> // FIXME: do something here!\n\
> todo!()\n\
> }\n\
> ```\n\
> Postamble",
&[
(1, "Preamble"),
(6, "// FIXME: do something here!\n"),
(10, "Postamble"),
],
);
}
#[test]
fn extract_messages_code_block_with_block_comment() {
assert_extract_messages(
"```rust\n\
/* block comment\n\
* /* nested block comment\n\
* */\n\
* \n\
* \n\
* \n\
* */\n\
```\n",
&[(
2,
"/* block comment\n* /* nested block comment\n* */\n* \n* \n* \n* */",
)],
);
}
#[test]
fn extract_messages_code_block_with_continuous_line_comments() {
assert_extract_messages(
r"```rust
// continuous
// line
// comments
{
// continuous
// line
// comments
let a = 1; // single line comment
let b = 1; // single line comment
}
```",
&[
(2, "// continuous\n// line\n// comments\n"),
(6, "// continuous\n // line\n // comments\n"),
(9, "// single line comment\n"),
(10, "// single line comment\n"),
],
);
}
#[test]
fn extract_messages_multi_language_code_blocks() {
assert_extract_messages(
r#"```c
// C
'C'; "C";
```
```html
<!-- HTML
HTML -->
```
```javascript
`JavaScript`
```
```ruby
# Ruby
```"#,
&[
(2, "// C\n'C'"),
(3, "\"C\""),
(6, "<!-- HTML\nHTML -->"),
(10, "`JavaScript`"),
(13, "# Ruby\n"),
],
);
}
#[test]
fn extract_messages_details() {
        // This isn't great: we lose text following an HTML tag:
assert_extract_messages(
"Preamble\n\
<details>\n\
Some Details\n\
</details>\n\
\n\
Postamble",
&[
(1, "Preamble"), //
// Missing "Some Details"
(6, "Postamble"),
],
);
// It works well enough when `<details>` has blank lines
// before and after.
assert_extract_messages(
"Preamble\n\
\n\
<details>\n\
\n\
Some Details\n\
\n\
</details>\n\
\n\
Postamble",
&[
(1, "Preamble"), //
(5, "Some Details"),
(9, "Postamble"),
],
);
}
#[test]
fn extract_messages_list() {
assert_extract_messages(
"Some text\n * List item 1🦀\n * List item 2\n\nMore text",
&[
(1, "Some text"), //
(2, "List item 1🦀"),
(3, "List item 2"),
(5, "More text"),
],
);
}
#[test]
fn extract_messages_multilevel_list() {
assert_extract_messages(
"Some text\n * List item 1\n * List item 2\n * Sublist 1\n * Sublist 2\n\nMore text",
&[
(1, "Some text"), //
(2, "List item 1"),
(3, "List item 2"),
(4, "Sublist 1"),
(5, "Sublist 2"),
(7, "More text"),
],
);
}
#[test]
fn extract_messages_list_with_paragraphs() {
assert_extract_messages(
r"* Item 1.
* Item 2,
two lines.

* Sub 1.
* Sub 2.
",
&[
(1, "Item 1."),
(2, "Item 2, two lines."),
(5, "Sub 1."),
(6, "Sub 2."),
],
);
}
#[test]
fn extract_messages_headings() {
assert_extract_messages(
r"Some text
# Headline News🦀

* A
* List

## Subheading
",
&[
(1, "Some text"),
(2, "Headline News🦀"),
(4, "A"),
(5, "List"),
(7, "Subheading"),
],
);
}
#[test]
fn extract_messages_code_followed_by_details() {
// This is a regression test for an error that would
// incorrectly combine CodeBlock and HTML.
assert_extract_messages(
r"```bob
// BOB
```

<details>

* Blah blah

</details>
",
&[
(1, "```bob\n// BOB\n```"), //
(7, "Blah blah"),
],
);
}
#[test]
fn extract_messages_backslashes() {
// Demonstrate how a single backslash in the Markdown becomes
// a backslash-escaped backslash when we extract the text.
// This is consistent with the CommonMark spec:
// https://spec.commonmark.org/0.30/#backslash-escapes.
// However, it causes problems for LaTeX preprocessors:
// https://github.com/google/mdbook-i18n-helpers/issues/105.
assert_extract_messages(
r"
$$
\sum_{n=1}^{\infty} 2^{-n} = 1
$$
",
&[(2, r"$$ \\sum\_{n=1}^{\infty} 2^{-n} = 1 $$")],
);
}
#[test]
fn extract_messages_skip_simple() {
assert_extract_messages(
r"<!-- mdbook-xgettext:skip -->
This is a paragraph.",
&[],
);
}
#[test]
fn extract_messages_skip_next_paragraph_ok() {
assert_extract_messages(
r"<!-- mdbook-xgettext:skip -->
This is a paragraph.

This should be translated.
",
&[(4, "This should be translated.")],
);
}
#[test]
fn extract_messages_skip_next_codeblock() {
assert_extract_messages(
r"<!-- mdbook-xgettext:skip -->
```
def f(x): return x * x
```
This should be translated.
",
&[(5, "This should be translated.")],
);
}
#[test]
fn extract_messages_skip_back_to_back() {
assert_extract_messages(
r"<!-- mdbook-xgettext:skip -->
```
def f(x): return x * x
```
<!-- mdbook-xgettext:skip -->
This should not be translated.

But *this* should!
",
&[(8, "But _this_ should!")],
);
}
#[test]
fn extract_messages_block_html_skip() {
// The comment is a block level html tag.
assert_extract_messages(
"<!-- mdbook-xgettext:skip -->\n\
This is ignored\n\
\n\
but this is not",
&[(4, "but this is not")],
);
}
#[test]
fn extract_messages_inline_html_skips() {
// The comment is an inline html tag.
assert_extract_messages(
"
this should be translated <!-- mdbook-xgettext:skip --> but not this.
... nor this.

But *this* should!",
&[(2, "this should be translated "), (5, "But _this_ should!")],
);
}
#[test]
fn extract_messages_skipping_second_item() {
assert_extract_messages(
"
* A
<!-- mdbook-xgettext:skip -->
* B
* C
",
&[(2, "A"), (5, "C")],
);
}
#[test]
fn extract_messages_skipping_second_paragraphed_item() {
assert_extract_messages(
"
* A

<!-- mdbook-xgettext:skip -->
* B

* C
",
&[(2, "A"), (7, "C")],
);
}
#[test]
fn extract_messages_skipping_inline_second_item() {
        // This isn't great: we lose text following an HTML comment.
        // Very similar to the failure mode of the
        // `extract_messages_details` test.
        //
        // The root cause is the Markdown spec and how the Markdown
        // parser treats HTML blocks: text that immediately follows an
        // HTML block on the same line is included as part of the HTML
        // block.
assert_extract_messages(
"
* A
* <!-- mdbook-xgettext:skip --> B
* C
",
&[(2, "A")],
);
}
#[test]
fn extract_messages_inline_skip_to_end_of_block() {
assert_extract_messages(
"foo <!-- mdbook-xgettext:skip --> **bold** bar
still skipped

not-skipped",
&[(1, "foo "), (4, "not-skipped")],
);
}
#[test]
fn extract_messages_automatic_skipping_nontranslatable_codeblocks_simple() {
assert_extract_messages(
r"
```python
def g(x):
this_should_be_skipped_no_strings_or_comments()
```
",
&[],
);
}
#[test]
fn extract_messages_automatic_skipping_nontranslatable_codeblocks() {
assert_extract_messages(
r#"
```python
def f(x):
print("this should be translated")
```
```python
def g(x):
but_this_should_not()
```
"#,
&[(4, "\"this should be translated\"")],
);
}
#[test]
fn extract_messages_without_language_specifier() {
assert_extract_messages(
r#"
```
def f(x):
print("this should be translated")
```
```
def g(x):
but_this_should_not()
```
"#,
&[(
2,
"```\ndef f(x):\n print(\"this should be translated\")\n```",
)],
);
}
#[test]
fn extract_messages_codeblock_in_codeblock() {
assert_extract_messages(
r#"
````
```
// codeblock in codeblock
```
````
"#,
&[(2, "````\n```\n// codeblock in codeblock\n```\n````")],
);
}
#[test]
fn extract_message_comments() {
assert_eq!(
extract_messages(
"
<!-- mdbook-xgettext:comment: first comment! -->
Hello world!
"
),
vec![(
3,
ExtractedMessage {
message: "Hello world!".into(),
comment: "first comment!".into(),
}
)]
);
}
#[test]
fn extract_message_comments_multiple_joined() {
assert_eq!(
extract_messages(
"
<!-- mdbook-xgettext:comment: this is a test -->
<!-- mdbook-xgettext:comment: of a comment that spans. -->
Greetings!
"
),
vec![(
4,
ExtractedMessage {
message: "Greetings!".into(),
comment: "this is a test of a comment that spans.".into(),
}
)]
);
}
#[test]
fn extract_message_multiple_comments() {
assert_eq!(
extract_messages(
"
before-no-comment

<!-- mdbook-xgettext:comment: another -->
Hello again, this is some text
with a comment on it.

<!-- mdbook-xgettext:comment: one more comment. -->
after

after-no-comment
"
),
vec![
(
2,
ExtractedMessage {
message: "before-no-comment".into(),
comment: "".into(),
}
),
(
5,
ExtractedMessage {
message: "Hello again, this is some text with a comment on it.".into(),
comment: "another".into(),
}
),
(
9,
ExtractedMessage {
message: "after".into(),
comment: "one more comment.".into(),
}
),
(
11,
ExtractedMessage {
message: "after-no-comment".into(),
comment: "".into(),
}
),
]
);
}
#[test]
fn extract_message_comments_on_codeblock() {
assert_eq!(
extract_messages(
r#"
<!-- mdbook-xgettext:comment: greetings! -->
```python
print("Hello world")
```
"#
),
vec![(
4,
ExtractedMessage {
message: "\"Hello world\"".into(),
comment: "greetings!".into(),
}
),]
);
}
}