| //! Replace json comments and trailing commas in place. |
| //! |
| //! A fork of a fork: |
| //! |
| //! * <https://github.com/tmccombs/json-comments-rs> |
| //! * <https://github.com/parcel-bundler/parcel/pull/9032> |
| //! |
//! `json-strip-comments` is a library to strip out comments from JSON. By processing text
//! through a [`StripComments`] adapter first, it is possible to use a standard JSON parser (such
//! as [serde_json](https://crates.io/crates/serde_json)) with quasi-JSON input that contains
//! comments.
| //! |
| //! In fact, this code makes few assumptions about the input and could probably be used to strip |
| //! comments out of other types of code as well, provided that strings use double quotes and |
| //! backslashes are used for escapes in strings. |
| //! |
| //! The following types of comments are supported: |
| //! - C style block comments (`/* ... */`) |
| //! - C style line comments (`// ...`) |
| //! - Shell style line comments (`# ...`) |
| //! |
| //! ## Example |
| //! |
| //! ```rust |
| #![doc = include_str!("../examples/example.rs")] |
| //! ``` |
| |
| use std::io::{ErrorKind, Read, Result}; |
| |
/// Where the scanner currently is relative to JSON strings and comments.
///
/// The value is carried from one buffer to the next, so input may be processed in chunks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Outside of any string or comment.
    Top,
    /// Inside a double-quoted string.
    InString,
    /// Directly after a backslash inside a string; the next byte never ends the string.
    StringEscape,
    /// A `/` was seen at top level; the next byte decides which kind of comment starts.
    InComment,
    /// Inside a `/* ... */` comment.
    InBlockComment,
    /// Inside a block comment, directly after a `*` that may be followed by the closing `/`.
    MaybeCommentEnd,
    /// Inside a `//` or `#` comment, which runs up to the next newline.
    InLineComment,
}

use State::{
    InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, StringEscape, Top,
};
| |
| /// A [`Read`] that transforms another [`Read`] so that it changes all comments to spaces so that a downstream json parser |
| /// (such as json-serde) doesn't choke on them. |
| /// |
| /// The supported comments are: |
| /// - C style block comments (`/* ... */`) |
| /// - C style line comments (`// ...`) |
| /// - Shell style line comments (`# ...`) |
| /// |
| /// ## Example |
| /// ``` |
| /// use json_strip_comments::StripComments; |
| /// use std::io::Read; |
| /// |
| /// let input = r#"{ |
| /// // c line comment |
| /// "a": "comment in string /* a */", |
| /// ## shell line comment |
| /// } /** end */"#; |
| /// |
| /// let mut stripped = String::new(); |
| /// StripComments::new(input.as_bytes()).read_to_string(&mut stripped).unwrap(); |
| /// |
| /// assert_eq!(stripped, "{ |
| /// \n\"a\": \"comment in string /* a */\", |
| /// \n} "); |
| /// |
| /// ``` |
| /// |
| pub struct StripComments<T: Read> { |
| inner: T, |
| state: State, |
| settings: CommentSettings, |
| } |
| |
| impl<T> StripComments<T> |
| where |
| T: Read, |
| { |
| pub fn new(input: T) -> Self { |
| Self { |
| inner: input, |
| state: Top, |
| settings: CommentSettings::default(), |
| } |
| } |
| |
| /// Create a new `StripComments` with settings which may be different from the default. |
| /// |
| /// This is useful if you wish to disable allowing certain kinds of comments. |
| #[inline] |
| pub fn with_settings(settings: CommentSettings, input: T) -> Self { |
| Self { |
| inner: input, |
| state: Top, |
| settings, |
| } |
| } |
| } |
| |
/// Returns from the enclosing function with an `InvalidData` I/O error.
macro_rules! invalid_data {
    () => {
        return ::std::result::Result::Err(::std::io::ErrorKind::InvalidData.into())
    };
}
| |
| impl<T> Read for StripComments<T> |
| where |
| T: Read, |
| { |
| fn read(&mut self, buf: &mut [u8]) -> Result<usize> { |
| let count = self.inner.read(buf)?; |
| if count > 0 { |
| strip_buf(&mut self.state, &mut buf[..count], self.settings, false)?; |
| } else if self.state != Top && self.state != InLineComment { |
| invalid_data!(); |
| } |
| Ok(count) |
| } |
| } |
| |
| fn consume_comment_whitespace_until_maybe_bracket( |
| state: &mut State, |
| buf: &mut [u8], |
| i: &mut usize, |
| settings: CommentSettings, |
| ) -> Result<bool> { |
| *i += 1; |
| while *i < buf.len() { |
| let c = &mut buf[*i]; |
| *state = match state { |
| Top => { |
| *state = top(c, settings); |
| if c.is_ascii_whitespace() { |
| *i += 1; |
| continue; |
| } |
| return Ok(*c == b'}' || *c == b']'); |
| } |
| InString => in_string(*c), |
| StringEscape => InString, |
| InComment => in_comment(c, settings)?, |
| InBlockComment => consume_block_comments(buf, i), |
| MaybeCommentEnd => maybe_comment_end(c), |
| InLineComment => consume_line_comments(buf, i), |
| }; |
| *i += 1; |
| } |
| Ok(false) |
| } |
| |
| fn strip_buf( |
| state: &mut State, |
| buf: &mut [u8], |
| settings: CommentSettings, |
| remove_trailing_commas: bool, |
| ) -> Result<()> { |
| let mut i = 0; |
| let len = buf.len(); |
| while i < len { |
| let c = &mut buf[i]; |
| if matches!(state, Top) { |
| let cur = i; |
| *state = top(c, settings); |
| if remove_trailing_commas |
| && *c == b',' |
| && consume_comment_whitespace_until_maybe_bracket(state, buf, &mut i, settings)? |
| { |
| buf[cur] = b' '; |
| } |
| } else { |
| *state = match state { |
| Top => unreachable!(), |
| InString => in_string(*c), |
| StringEscape => InString, |
| InComment => in_comment(c, settings)?, |
| InBlockComment => consume_block_comments(buf, &mut i), |
| MaybeCommentEnd => maybe_comment_end(c), |
| InLineComment => consume_line_comments(buf, &mut i), |
| } |
| } |
| i += 1; |
| } |
| Ok(()) |
| } |
| |
| #[inline] |
| fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State { |
| let cur = *i; |
| match memchr::memchr(b'\n', &buf[*i..]) { |
| Some(offset) => { |
| *i += offset; |
| buf[cur..*i].fill(b' '); |
| Top |
| } |
| None => { |
| *i = buf.len() - 1; |
| buf[cur..].fill(b' '); |
| InLineComment |
| } |
| } |
| } |
| |
| #[inline] |
| fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State { |
| let cur = *i; |
| match memchr::memchr(b'*', &buf[*i..]) { |
| Some(offset) => { |
| *i += offset; |
| buf[cur..=*i].fill(b' '); |
| MaybeCommentEnd |
| } |
| None => { |
| *i = buf.len() - 1; |
| buf[cur..].fill(b' '); |
| InBlockComment |
| } |
| } |
| } |
| |
| /// Strips comments from a string in place, replacing it with whitespaces. |
| /// |
| /// /// ## Example |
| /// ``` |
| /// use json_strip_comments::{strip_comments_in_place, CommentSettings}; |
| /// |
| /// let mut string = String::from(r#"{ |
| /// // c line comment |
| /// "a": "comment in string /* a */", |
| /// ## shell line comment |
| /// } /** end */"#); |
| /// |
| /// strip_comments_in_place(&mut string, CommentSettings::default(), false).unwrap(); |
| /// |
| /// assert_eq!(string, "{ |
| /// \n\"a\": \"comment in string /* a */\", |
| /// \n} "); |
| /// |
| /// ``` |
| pub fn strip_comments_in_place( |
| s: &mut str, |
| settings: CommentSettings, |
| remove_trailing_commas: bool, |
| ) -> Result<()> { |
| // Safety: we have made sure the text is UTF-8 |
| strip_buf( |
| &mut Top, |
| unsafe { s.as_bytes_mut() }, |
| settings, |
| remove_trailing_commas, |
| ) |
| } |
| |
| pub fn strip(s: &mut str) -> Result<()> { |
| strip_comments_in_place(s, CommentSettings::all(), true) |
| } |
| |
/// Settings for `StripComments`
///
/// Each flag enables one comment style. The default is for all comment types to be enabled.
///
/// Settings are plain values: they can be copied and compared for equality.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct CommentSettings {
    /// True if c-style block comments (`/* ... */`) are allowed
    block_comments: bool,
    /// True if c-style `//` line comments are allowed
    slash_line_comments: bool,
    /// True if shell-style `#` line comments are allowed
    hash_line_comments: bool,
}
| |
| impl Default for CommentSettings { |
| fn default() -> Self { |
| Self::all() |
| } |
| } |
| |
| impl CommentSettings { |
| /// Enable all comment Styles |
| pub const fn all() -> Self { |
| Self { |
| block_comments: true, |
| slash_line_comments: true, |
| hash_line_comments: true, |
| } |
| } |
| /// Only allow line comments starting with `#` |
| pub const fn hash_only() -> Self { |
| Self { |
| hash_line_comments: true, |
| block_comments: false, |
| slash_line_comments: false, |
| } |
| } |
| /// Only allow "c-style" comments. |
| /// |
| /// Specifically, line comments beginning with `//` and |
| /// block comment like `/* ... */`. |
| pub const fn c_style() -> Self { |
| Self { |
| block_comments: true, |
| slash_line_comments: true, |
| hash_line_comments: false, |
| } |
| } |
| |
| /// Create a new `StripComments` for `input`, using these settings. |
| /// |
| /// Transform `input` into a [`Read`] that strips out comments. |
| /// The types of comments to support are determined by the configuration of |
| /// `self`. |
| /// |
| /// ## Examples |
| /// |
| /// ``` |
| /// use json_strip_comments::CommentSettings; |
| /// use std::io::Read; |
| /// |
| /// let input = r#"{ |
| /// // c line comment |
| /// "a": "b" |
| /// /** multi line |
| /// comment |
| /// */ }"#; |
| /// |
| /// let mut stripped = String::new(); |
| /// CommentSettings::c_style().strip_comments(input.as_bytes()).read_to_string(&mut stripped).unwrap(); |
| /// |
| /// assert_eq!(stripped, "{ |
| /// \n\"a\": \"b\" |
| /// }"); |
| /// ``` |
| /// |
| /// ``` |
| /// use json_strip_comments::CommentSettings; |
| /// use std::io::Read; |
| /// |
| /// let input = r#"{ |
| /// ## shell line comment |
| /// "a": "b" |
| /// }"#; |
| /// |
| /// let mut stripped = String::new(); |
| /// CommentSettings::hash_only().strip_comments(input.as_bytes()).read_to_string(&mut stripped).unwrap(); |
| /// |
| /// assert_eq!(stripped, "{ |
| /// \n\"a\": \"b\"\n}"); |
| /// ``` |
| #[inline] |
| pub fn strip_comments<I: Read>(self, input: I) -> StripComments<I> { |
| StripComments::with_settings(self, input) |
| } |
| } |
| |
| #[inline] |
| fn top(c: &mut u8, settings: CommentSettings) -> State { |
| match *c { |
| b'"' => InString, |
| b'/' => { |
| *c = b' '; |
| InComment |
| } |
| b'#' if settings.hash_line_comments => { |
| *c = b' '; |
| InLineComment |
| } |
| _ => Top, |
| } |
| } |
| |
| #[inline] |
| fn in_string(c: u8) -> State { |
| match c { |
| b'"' => Top, |
| b'\\' => StringEscape, |
| _ => InString, |
| } |
| } |
| |
| fn in_comment(c: &mut u8, settings: CommentSettings) -> Result<State> { |
| let new_state = match c { |
| b'*' if settings.block_comments => InBlockComment, |
| b'/' if settings.slash_line_comments => InLineComment, |
| _ => { |
| invalid_data!() |
| } |
| }; |
| *c = b' '; |
| Ok(new_state) |
| } |
| |
| fn maybe_comment_end(c: &mut u8) -> State { |
| let old = *c; |
| *c = b' '; |
| match old { |
| b'/' => Top, |
| b'*' => MaybeCommentEnd, |
| _ => InBlockComment, |
| } |
| } |
| |
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{ErrorKind, Read};

    /// Runs `input` through a default [`StripComments`] and returns the output, checking that
    /// the number of bytes is unchanged.
    fn strip_string(input: &str) -> String {
        let mut out = String::new();
        let count = StripComments::new(input.as_bytes())
            .read_to_string(&mut out)
            .unwrap();
        assert_eq!(count, input.len());
        out
    }

    /// Builds the expected output for `input` by overwriting each of `comments` with spaces.
    ///
    /// Deriving the expectation from the input keeps the tests independent of how many literal
    /// spaces a source line happens to contain.
    fn blank_out(input: &str, comments: &[&str]) -> String {
        comments.iter().fold(input.to_owned(), |text, comment| {
            text.replace(comment, &" ".repeat(comment.len()))
        })
    }

    /// Reads `input` to the end with `settings` and returns the kind of the resulting error.
    fn error_kind(settings: CommentSettings, input: &str) -> ErrorKind {
        let mut sink = String::new();
        settings
            .strip_comments(input.as_bytes())
            .read_to_string(&mut sink)
            .unwrap_err()
            .kind()
    }

    #[test]
    fn block_comments() {
        let json = r#"{/* Comment */"hi": /** abc */ "bye"}"#;
        let expected = blank_out(json, &["/* Comment */", "/** abc */"]);
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn block_comments_with_possible_end() {
        let json = r#"{/* Comment*PossibleEnd */"hi": /** abc */ "bye"}"#;
        let expected = blank_out(json, &["/* Comment*PossibleEnd */", "/** abc */"]);
        assert_eq!(strip_string(json), expected);
    }

    // See https://github.com/tmccombs/json-comments-rs/issues/12
    // Make sure we can parse a block comment that ends with more than one "*"
    #[test]
    fn doc_comment() {
        let json = r##"/** C **/ { "foo": 123 }"##;
        let expected = blank_out(json, &["/** C **/"]);
        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn line_comments() {
        let json = r#"{
            // line comment
            "a": 4,
            # another
        }"#;

        let expected = blank_out(json, &["// line comment", "# another"]);

        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn incomplete_string() {
        let json = r#""foo"#;
        assert_eq!(
            error_kind(CommentSettings::default(), json),
            ErrorKind::InvalidData
        );
    }

    #[test]
    fn incomplete_comment() {
        let json = "/* foo ";
        assert_eq!(
            error_kind(CommentSettings::default(), json),
            ErrorKind::InvalidData
        );
    }

    #[test]
    fn incomplete_comment2() {
        let json = "/* foo *";
        assert_eq!(
            error_kind(CommentSettings::default(), json),
            ErrorKind::InvalidData
        );
    }

    #[test]
    fn no_hash_comments() {
        let json = r#"# bad comment
        {"a": "b"}"#;
        let mut stripped = String::new();
        CommentSettings::c_style()
            .strip_comments(json.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap();
        // With hash comments disabled, `#` is ordinary data and passes through untouched.
        assert_eq!(stripped, json);
    }

    #[test]
    fn no_slash_line_comments() {
        let json = r#"// bad comment
        {"a": "b"}"#;
        assert_eq!(
            error_kind(CommentSettings::hash_only(), json),
            ErrorKind::InvalidData
        );
    }

    #[test]
    fn no_block_comments() {
        let json = r#"/* bad comment */ {"a": "b"}"#;
        assert_eq!(
            error_kind(CommentSettings::hash_only(), json),
            ErrorKind::InvalidData
        );
    }

    #[test]
    fn strip_in_place() {
        let original = r#"{/* Comment */"hi": /** abc */ "bye"}"#;
        let mut json = String::from(original);
        strip_comments_in_place(&mut json, CommentSettings::default(), false).unwrap();
        assert_eq!(json, blank_out(original, &["/* Comment */", "/** abc */"]));
    }

    #[test]
    fn trailing_comma() {
        let mut json = String::from(
            r#"{
            "a1": [1,],
            "a2": [1,/* x */],
            "a3": [
                1, // x
            ],
            "o1": {v:1,},
            "o2": {v:1,/* x */},
            "o3": {
                "v":1, // x
            },
            # another
        }"#,
        );
        strip_comments_in_place(&mut json, CommentSettings::default(), true).unwrap();

        let expected = r#"{
            "a1": [1 ],
            "a2": [1 ],
            "a3": [
                1
            ],
            "o1": {v:1 },
            "o2": {v:1 },
            "o3": {
                "v":1
            }
        }"#;

        // Comments and trailing commas both turn into spaces, so compare with all whitespace
        // removed.
        assert_eq!(
            json.replace(|s: char| s.is_ascii_whitespace(), ""),
            expected.replace(|s: char| s.is_ascii_whitespace(), "")
        );
    }
}