blob: f994ffd2ac0e4cc19e6621ccc0c303f4acf6669b [file] [log] [blame]
//! Replace json comments and trailing commas in place.
//!
//! A fork of a fork:
//!
//! * <https://github.com/tmccombs/json-comments-rs>
//! * <https://github.com/parcel-bundler/parcel/pull/9032>
//!
//! `json-strip-comments` is a library to strip out comments from JSON. By processing text
//! through a [`StripComments`] adapter first, it is possible to use a standard JSON parser (such
//! as [serde_json](https://crates.io/crates/serde_json)) with quasi-JSON input that contains
//! comments.
//!
//! In fact, this code makes few assumptions about the input and could probably be used to strip
//! comments out of other types of code as well, provided that strings use double quotes and
//! backslashes are used for escapes in strings.
//!
//! The following types of comments are supported:
//! - C style block comments (`/* ... */`)
//! - C style line comments (`// ...`)
//! - Shell style line comments (`# ...`)
//!
//! ## Example
//!
//! ```rust
#![doc = include_str!("../examples/example.rs")]
//! ```
use std::io::{ErrorKind, Read, Result};
/// Position of the comment-stripping state machine between two input bytes.
///
/// The value is carried from one byte to the next (and, for the streaming reader, from one
/// `read` call to the next), so a comment or string may span buffer boundaries.
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
enum State {
/// Outside of any string or comment.
Top,
/// Inside a double-quoted string; bytes are left untouched.
InString,
/// Directly after a backslash inside a string; the next byte is skipped unexamined.
StringEscape,
/// Directly after a top-level `/`; the next byte decides which kind of comment starts.
InComment,
/// Inside a `/* ... */` block comment.
InBlockComment,
/// Inside a block comment, directly after a `*`; a following `/` closes the comment.
MaybeCommentEnd,
/// Inside a `//` or `#` line comment, which runs up to the next `\n`.
InLineComment,
}
use State::{
InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, StringEscape, Top,
};
/// A [`Read`] adapter that wraps another [`Read`] and overwrites every comment with spaces, so
/// that a downstream JSON parser (such as `serde_json`) doesn't choke on them.
///
/// Comment bytes are replaced one for one, so the output has exactly the same length as the
/// input. The newline that ends a line comment is kept, and the contents of double-quoted
/// strings are never modified.
///
/// The supported comments are:
/// - C style block comments (`/* ... */`)
/// - C style line comments (`// ...`)
/// - Shell style line comments (`# ...`)
///
/// ## Example
/// ```
/// use json_strip_comments::StripComments;
/// use std::io::Read;
///
/// let input = "{\n\
///     // c line comment\n\
///     \"a\": \"/*kept*/\", # shell line comment\n\
///     \"b\": 1 /* block comment */\n\
/// }";
///
/// let mut stripped = String::new();
/// StripComments::new(input.as_bytes()).read_to_string(&mut stripped).unwrap();
///
/// // Every comment byte became a space, so the length is unchanged.
/// assert_eq!(stripped.len(), input.len());
///
/// // Ignoring whitespace, only the JSON is left; the string content is untouched.
/// let compact: String = stripped.split_whitespace().collect();
/// assert_eq!(compact, r#"{"a":"/*kept*/","b":1}"#);
/// ```
///
pub struct StripComments<T: Read> {
/// The wrapped reader that supplies the raw, possibly commented, bytes.
inner: T,
/// State of the stripping state machine after the last byte handed out so far.
state: State,
/// Which comment styles are recognised.
settings: CommentSettings,
}
impl<T> StripComments<T>
where
    T: Read,
{
    /// Wrap `input` in a `StripComments` that uses the default settings.
    ///
    /// This is the same as calling [`StripComments::with_settings`] with
    /// [`CommentSettings::default`].
    pub fn new(input: T) -> Self {
        Self::with_settings(CommentSettings::default(), input)
    }

    /// Wrap `input` in a `StripComments` that uses the given `settings`.
    ///
    /// Use this instead of [`StripComments::new`] to restrict which kinds of comments are
    /// accepted. The adapter always starts outside of any string or comment.
    #[inline]
    pub fn with_settings(settings: CommentSettings, input: T) -> Self {
        let state = Top;
        Self {
            inner: input,
            state,
            settings,
        }
    }
}
/// Returns early from the enclosing function with an [`ErrorKind::InvalidData`] I/O error.
///
/// Only usable inside functions that return `std::io::Result<_>`, and `ErrorKind` must be in
/// scope at the call site.
macro_rules! invalid_data {
    () => {
        return Err(std::io::Error::from(ErrorKind::InvalidData))
    };
}
impl<T> Read for StripComments<T>
where
T: Read,
{
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
let count = self.inner.read(buf)?;
if count > 0 {
strip_buf(&mut self.state, &mut buf[..count], self.settings, false)?;
} else if self.state != Top && self.state != InLineComment {
invalid_data!();
}
Ok(count)
}
}
/// Looks ahead from the byte after `*i` to find the next significant byte.
///
/// `strip_buf` calls this while positioned on a `,` to decide whether that comma is a
/// trailing one. Whitespace is skipped and comments are blanked out on the way, using the
/// same per-state helpers as `strip_buf`.
///
/// Returns `Ok(true)` if the first byte that is neither whitespace nor part of a comment is
/// `}` or `]`, and `Ok(false)` for any other byte or if the buffer ends first. When a
/// significant byte is found, `*i` is left on it and `*state` already reflects it, so the
/// caller must not process that byte again. When the buffer is exhausted, `*i` is at least
/// `buf.len()`.
///
/// # Errors
///
/// Propagates the `InvalidData` error of `in_comment` when a `/` is not followed by an
/// enabled comment opener.
fn consume_comment_whitespace_until_maybe_bracket(
state: &mut State,
buf: &mut [u8],
i: &mut usize,
settings: CommentSettings,
) -> Result<bool> {
// Step past the byte the caller is positioned on.
*i += 1;
while *i < buf.len() {
let c = &mut buf[*i];
*state = match state {
Top => {
*state = top(c, settings);
// `top` overwrites a comment opener with a space, so this test also skips the
// opener; the loop then carries on in the comment state that `top` selected.
if c.is_ascii_whitespace() {
*i += 1;
continue;
}
// First significant byte: only a closing bracket makes the comma a trailing one.
return Ok(*c == b'}' || *c == b']');
}
// NOTE(review): the two string arms look unreachable when entered from `strip_buf`,
// because the `Top` arm returns as soon as it sees a `"` -- confirm before removing.
InString => in_string(*c),
StringEscape => InString,
InComment => in_comment(c, settings)?,
InBlockComment => consume_block_comments(buf, i),
MaybeCommentEnd => maybe_comment_end(c),
InLineComment => consume_line_comments(buf, i),
};
*i += 1;
}
Ok(false)
}
/// Runs the stripping state machine over `buf`, blanking comments in place.
///
/// `state` is both input and output: it holds the state to resume from and is left at the
/// state reached after the last byte of `buf`. When `remove_trailing_commas` is set, a `,`
/// seen at top level is replaced by a space if the next significant byte is `}` or `]`.
///
/// # Errors
///
/// Returns `InvalidData` when a `/` is not followed by an enabled comment opener.
fn strip_buf(
    state: &mut State,
    buf: &mut [u8],
    settings: CommentSettings,
    remove_trailing_commas: bool,
) -> Result<()> {
    let mut pos = 0;
    while pos < buf.len() {
        match *state {
            Top => {
                let comma_at = pos;
                *state = top(&mut buf[pos], settings);
                // The look-ahead advances `pos` to the last byte it handled itself.
                if remove_trailing_commas
                    && buf[comma_at] == b','
                    && consume_comment_whitespace_until_maybe_bracket(
                        state, buf, &mut pos, settings,
                    )?
                {
                    buf[comma_at] = b' ';
                }
            }
            InString => *state = in_string(buf[pos]),
            StringEscape => *state = InString,
            InComment => *state = in_comment(&mut buf[pos], settings)?,
            // The two bulk helpers move `pos` to the last byte they consumed.
            InBlockComment => *state = consume_block_comments(buf, &mut pos),
            MaybeCommentEnd => *state = maybe_comment_end(&mut buf[pos]),
            InLineComment => *state = consume_line_comments(buf, &mut pos),
        }
        pos += 1;
    }
    Ok(())
}
/// Blanks the rest of a line comment, starting at `buf[*i]`.
///
/// If a `\n` is found, everything before it is overwritten with spaces, `*i` is left on the
/// newline itself (which is kept) and the state returns to `Top`. Otherwise the remainder of
/// the buffer is blanked, `*i` is left on the last byte and the comment stays open.
#[inline]
fn consume_line_comments(buf: &mut [u8], i: &mut usize) -> State {
    let start = *i;
    if let Some(offset) = memchr::memchr(b'\n', &buf[start..]) {
        let newline = start + offset;
        buf[start..newline].fill(b' ');
        *i = newline;
        Top
    } else {
        *i = buf.len() - 1;
        buf[start..].fill(b' ');
        InLineComment
    }
}
/// Blanks the body of a block comment, starting at `buf[*i]`.
///
/// If a `*` is found, everything up to and including it is overwritten with spaces, `*i` is
/// left on that position and the next byte decides whether the comment ends
/// (`MaybeCommentEnd`). Otherwise the remainder of the buffer is blanked, `*i` is left on the
/// last byte and the comment stays open.
#[inline]
fn consume_block_comments(buf: &mut [u8], i: &mut usize) -> State {
    let start = *i;
    if let Some(offset) = memchr::memchr(b'*', &buf[start..]) {
        let star = start + offset;
        buf[start..=star].fill(b' ');
        *i = star;
        MaybeCommentEnd
    } else {
        *i = buf.len() - 1;
        buf[start..].fill(b' ');
        InBlockComment
    }
}
/// Strips comments from a string in place, replacing it with whitespaces.
///
/// /// ## Example
/// ```
/// use json_strip_comments::{strip_comments_in_place, CommentSettings};
///
/// let mut string = String::from(r#"{
/// // c line comment
/// "a": "comment in string /* a */",
/// ## shell line comment
/// } /** end */"#);
///
/// strip_comments_in_place(&mut string, CommentSettings::default(), false).unwrap();
///
/// assert_eq!(string, "{
/// \n\"a\": \"comment in string /* a */\",
/// \n} ");
///
/// ```
pub fn strip_comments_in_place(
s: &mut str,
settings: CommentSettings,
remove_trailing_commas: bool,
) -> Result<()> {
// Safety: we have made sure the text is UTF-8
strip_buf(
&mut Top,
unsafe { s.as_bytes_mut() },
settings,
remove_trailing_commas,
)
}
/// Strips all supported comment styles and trailing commas from `s` in place.
///
/// Shorthand for [`strip_comments_in_place`] with [`CommentSettings::all`] and trailing-comma
/// removal enabled.
///
/// # Errors
///
/// Returns whatever error [`strip_comments_in_place`] reports for `s`.
pub fn strip(s: &mut str) -> Result<()> {
    let every_style = CommentSettings::all();
    strip_comments_in_place(s, every_style, true)
}
/// Settings for `StripComments`: which comment styles are recognised.
///
/// The default is for all comment types to be enabled.
///
/// A disabled `#` style leaves `#` bytes untouched. A `/` that does not start an enabled
/// C-style comment is reported as invalid data by the stripping functions.
#[derive(Copy, Clone, Debug)]
pub struct CommentSettings {
/// True if c-style block comments (`/* ... */`) are allowed
block_comments: bool,
/// True if c-style `//` line comments are allowed
slash_line_comments: bool,
/// True if shell-style `#` line comments are allowed
hash_line_comments: bool,
}
impl Default for CommentSettings {
fn default() -> Self {
Self::all()
}
}
impl CommentSettings {
    /// Builds a settings value from one flag per comment style.
    const fn from_flags(block: bool, slash_line: bool, hash_line: bool) -> Self {
        Self {
            block_comments: block,
            slash_line_comments: slash_line,
            hash_line_comments: hash_line,
        }
    }

    /// Enable all comment styles.
    pub const fn all() -> Self {
        Self::from_flags(true, true, true)
    }

    /// Only allow line comments starting with `#`.
    pub const fn hash_only() -> Self {
        Self::from_flags(false, false, true)
    }

    /// Only allow "c-style" comments.
    ///
    /// Specifically, line comments beginning with `//` and
    /// block comments like `/* ... */`.
    pub const fn c_style() -> Self {
        Self::from_flags(true, true, false)
    }

    /// Create a new `StripComments` for `input`, using these settings.
    ///
    /// Transforms `input` into a [`Read`] that strips out comments.
    /// The types of comments to support are determined by the configuration of
    /// `self`.
    ///
    /// ## Examples
    ///
    /// ```
    /// use json_strip_comments::CommentSettings;
    /// use std::io::Read;
    ///
    /// let input = "{\n// c line comment\n\"a\": \"b\" /* block comment */\n}";
    ///
    /// let mut stripped = String::new();
    /// CommentSettings::c_style()
    ///     .strip_comments(input.as_bytes())
    ///     .read_to_string(&mut stripped)
    ///     .unwrap();
    ///
    /// assert_eq!(stripped.len(), input.len());
    /// let compact: String = stripped.split_whitespace().collect();
    /// assert_eq!(compact, r#"{"a":"b"}"#);
    /// ```
    ///
    /// ```
    /// use json_strip_comments::CommentSettings;
    /// use std::io::Read;
    ///
    /// let input = "{\n\"a\": \"b\" # shell line comment\n}";
    ///
    /// let mut stripped = String::new();
    /// CommentSettings::hash_only()
    ///     .strip_comments(input.as_bytes())
    ///     .read_to_string(&mut stripped)
    ///     .unwrap();
    ///
    /// assert_eq!(stripped.len(), input.len());
    /// let compact: String = stripped.split_whitespace().collect();
    /// assert_eq!(compact, r#"{"a":"b"}"#);
    /// ```
    #[inline]
    pub fn strip_comments<I: Read>(self, input: I) -> StripComments<I> {
        StripComments::with_settings(self, input)
    }
}
/// Handles one byte seen outside of any string or comment.
///
/// A `"` opens a string. A `/` always starts a tentative comment and an enabled `#` starts a
/// line comment; in both cases the byte itself is overwritten with a space. Every other byte
/// is left alone and keeps the machine at the top level.
#[inline]
fn top(c: &mut u8, settings: CommentSettings) -> State {
    let comment_state = match *c {
        b'"' => return InString,
        b'/' => InComment,
        b'#' if settings.hash_line_comments => InLineComment,
        _ => return Top,
    };
    // Only comment openers reach this point; blank them out.
    *c = b' ';
    comment_state
}
/// Handles one byte seen inside a double-quoted string.
///
/// A `"` closes the string, a backslash makes the following byte an escaped one, and any
/// other byte keeps the machine inside the string.
#[inline]
fn in_string(c: u8) -> State {
    if c == b'"' {
        Top
    } else if c == b'\\' {
        StringEscape
    } else {
        InString
    }
}
/// Handles the byte that follows a top-level `/`.
///
/// A `*` opens a block comment and a `/` opens a line comment, provided the respective style
/// is enabled in `settings`; the byte is then overwritten with a space.
///
/// # Errors
///
/// Returns `InvalidData`, leaving the byte untouched, for anything else.
fn in_comment(c: &mut u8, settings: CommentSettings) -> Result<State> {
    let opened = if *c == b'*' && settings.block_comments {
        InBlockComment
    } else if *c == b'/' && settings.slash_line_comments {
        InLineComment
    } else {
        invalid_data!()
    };
    *c = b' ';
    Ok(opened)
}
/// Handles the byte that follows a `*` inside a block comment.
///
/// The byte is always overwritten with a space. A `/` closes the comment, another `*` may
/// still be followed by the closing `/`, and anything else continues the comment body.
fn maybe_comment_end(c: &mut u8) -> State {
    match std::mem::replace(c, b' ') {
        b'/' => Top,
        b'*' => MaybeCommentEnd,
        _ => InBlockComment,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{ErrorKind, Read};

    /// Runs `input` through a default `StripComments` reader and returns the output, checking
    /// that the number of bytes read equals the input length.
    fn strip_string(input: &str) -> String {
        let mut out = String::new();
        let count = StripComments::new(input.as_bytes())
            .read_to_string(&mut out)
            .unwrap();
        assert_eq!(count, input.len());
        out
    }

    /// The text a stripped `comment` turns into: one space per byte.
    ///
    /// Expectations are derived from the comment text instead of hand-counted runs of
    /// spaces, so they cannot drift out of sync with the inputs.
    fn blank(comment: &str) -> String {
        " ".repeat(comment.len())
    }

    /// The `{<first>"hi": <second> "bye"}` document shared by the block comment tests.
    fn hi_bye(first: &str, second: &str) -> String {
        format!(r#"{{{}"hi": {} "bye"}}"#, first, second)
    }

    /// Reads `input` to the end with `settings` and returns the kind of the resulting error.
    fn read_error_kind(settings: CommentSettings, input: &str) -> ErrorKind {
        let mut stripped = String::new();
        settings
            .strip_comments(input.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap_err()
            .kind()
    }

    #[test]
    fn block_comments() {
        let (first, second) = ("/* Comment */", "/** abc */");
        let stripped = strip_string(&hi_bye(first, second));
        assert_eq!(stripped, hi_bye(&blank(first), &blank(second)));
    }

    #[test]
    fn block_comments_with_possible_end() {
        let (first, second) = ("/* Comment*PossibleEnd */", "/** abc */");
        let stripped = strip_string(&hi_bye(first, second));
        assert_eq!(stripped, hi_bye(&blank(first), &blank(second)));
    }

    // See https://github.com/tmccombs/json-comments-rs/issues/12
    // Make sure we can parse a block comment that ends with more than one "*"
    #[test]
    fn doc_comment() {
        let comment = "/** C **/";
        let render = |lead: &str| format!(r#"{} {{ "foo": 123 }}"#, lead);
        let stripped = strip_string(&render(comment));
        assert_eq!(stripped, render(&blank(comment)));
    }

    #[test]
    fn line_comments() {
        let (slash, hash) = ("// line comment", "# another");
        let render = |first: &str, second: &str| {
            format!(
                "{{\n    {}\n    \"a\": 4,\n    {}\n    }}",
                first, second
            )
        };
        // The newline that ends each line comment must survive.
        assert_eq!(
            strip_string(&render(slash, hash)),
            render(&blank(slash), &blank(hash))
        );
    }

    #[test]
    fn incomplete_string() {
        let kind = read_error_kind(CommentSettings::default(), r#""foo"#);
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment() {
        let kind = read_error_kind(CommentSettings::default(), "/* foo ");
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment2() {
        let kind = read_error_kind(CommentSettings::default(), "/* foo *");
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn no_hash_comments() {
        let json = "# bad comment\n{\"a\": \"b\"}";
        let mut stripped = String::new();
        CommentSettings::c_style()
            .strip_comments(json.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap();
        // With `#` comments disabled the input passes through unchanged.
        assert_eq!(stripped, json);
    }

    #[test]
    fn no_slash_line_comments() {
        let json = "// bad comment\n{\"a\": \"b\"}";
        let kind = read_error_kind(CommentSettings::hash_only(), json);
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn no_block_comments() {
        let json = r#"/* bad comment */ {"a": "b"}"#;
        let kind = read_error_kind(CommentSettings::hash_only(), json);
        assert_eq!(kind, ErrorKind::InvalidData);
    }

    #[test]
    fn strip_in_place() {
        let (first, second) = ("/* Comment */", "/** abc */");
        let mut json = hi_bye(first, second);
        strip_comments_in_place(&mut json, CommentSettings::default(), false).unwrap();
        assert_eq!(json, hi_bye(&blank(first), &blank(second)));
    }

    #[test]
    fn trailing_comma() {
        let mut json = String::from(
            r#"{
            "a1": [1,],
            "a2": [1,/* x */],
            "a3": [
                1, // x
            ],
            "o1": {v:1,},
            "o2": {v:1,/* x */},
            "o3": {
                "v":1, // x
            },
            # another
        }"#,
        );
        strip_comments_in_place(&mut json, CommentSettings::default(), true).unwrap();
        let expected = r#"{
            "a1": [1 ],
            "a2": [1 ],
            "a3": [
                1
            ],
            "o1": {v:1 },
            "o2": {v:1 },
            "o3": {
                "v":1
            }
        }"#;
        // Only the significant bytes are compared; the exact blank runs do not matter here.
        assert_eq!(
            json.replace(|s: char| s.is_ascii_whitespace(), ""),
            expected.replace(|s: char| s.is_ascii_whitespace(), "")
        );
    }
}