| #include "image_io/base/data_scanner.h" |
| |
| namespace photos_editing_formats { |
| namespace image_io { |
| |
namespace {

/// The characters that the whitespace scanners treat as whitespace.
const char kWhitespaceChars[] = " \t\n\r";

/// This function is like strspn but does not assume a null-terminated string.
/// @param s The bytes to examine; they need not be null-terminated.
/// @param slen The number of bytes that may be read starting at s.
/// @param accept A null-terminated set of characters that extend the span.
/// @return The length of the initial run of bytes in s that all appear in
///     accept. The value never exceeds slen.
size_t memspn(const char* s, size_t slen, const char* accept) {
  size_t span = 0;
  // The bound is tested before s[span] is read, so no byte at or beyond
  // s + slen is ever dereferenced (including when slen is zero).
  for (; span < slen; ++span) {
    const char c = s[span];
    const char* candidate = accept;
    while (*candidate != '\0' && *candidate != c) {
      ++candidate;
    }
    if (*candidate == '\0') {
      // c is not one of the accepted characters, so the span ends here. A
      // '\0' byte in s always lands here because the terminator of accept is
      // never treated as an accepted character.
      break;
    }
  }
  return span;
}

/// @return Whether value is in the range [lo:hi].
bool InRange(char value, char lo, char hi) {
  return lo <= value && value <= hi;
}

/// @return Whether the value is the first character of a kName type scanner.
bool IsFirstNameChar(char value) {
  if (value == '_' || value == ':') {
    return true;
  }
  return InRange(value, 'A', 'Z') || InRange(value, 'a', 'z');
}

/// Scans the characters in the s string, where the characters can be any legal
/// character in the name.
/// @param s The bytes to scan; they need not be null-terminated.
/// @param slen The number of bytes available at s.
/// @return The number of name characters scanned.
size_t ScanOptionalNameChars(const char* s, size_t slen) {
  static const char kNameChars[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.-_:";
  return memspn(s, slen, kNameChars);
}

/// Scans the whitespace characters in the s string.
/// @param s The bytes to scan; they need not be null-terminated.
/// @param slen The number of bytes available at s.
/// @return The number of whitespace characters scanned.
size_t ScanWhitespaceChars(const char* s, size_t slen) {
  return memspn(s, slen, kWhitespaceChars);
}

}  // namespace
| |
| std::string DataScanner::GetWhitespaceChars() { return kWhitespaceChars; } |
| |
| DataScanner DataScanner::CreateLiteralScanner(const std::string& literal) { |
| return DataScanner(DataScanner::kLiteral, literal); |
| } |
| |
| DataScanner DataScanner::CreateNameScanner() { |
| return DataScanner(DataScanner::kName); |
| } |
| |
| DataScanner DataScanner::CreateQuotedStringScanner() { |
| return DataScanner(DataScanner::kQuotedString); |
| } |
| |
| DataScanner DataScanner::CreateSentinelScanner(const std::string& sentinels) { |
| return DataScanner(DataScanner::kSentinel, sentinels); |
| } |
| |
| DataScanner DataScanner::CreateThroughLiteralScanner( |
| const std::string& literal) { |
| return DataScanner(DataScanner::kThroughLiteral, literal); |
| } |
| |
| DataScanner DataScanner::CreateWhitespaceScanner() { |
| return DataScanner(DataScanner::kWhitespace); |
| } |
| |
| DataScanner DataScanner::CreateOptionalWhitespaceScanner() { |
| return DataScanner(DataScanner::kOptionalWhitespace); |
| } |
| |
| size_t DataScanner::ExtendTokenLength(size_t delta_length) { |
| token_range_ = |
| DataRange(token_range_.GetBegin(), token_range_.GetEnd() + delta_length); |
| return token_range_.GetLength(); |
| } |
| |
| void DataScanner::SetInternalError(const DataContext& context, |
| const std::string& error_description, |
| DataMatchResult* result) { |
| result->SetType(DataMatchResult::kError); |
| result->SetMessage( |
| Message::kInternalError, |
| context.GetErrorText({}, {GetDescription()}, error_description, "")); |
| } |
| |
| void DataScanner::SetSyntaxError(const DataContext& context, |
| const std::string& error_description, |
| DataMatchResult* result) { |
| result->SetType(DataMatchResult::kError); |
| result->SetMessage(Message::kSyntaxError, |
| context.GetErrorText(error_description, GetDescription())); |
| } |
| |
| DataMatchResult DataScanner::ScanLiteral(const char* cbytes, |
| size_t bytes_available, |
| const DataContext& context) { |
| DataMatchResult result; |
| size_t token_length = token_range_.GetLength(); |
| if (token_length >= literal_or_sentinels_.length()) { |
| SetInternalError(context, "Literal already scanned", &result); |
| return result; |
| } |
| size_t bytes_still_needed = literal_or_sentinels_.length() - token_length; |
| size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available); |
| if (strncmp(&literal_or_sentinels_[token_length], cbytes, bytes_to_compare) == |
| 0) { |
| token_length = ExtendTokenLength(bytes_to_compare); |
| result.SetBytesConsumed(bytes_to_compare); |
| result.SetType(token_length == literal_or_sentinels_.length() |
| ? DataMatchResult::kFull |
| : DataMatchResult::kPartialOutOfData); |
| } else { |
| SetSyntaxError(context, "Expected literal", &result); |
| } |
| return result; |
| } |
| |
| DataMatchResult DataScanner::ScanName(const char* cbytes, |
| size_t bytes_available, |
| const DataContext& context) { |
| DataMatchResult result; |
| size_t token_length = token_range_.GetLength(); |
| if (token_length == 0) { |
| if (!IsFirstNameChar(*cbytes)) { |
| SetSyntaxError(context, "Expected first character of a name", &result); |
| return result; |
| } |
| token_length = ExtendTokenLength(1); |
| result.SetBytesConsumed(1); |
| bytes_available -= 1; |
| cbytes += 1; |
| } |
| size_t optional_bytes_consumed = |
| ScanOptionalNameChars(cbytes, bytes_available); |
| token_length = ExtendTokenLength(optional_bytes_consumed); |
| result.IncrementBytesConsumed(optional_bytes_consumed); |
| if (result.GetBytesConsumed() == 0 && token_length > 0) { |
| result.SetType(DataMatchResult::kFull); |
| } else if (optional_bytes_consumed < bytes_available) { |
| result.SetType(DataMatchResult::kFull); |
| } else { |
| result.SetType(DataMatchResult::kPartialOutOfData); |
| } |
| return result; |
| } |
| |
| DataMatchResult DataScanner::ScanQuotedString(const char* cbytes, |
| size_t bytes_available, |
| const DataContext& context) { |
| const size_t kStart = 0; |
| const size_t kDone = '.'; |
| const size_t kSquote = '\''; |
| const size_t kDquote = '"'; |
| DataMatchResult result; |
| size_t token_length = token_range_.GetLength(); |
| if ((data_ == kStart && token_length != 0) || |
| (data_ != kStart && data_ != kSquote && data_ != kDquote)) { |
| SetInternalError(context, "Inconsistent state", &result); |
| return result; |
| } |
| if (data_ == kStart) { |
| if (*cbytes != kSquote && *cbytes != kDquote) { |
| SetSyntaxError(context, "Expected start of a quoted string", &result); |
| return result; |
| } |
| data_ = *cbytes++; |
| bytes_available--; |
| result.SetBytesConsumed(1); |
| token_length = ExtendTokenLength(1); |
| } |
| const char* ebytes = reinterpret_cast<const char*>( |
| memchr(cbytes, static_cast<int>(data_), bytes_available)); |
| size_t bytes_scanned = ebytes ? ebytes - cbytes : bytes_available; |
| result.IncrementBytesConsumed(bytes_scanned); |
| token_length = ExtendTokenLength(bytes_scanned); |
| if (bytes_scanned == bytes_available) { |
| result.SetType(DataMatchResult::kPartialOutOfData); |
| } else { |
| result.SetType(DataMatchResult::kFull); |
| result.IncrementBytesConsumed(1); |
| ExtendTokenLength(1); |
| data_ = kDone; |
| } |
| return result; |
| } |
| |
| DataMatchResult DataScanner::ScanSentinel(const char* cbytes, |
| size_t bytes_available, |
| const DataContext& context) { |
| DataMatchResult result; |
| if (data_ != 0) { |
| SetInternalError(context, "Sentinel already scanned", &result); |
| return result; |
| } |
| char cbyte = *cbytes; |
| for (size_t index = 0; index < literal_or_sentinels_.size(); ++index) { |
| char sentinel = literal_or_sentinels_[index]; |
| if ((sentinel == '~' && IsFirstNameChar(cbyte)) || cbyte == sentinel) { |
| ExtendTokenLength(1); |
| result.SetBytesConsumed(1).SetType(DataMatchResult::kFull); |
| data_ = sentinel; |
| break; |
| } |
| } |
| if (result.GetBytesConsumed() == 0) { |
| SetSyntaxError(context, "Expected sentinal character", &result); |
| } |
| return result; |
| } |
| |
| DataMatchResult DataScanner::ScanThroughLiteral(const char* cbytes, |
| size_t bytes_available, |
| const DataContext& context) { |
| DataMatchResult result; |
| size_t& scanned_literal_length = data_; |
| if (scanned_literal_length >= literal_or_sentinels_.length()) { |
| SetInternalError(context, "Literal already scanned", &result); |
| return result; |
| } |
| while (bytes_available > 0) { |
| if (scanned_literal_length == 0) { |
| // Literal scan not in progress. Find the first char of the literal. |
| auto* matched_byte = reinterpret_cast<const char*>( |
| memchr(cbytes, literal_or_sentinels_[0], bytes_available)); |
| if (matched_byte == nullptr) { |
| // first char not found and chars exhausted. |
| ExtendTokenLength(bytes_available); |
| result.IncrementBytesConsumed(bytes_available); |
| result.SetType(DataMatchResult::kPartialOutOfData); |
| break; |
| } else { |
| // found the first char of the literal. |
| size_t bytes_scanned = (matched_byte - cbytes) + 1; |
| result.IncrementBytesConsumed(bytes_scanned); |
| bytes_available -= bytes_scanned; |
| cbytes += bytes_scanned; |
| ExtendTokenLength(bytes_scanned); |
| scanned_literal_length = 1; |
| } |
| } |
| // check if the rest of the literal is there. |
| size_t bytes_still_needed = |
| literal_or_sentinels_.length() - scanned_literal_length; |
| size_t bytes_to_compare = std::min(bytes_still_needed, bytes_available); |
| if (strncmp(&literal_or_sentinels_[scanned_literal_length], cbytes, |
| bytes_to_compare) == 0) { |
| // Yes, the whole literal is there or chars are exhausted. |
| ExtendTokenLength(bytes_to_compare); |
| scanned_literal_length += bytes_to_compare; |
| result.IncrementBytesConsumed(bytes_to_compare); |
| result.SetType(scanned_literal_length == literal_or_sentinels_.length() |
| ? DataMatchResult::kFull |
| : DataMatchResult::kPartialOutOfData); |
| break; |
| } |
| // false alarm, the firsts char of the literal were found, but not the |
| // whole enchilada. Keep searching at one past the first char of the match. |
| scanned_literal_length = 0; |
| } |
| return result; |
| } |
| |
| DataMatchResult DataScanner::ScanWhitespace(const char* cbytes, |
| size_t bytes_available, |
| const DataContext& context) { |
| DataMatchResult result; |
| size_t token_length = token_range_.GetLength(); |
| result.SetBytesConsumed(ScanWhitespaceChars(cbytes, bytes_available)); |
| token_length = ExtendTokenLength(result.GetBytesConsumed()); |
| if (result.GetBytesConsumed() == 0) { |
| if (token_length == 0 && type_ == kWhitespace) { |
| SetSyntaxError(context, "Expected whitespace", &result); |
| } else { |
| result.SetType(DataMatchResult::kFull); |
| } |
| } else { |
| result.SetType((result.GetBytesConsumed() < bytes_available) |
| ? DataMatchResult::kFull |
| : DataMatchResult::kPartialOutOfData); |
| } |
| return result; |
| } |
| |
| DataMatchResult DataScanner::Scan(const DataContext& context) { |
| scan_call_count_ += 1; |
| DataMatchResult result; |
| if (!context.IsValidLocationAndRange()) { |
| SetInternalError(context, context.GetInvalidLocationAndRangeErrorText(), |
| &result); |
| return result; |
| } |
| if (!token_range_.IsValid()) { |
| token_range_ = DataRange(context.GetLocation(), context.GetLocation()); |
| } |
| size_t bytes_available = context.GetRange().GetEnd() - context.GetLocation(); |
| const char* cbytes = context.GetCharBytes(); |
| switch (type_) { |
| case kLiteral: |
| result = ScanLiteral(cbytes, bytes_available, context); |
| break; |
| case kName: |
| result = ScanName(cbytes, bytes_available, context); |
| break; |
| case kQuotedString: |
| result = ScanQuotedString(cbytes, bytes_available, context); |
| break; |
| case kSentinel: |
| result = ScanSentinel(cbytes, bytes_available, context); |
| break; |
| case kThroughLiteral: |
| result = ScanThroughLiteral(cbytes, bytes_available, context); |
| break; |
| case kWhitespace: |
| case kOptionalWhitespace: |
| result = ScanWhitespace(cbytes, bytes_available, context); |
| break; |
| default: |
| SetInternalError(context, "Undefined scanner type", &result); |
| break; |
| } |
| return result; |
| } |
| |
| void DataScanner::ResetTokenRange() { token_range_ = DataRange(); } |
| |
| void DataScanner::Reset() { |
| data_ = 0; |
| scan_call_count_ = 0; |
| ResetTokenRange(); |
| } |
| |
| std::string DataScanner::GetDescription() const { |
| std::string description; |
| switch (type_) { |
| case kLiteral: |
| description = "Literal:'"; |
| description += literal_or_sentinels_; |
| description += "'"; |
| break; |
| case kName: |
| description = "Name"; |
| break; |
| case kQuotedString: |
| description = "QuotedString"; |
| break; |
| case kSentinel: |
| description = "OneOf:'"; |
| description += literal_or_sentinels_; |
| description += "'"; |
| break; |
| case kThroughLiteral: |
| description = "ThruLiteral:'"; |
| description += literal_or_sentinels_; |
| description += "'"; |
| break; |
| case kWhitespace: |
| description = "Whitespace"; |
| break; |
| case kOptionalWhitespace: |
| description = "OptionalWhitespace"; |
| break; |
| } |
| return description; |
| } |
| |
| std::string DataScanner::GetLiteral() const { |
| return type_ == kLiteral || type_ == kThroughLiteral ? literal_or_sentinels_ |
| : ""; |
| } |
| |
| std::string DataScanner::GetSentenels() const { |
| return type_ == kSentinel ? literal_or_sentinels_ : ""; |
| } |
| |
| char DataScanner::GetSentinel() const { return type_ == kSentinel ? data_ : 0; } |
| |
| } // namespace image_io |
| } // namespace photos_editing_formats |