| // Copyright 2021 Code Intelligence GmbH |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Modified from |
| // https://raw.githubusercontent.com/google/atheris/034284dc4bb1ad4f4ab6ba5d34fb4dca7c633660/fuzzed_data_provider.cc |
| // |
| // Original license and copyright notices: |
| // |
| // Copyright 2020 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Modified from |
| // https://github.com/llvm/llvm-project/blob/70de7e0d9a95b7fcd7c105b06bd90fdf4e01f563/compiler-rt/include/fuzzer/FuzzedDataProvider.h |
| // |
| // Original license and copyright notices: |
| // |
| //===- FuzzedDataProvider.h - Utility header for fuzz targets ---*- C++ -* ===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| |
#include "fuzzed_data_provider.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "absl/strings/str_format.h"
| |
| namespace { |
| |
// Start of the part of the fuzzer input that has not yet been consumed from
// the front.
const uint8_t *gDataPtr = nullptr;
// Number of fuzzer input bytes that are still available.
std::size_t gRemainingBytes = 0;

// Skips `bytes` bytes at the front of the buffer. If fewer than `bytes` bytes
// are left, the buffer is marked as fully consumed instead and the data
// pointer is left where it is.
void Advance(const std::size_t bytes) {
  if (bytes <= gRemainingBytes) {
    gDataPtr += bytes;
    gRemainingBytes -= bytes;
    return;
  }
  gRemainingBytes = 0;
}
| |
| void ThrowIllegalArgumentException(JNIEnv &env, const std::string &message) { |
| jclass illegal_argument_exception = |
| env.FindClass("java/lang/IllegalArgumentException"); |
| env.ThrowNew(illegal_argument_exception, message.c_str()); |
| } |
| |
| template <typename T> |
| struct JniArrayType {}; |
| |
| #define JNI_ARRAY_TYPE(lower_case, sentence_case) \ |
| template <> \ |
| struct JniArrayType<j##lower_case> { \ |
| typedef j##lower_case type; \ |
| typedef j##lower_case##Array array_type; \ |
| static constexpr array_type (JNIEnv::*kNewArrayFunc)(jsize) = \ |
| &JNIEnv::New##sentence_case##Array; \ |
| static constexpr void (JNIEnv::*kSetArrayRegionFunc)( \ |
| array_type array, jsize start, jsize len, \ |
| const type *buf) = &JNIEnv::Set##sentence_case##ArrayRegion; \ |
| }; |
| |
| JNI_ARRAY_TYPE(boolean, Boolean); |
| JNI_ARRAY_TYPE(byte, Byte); |
| JNI_ARRAY_TYPE(short, Short); |
| JNI_ARRAY_TYPE(int, Int); |
| JNI_ARRAY_TYPE(long, Long); |
| |
| template <typename T> |
| typename JniArrayType<T>::array_type JNICALL |
| ConsumeIntegralArray(JNIEnv &env, jobject self, jint max_length) { |
| if (max_length < 0) { |
| ThrowIllegalArgumentException(env, "maxLength must not be negative"); |
| return nullptr; |
| } |
| // Arrays of integral types are considered data and thus consumed from the |
| // beginning of the buffer. |
| std::size_t max_num_bytes = std::min(sizeof(T) * max_length, gRemainingBytes); |
| jsize actual_length = max_num_bytes / sizeof(T); |
| std::size_t actual_num_bytes = sizeof(T) * actual_length; |
| auto array = (env.*(JniArrayType<T>::kNewArrayFunc))(actual_length); |
| (env.*(JniArrayType<T>::kSetArrayRegionFunc))( |
| array, 0, actual_length, reinterpret_cast<const T *>(gDataPtr)); |
| Advance(actual_num_bytes); |
| return array; |
| } |
| |
| template <typename T> |
| jbyteArray JNICALL ConsumeRemainingAsArray(JNIEnv &env, jobject self) { |
| return ConsumeIntegralArray<T>(env, self, std::numeric_limits<jint>::max()); |
| } |
| |
| template <typename T> |
| T JNICALL ConsumeIntegralInRange(JNIEnv &env, jobject self, T min, T max) { |
| if (min > max) { |
| ThrowIllegalArgumentException( |
| env, absl::StrFormat( |
| "Consume*InRange: min must be <= max (got min: %d, max: %d)", |
| min, max)); |
| return 0; |
| } |
| |
| uint64_t range = static_cast<uint64_t>(max) - min; |
| uint64_t result = 0; |
| std::size_t offset = 0; |
| |
| while (offset < 8 * sizeof(T) && (range >> offset) > 0 && |
| gRemainingBytes != 0) { |
| --gRemainingBytes; |
| result = (result << 8u) | gDataPtr[gRemainingBytes]; |
| offset += 8; |
| } |
| |
| if (range != std::numeric_limits<T>::max()) |
| // We accept modulo bias in favor of reading a dynamic number of bytes as |
| // this would make it harder for the fuzzer to mutate towards values from |
| // the table of recent compares. |
| result = result % (range + 1); |
| |
| return static_cast<T>(min + result); |
| } |
| |
| template <typename T> |
| T JNICALL ConsumeIntegral(JNIEnv &env, jobject self) { |
| // First generate an unsigned value and then (safely) cast it to a signed |
| // integral type. By doing this rather than calling ConsumeIntegralInRange |
| // with bounds [signed_min, signed_max], we ensure that there is a direct |
| // correspondence between the consumed raw bytes and the result (e.g., 0 |
| // corresponds to 0 and not to signed_min). This should help mutating |
| // towards entries of the table of recent compares. |
| using UnsignedT = typename std::make_unsigned<T>::type; |
| static_assert( |
| std::numeric_limits<UnsignedT>::is_modulo, |
| "Unsigned to signed conversion requires modulo-based overflow handling"); |
| return static_cast<T>(ConsumeIntegralInRange<UnsignedT>( |
| env, self, 0, std::numeric_limits<UnsignedT>::max())); |
| } |
| |
| bool JNICALL ConsumeBool(JNIEnv &env, jobject self) { |
| return ConsumeIntegral<uint8_t>(env, self) & 1u; |
| } |
| |
| jchar ConsumeCharInternal(JNIEnv &env, jobject self, bool filter_surrogates) { |
| auto raw_codepoint = ConsumeIntegral<jchar>(env, self); |
| if (filter_surrogates && raw_codepoint >= 0xd800 && raw_codepoint < 0xe000) |
| raw_codepoint -= 0xd800; |
| return raw_codepoint; |
| } |
| |
| jchar JNICALL ConsumeChar(JNIEnv &env, jobject self) { |
| return ConsumeCharInternal(env, self, false); |
| } |
| |
| jchar JNICALL ConsumeCharNoSurrogates(JNIEnv &env, jobject self) { |
| return ConsumeCharInternal(env, self, true); |
| } |
| |
| template <typename T> |
| T JNICALL ConsumeProbability(JNIEnv &env, jobject self) { |
| using IntegralType = |
| typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t, |
| uint64_t>::type; |
| T result = static_cast<T>(ConsumeIntegral<IntegralType>(env, self)); |
| result /= static_cast<T>(std::numeric_limits<IntegralType>::max()); |
| return result; |
| } |
| |
| template <typename T> |
| T JNICALL ConsumeFloatInRange(JNIEnv &env, jobject self, T min, T max) { |
| if (min > max) { |
| ThrowIllegalArgumentException( |
| env, absl::StrFormat( |
| "Consume*InRange: min must be <= max (got min: %f, max: %f)", |
| min, max)); |
| return 0.0; |
| } |
| |
| T range; |
| T result = min; |
| |
| // Deal with overflow, in the event min and max are very far apart |
| if (min < 0 && max > 0 && min + std::numeric_limits<T>::max() < max) { |
| range = (max / 2) - (min / 2); |
| if (ConsumeBool(env, self)) { |
| result += range; |
| } |
| } else { |
| range = max - min; |
| } |
| |
| T probability = ConsumeProbability<T>(env, self); |
| return result + range * probability; |
| } |
| |
| template <typename T> |
| T JNICALL ConsumeRegularFloat(JNIEnv &env, jobject self) { |
| return ConsumeFloatInRange(env, self, std::numeric_limits<T>::lowest(), |
| std::numeric_limits<T>::max()); |
| } |
| |
| template <typename T> |
| T JNICALL ConsumeFloat(JNIEnv &env, jobject self) { |
| if (!gRemainingBytes) return 0.0; |
| |
| auto type_val = ConsumeIntegral<uint8_t>(env, self); |
| |
| if (type_val <= 10) { |
| // Consume the same amount of bytes as for a regular float/double |
| ConsumeRegularFloat<T>(env, self); |
| |
| switch (type_val) { |
| case 0: |
| return 0.0; |
| case 1: |
| return -0.0; |
| case 2: |
| return std::numeric_limits<T>::infinity(); |
| case 3: |
| return -std::numeric_limits<T>::infinity(); |
| case 4: |
| return std::numeric_limits<T>::quiet_NaN(); |
| case 5: |
| return std::numeric_limits<T>::denorm_min(); |
| case 6: |
| return -std::numeric_limits<T>::denorm_min(); |
| case 7: |
| return std::numeric_limits<T>::min(); |
| case 8: |
| return -std::numeric_limits<T>::min(); |
| case 9: |
| return std::numeric_limits<T>::max(); |
| case 10: |
| return -std::numeric_limits<T>::max(); |
| default: |
| abort(); |
| } |
| } |
| |
| T regular = ConsumeRegularFloat<T>(env, self); |
| return regular; |
| } |
| |
// Stand-in for C++20's std::countl_one restricted to single bytes: returns the
// number of consecutive one bits starting at the most significant bit.
inline __attribute__((always_inline)) uint8_t countl_one(uint8_t byte) {
  const auto inverted = static_cast<uint8_t>(~byte);
  // __builtin_clz has an undefined result for 0, which is what an all-ones
  // byte inverts to.
  if (inverted == 0) return 8;
  // The builtin operates on a 32-bit value, so discount the 24 zero bits in
  // front of the byte.
  return __builtin_clz(inverted) - 24;
}
| |
// Rewrites `byte` in place into a UTF-8 continuation byte (0b10xxxxxx) while
// keeping its six low bits.
inline __attribute__((always_inline)) void ForceContinuationByte(
    uint8_t &byte) {
  constexpr unsigned kPayloadMask = 0x3Fu;
  constexpr unsigned kContinuationHeader = 0x80u;
  byte = (byte & kPayloadMask) | kContinuationHeader;
}
| |
// Leading and continuation byte of the two-byte encoding of the zero
// character (0b11000000 0b10000000).
constexpr uint8_t kTwoByteZeroLeadingByte = 0xC0;
constexpr uint8_t kTwoByteZeroContinuationByte = 0x80;
// Leading byte of a three-byte sequence whose payload bits are all zero
// (0b11100000).
constexpr uint8_t kThreeByteLowLeadingByte = 0xE0;
// Leading byte shared by all three-byte encoded surrogates (0b11101101).
constexpr uint8_t kSurrogateLeadingByte = 0xED;
| |
// States of the modified UTF-8 fix-up state machine. Each state names the kind
// of byte that is expected next.
enum class Utf8GenerationState {
  // First byte of a new character.
  LeadingByte_Generic,
  // First byte after a backslash that has been read but not emitted.
  LeadingByte_AfterBackslash,

  // Second byte of a two-byte sequence.
  ContinuationByte_Generic,
  // Second byte of a two-byte sequence whose leading byte carries so few
  // payload bits that the character could collide with ASCII.
  ContinuationByte_LowLeadingByte,

  // Second byte of a three-byte sequence started by kThreeByteLowLeadingByte.
  FirstContinuationByte_LowLeadingByte,
  // Second byte of a three-byte sequence started by kSurrogateLeadingByte.
  FirstContinuationByte_SurrogateLeadingByte,
  // Second byte of any other three-byte sequence.
  FirstContinuationByte_Generic,
  // Third byte of a three-byte sequence that is not part of a surrogate pair.
  SecondContinuationByte_Generic,

  // First byte of the low surrogate that has to follow a high surrogate.
  LeadingByte_LowSurrogate,
  // Second byte of that low surrogate.
  FirstContinuationByte_LowSurrogate,
  // Third byte of a high surrogate.
  SecondContinuationByte_HighSurrogate,
  // Third byte of a low surrogate.
  SecondContinuationByte_LowSurrogate,
};
| |
| // Consumes up to `max_bytes` arbitrary bytes pointed to by `ptr` and returns a |
| // valid "modified UTF-8" string of length at most `max_length` that resembles |
| // the input bytes as closely as possible as well as the number of consumed |
| // bytes. If `stop_on_slash` is true, then the string will end on the first |
| // single consumed '\'. |
| // |
| // "Modified UTF-8" is the string encoding used by the JNI. It is the same as |
| // the legacy encoding CESU-8, but with `\0` coded on two bytes. In these |
| // encodings, code points requiring 4 bytes in modern UTF-8 are represented as |
| // two surrogates, each of which is coded on 3 bytes. |
| // |
| // This function has been designed with the following goals in mind: |
| // 1. The generated string should be biased towards containing ASCII characters |
| // as these are often the ones that affect control flow directly. |
| // 2. Correctly encoded data (e.g. taken from the table of recent compares) |
| // should be emitted unchanged. |
| // 3. The raw fuzzer input should be preserved as far as possible, but the |
| // output must always be correctly encoded. |
| // |
| // The JVM accepts string in two encodings: UTF-16 and modified UTF-8. |
| // Generating UTF-16 would make it harder to fulfill the first design goal and |
| // would potentially hinder compatibility with corpora using the much more |
| // widely used UTF-8 encoding, which is reasonably similar to modified UTF-8. As |
| // a result, this function uses modified UTF-8. |
| // |
| // See Algorithm 1 of https://arxiv.org/pdf/2010.03090.pdf for more details on |
| // the individual cases involved in determining the validity of a UTF-8 string. |
| template <bool ascii_only, bool stop_on_backslash> |
| std::pair<std::string, std::size_t> FixUpModifiedUtf8(const uint8_t *data, |
| std::size_t max_bytes, |
| jint max_length) { |
| std::string str; |
| // Every character in modified UTF-8 is coded on at most six bytes. Every |
| // consumed byte is transformed into at most one code unit, except for the |
| // case of a zero byte which requires two bytes. |
| if (max_bytes > std::numeric_limits<std::size_t>::max() / 2) |
| max_bytes = std::numeric_limits<std::size_t>::max() / 2; |
| if (ascii_only) { |
| str.reserve( |
| std::min(2 * static_cast<std::size_t>(max_length), 2 * max_bytes)); |
| } else { |
| str.reserve( |
| std::min(6 * static_cast<std::size_t>(max_length), 2 * max_bytes)); |
| } |
| |
| Utf8GenerationState state = Utf8GenerationState::LeadingByte_Generic; |
| const uint8_t *pos = data; |
| const auto data_end = data + max_bytes; |
| for (std::size_t length = 0; length < max_length && pos != data_end; ++pos) { |
| uint8_t c = *pos; |
| if (ascii_only) { |
| // Clamp to 7-bit ASCII range. |
| c &= 0x7Fu; |
| } |
| // Fix up c or previously read bytes according to the value of c and the |
| // current state. In the end, add the fixed up code unit c to the string. |
| // Exception: The zero character has to be coded on two bytes and is the |
| // only case in which an iteration of the loop adds two code units. |
| switch (state) { |
| case Utf8GenerationState::LeadingByte_Generic: { |
| switch (ascii_only ? 0 : countl_one(c)) { |
| case 0: { |
| // valid - 1-byte code point (ASCII) |
| // The zero character has to be coded on two bytes in modified |
| // UTF-8. |
| if (c == 0) { |
| str += static_cast<char>(kTwoByteZeroLeadingByte); |
| c = kTwoByteZeroContinuationByte; |
| } else if (stop_on_backslash && c == '\\') { |
| state = Utf8GenerationState::LeadingByte_AfterBackslash; |
| // The slash either signals the end of the string or is skipped, |
| // so don't append anything. |
| continue; |
| } |
| // Remain in state LeadingByte. |
| ++length; |
| break; |
| } |
| case 1: { |
| // invalid - continuation byte at leader byte position |
| // Fix it up to be of the form 0b110XXXXX and fall through to the |
| // case of a 2-byte sequence. |
| c |= 1u << 6u; |
| c &= ~(1u << 5u); |
| [[fallthrough]]; |
| } |
| case 2: { |
| // (most likely) valid - start of a 2-byte sequence |
| // ASCII characters must be coded on a single byte, so we must |
| // ensure that the lower two bits combined with the six non-header |
| // bits of the following byte do not form a 7-bit ASCII value. This |
| // could only be the case if at most the lowest bit is set. |
| if ((c & 0b00011110u) == 0) { |
| state = Utf8GenerationState::ContinuationByte_LowLeadingByte; |
| } else { |
| state = Utf8GenerationState::ContinuationByte_Generic; |
| } |
| break; |
| } |
| // The default case falls through to the case of three leading ones |
| // coming right after. |
| default: { |
| // invalid - at least four leading ones |
| // In the case of exactly four leading ones, this would be valid |
| // UTF-8, but is not valid in the JVM's modified UTF-8 encoding. |
| // Fix it up by clearing the fourth leading one and falling through |
| // to the 3-byte case. |
| c &= ~(1u << 4u); |
| [[fallthrough]]; |
| } |
| case 3: { |
| // valid - start of a 3-byte sequence |
| if (c == kThreeByteLowLeadingByte) { |
| state = Utf8GenerationState::FirstContinuationByte_LowLeadingByte; |
| } else if (c == kSurrogateLeadingByte) { |
| state = Utf8GenerationState:: |
| FirstContinuationByte_SurrogateLeadingByte; |
| } else { |
| state = Utf8GenerationState::FirstContinuationByte_Generic; |
| } |
| break; |
| } |
| } |
| break; |
| } |
| case Utf8GenerationState::LeadingByte_AfterBackslash: { |
| if (c != '\\') { |
| // Mark the current byte as consumed. |
| ++pos; |
| goto done; |
| } |
| // A double backslash is consumed as a single one. As we skipped the |
| // first one, emit the second one as usual. |
| state = Utf8GenerationState::LeadingByte_Generic; |
| ++length; |
| break; |
| } |
| case Utf8GenerationState::ContinuationByte_LowLeadingByte: { |
| ForceContinuationByte(c); |
| // Preserve the zero character, which is coded on two bytes in modified |
| // UTF-8. In all other cases ensure that we are not incorrectly encoding |
| // an ASCII character on two bytes by setting the eigth least |
| // significant bit of the encoded value (second least significant bit of |
| // the leading byte). |
| auto previous_c = static_cast<uint8_t>(str.back()); |
| if (previous_c != kTwoByteZeroLeadingByte || |
| c != kTwoByteZeroContinuationByte) { |
| str.back() = static_cast<char>(previous_c | (1u << 1u)); |
| } |
| state = Utf8GenerationState::LeadingByte_Generic; |
| ++length; |
| break; |
| } |
| case Utf8GenerationState::ContinuationByte_Generic: { |
| ForceContinuationByte(c); |
| state = Utf8GenerationState::LeadingByte_Generic; |
| ++length; |
| break; |
| } |
| case Utf8GenerationState::FirstContinuationByte_LowLeadingByte: { |
| ForceContinuationByte(c); |
| // Ensure that the current code point could not have been coded on two |
| // bytes. As two bytes encode up to 11 bits and three bytes encode up |
| // to 16 bits, we thus have to make it such that the five highest bits |
| // are not all zero. Four of these bits are the non-header bits of the |
| // leader byte. Thus, set the highest non-header bit in this byte (fifth |
| // highest in the encoded value). |
| c |= 1u << 5u; |
| state = Utf8GenerationState::SecondContinuationByte_Generic; |
| break; |
| } |
| case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte: { |
| ForceContinuationByte(c); |
| if (c & (1u << 5u)) { |
| // Start with a high surrogate (0xD800-0xDBFF). c contains the second |
| // byte and the first two bits of the third byte. The first two bits |
| // of this second byte are fixed to 10 (in 0x8-0xB). |
| c |= 1u << 5u; |
| c &= ~(1u << 4u); |
| // The high surrogate must be followed by a low surrogate. |
| state = Utf8GenerationState::SecondContinuationByte_HighSurrogate; |
| } else { |
| state = Utf8GenerationState::SecondContinuationByte_Generic; |
| } |
| break; |
| } |
| case Utf8GenerationState::FirstContinuationByte_Generic: { |
| ForceContinuationByte(c); |
| state = Utf8GenerationState::SecondContinuationByte_Generic; |
| break; |
| } |
| case Utf8GenerationState::SecondContinuationByte_HighSurrogate: { |
| ForceContinuationByte(c); |
| state = Utf8GenerationState::LeadingByte_LowSurrogate; |
| ++length; |
| break; |
| } |
| case Utf8GenerationState::SecondContinuationByte_LowSurrogate: |
| case Utf8GenerationState::SecondContinuationByte_Generic: { |
| ForceContinuationByte(c); |
| state = Utf8GenerationState::LeadingByte_Generic; |
| ++length; |
| break; |
| } |
| case Utf8GenerationState::LeadingByte_LowSurrogate: { |
| // We have to emit a low surrogate leading byte, which is a fixed value. |
| // We still consume a byte from the input to make fuzzer changes more |
| // stable and preserve valid surrogate pairs picked up from e.g. the |
| // table of recent compares. |
| c = kSurrogateLeadingByte; |
| state = Utf8GenerationState::FirstContinuationByte_LowSurrogate; |
| break; |
| } |
| case Utf8GenerationState::FirstContinuationByte_LowSurrogate: { |
| ForceContinuationByte(c); |
| // Low surrogates are code points in the range 0xDC00-0xDFFF. c contains |
| // the second byte and the first two bits of the third byte. The first |
| // two bits of this second byte are fixed to 11 (in 0xC-0xF). |
| c |= (1u << 5u) | (1u << 4u); |
| // The second continuation byte of a low surrogate is not restricted, |
| // but we need to track it differently to allow for correct backtracking |
| // if it isn't completed. |
| state = Utf8GenerationState::SecondContinuationByte_LowSurrogate; |
| break; |
| } |
| } |
| str += static_cast<uint8_t>(c); |
| } |
| |
| // Backtrack the current incomplete character. |
| switch (state) { |
| case Utf8GenerationState::SecondContinuationByte_LowSurrogate: |
| str.pop_back(); |
| [[fallthrough]]; |
| case Utf8GenerationState::FirstContinuationByte_LowSurrogate: |
| str.pop_back(); |
| [[fallthrough]]; |
| case Utf8GenerationState::LeadingByte_LowSurrogate: |
| str.pop_back(); |
| [[fallthrough]]; |
| case Utf8GenerationState::SecondContinuationByte_Generic: |
| case Utf8GenerationState::SecondContinuationByte_HighSurrogate: |
| str.pop_back(); |
| [[fallthrough]]; |
| case Utf8GenerationState::ContinuationByte_Generic: |
| case Utf8GenerationState::ContinuationByte_LowLeadingByte: |
| case Utf8GenerationState::FirstContinuationByte_Generic: |
| case Utf8GenerationState::FirstContinuationByte_LowLeadingByte: |
| case Utf8GenerationState::FirstContinuationByte_SurrogateLeadingByte: |
| str.pop_back(); |
| [[fallthrough]]; |
| case Utf8GenerationState::LeadingByte_Generic: |
| case Utf8GenerationState::LeadingByte_AfterBackslash: |
| // No backtracking required. |
| break; |
| } |
| |
| done: |
| return std::make_pair(str, pos - data); |
| } |
| } // namespace |
| |
| namespace jazzer { |
| // Exposed for testing only. |
| std::pair<std::string, std::size_t> FixUpModifiedUtf8(const uint8_t *data, |
| std::size_t max_bytes, |
| jint max_length, |
| bool ascii_only, |
| bool stop_on_backslash) { |
| if (ascii_only) { |
| if (stop_on_backslash) { |
| return ::FixUpModifiedUtf8<true, true>(data, max_bytes, max_length); |
| } else { |
| return ::FixUpModifiedUtf8<true, false>(data, max_bytes, max_length); |
| } |
| } else { |
| if (stop_on_backslash) { |
| return ::FixUpModifiedUtf8<false, true>(data, max_bytes, max_length); |
| } else { |
| return ::FixUpModifiedUtf8<false, false>(data, max_bytes, max_length); |
| } |
| } |
| } |
| } // namespace jazzer |
| |
| namespace { |
| jstring ConsumeStringInternal(JNIEnv &env, jint max_length, bool ascii_only, |
| bool stop_on_backslash) { |
| if (max_length < 0) { |
| ThrowIllegalArgumentException(env, "maxLength must not be negative"); |
| return nullptr; |
| } |
| |
| if (max_length == 0 || gRemainingBytes == 0) return env.NewStringUTF(""); |
| |
| if (gRemainingBytes == 1) { |
| Advance(1); |
| return env.NewStringUTF(""); |
| } |
| |
| std::size_t max_bytes = gRemainingBytes; |
| std::string str; |
| std::size_t consumed_bytes; |
| std::tie(str, consumed_bytes) = jazzer::FixUpModifiedUtf8( |
| gDataPtr, max_bytes, max_length, ascii_only, stop_on_backslash); |
| Advance(consumed_bytes); |
| return env.NewStringUTF(str.c_str()); |
| } |
| |
| jstring JNICALL ConsumeAsciiString(JNIEnv &env, jobject self, jint max_length) { |
| return ConsumeStringInternal(env, max_length, true, true); |
| } |
| |
| jstring JNICALL ConsumeString(JNIEnv &env, jobject self, jint max_length) { |
| return ConsumeStringInternal(env, max_length, false, true); |
| } |
| |
| jstring JNICALL ConsumeRemainingAsAsciiString(JNIEnv &env, jobject self) { |
| return ConsumeStringInternal(env, std::numeric_limits<jint>::max(), true, |
| false); |
| } |
| |
| jstring JNICALL ConsumeRemainingAsString(JNIEnv &env, jobject self) { |
| return ConsumeStringInternal(env, std::numeric_limits<jint>::max(), false, |
| false); |
| } |
| |
| std::size_t RemainingBytes(JNIEnv &env, jobject self) { |
| return gRemainingBytes; |
| } |
| |
| const JNINativeMethod kFuzzedDataMethods[]{ |
| {(char *)"consumeBoolean", (char *)"()Z", (void *)&ConsumeBool}, |
| {(char *)"consumeByte", (char *)"()B", (void *)&ConsumeIntegral<jbyte>}, |
| {(char *)"consumeByte", (char *)"(BB)B", |
| (void *)&ConsumeIntegralInRange<jbyte>}, |
| {(char *)"consumeShort", (char *)"()S", (void *)&ConsumeIntegral<jshort>}, |
| {(char *)"consumeShort", (char *)"(SS)S", |
| (void *)&ConsumeIntegralInRange<jshort>}, |
| {(char *)"consumeInt", (char *)"()I", (void *)&ConsumeIntegral<jint>}, |
| {(char *)"consumeInt", (char *)"(II)I", |
| (void *)&ConsumeIntegralInRange<jint>}, |
| {(char *)"consumeLong", (char *)"()J", (void *)&ConsumeIntegral<jlong>}, |
| {(char *)"consumeLong", (char *)"(JJ)J", |
| (void *)&ConsumeIntegralInRange<jlong>}, |
| {(char *)"consumeFloat", (char *)"()F", (void *)&ConsumeFloat<jfloat>}, |
| {(char *)"consumeRegularFloat", (char *)"()F", |
| (void *)&ConsumeRegularFloat<jfloat>}, |
| {(char *)"consumeRegularFloat", (char *)"(FF)F", |
| (void *)&ConsumeFloatInRange<jfloat>}, |
| {(char *)"consumeProbabilityFloat", (char *)"()F", |
| (void *)&ConsumeProbability<jfloat>}, |
| {(char *)"consumeDouble", (char *)"()D", (void *)&ConsumeFloat<jdouble>}, |
| {(char *)"consumeRegularDouble", (char *)"()D", |
| (void *)&ConsumeRegularFloat<jdouble>}, |
| {(char *)"consumeRegularDouble", (char *)"(DD)D", |
| (void *)&ConsumeFloatInRange<jdouble>}, |
| {(char *)"consumeProbabilityDouble", (char *)"()D", |
| (void *)&ConsumeProbability<jdouble>}, |
| {(char *)"consumeChar", (char *)"()C", (void *)&ConsumeChar}, |
| {(char *)"consumeChar", (char *)"(CC)C", |
| (void *)&ConsumeIntegralInRange<jchar>}, |
| {(char *)"consumeCharNoSurrogates", (char *)"()C", |
| (void *)&ConsumeCharNoSurrogates}, |
| {(char *)"consumeAsciiString", (char *)"(I)Ljava/lang/String;", |
| (void *)&ConsumeAsciiString}, |
| {(char *)"consumeRemainingAsAsciiString", (char *)"()Ljava/lang/String;", |
| (void *)&ConsumeRemainingAsAsciiString}, |
| {(char *)"consumeString", (char *)"(I)Ljava/lang/String;", |
| (void *)&ConsumeString}, |
| {(char *)"consumeRemainingAsString", (char *)"()Ljava/lang/String;", |
| (void *)&ConsumeRemainingAsString}, |
| {(char *)"consumeBooleans", (char *)"(I)[Z", |
| (void *)&ConsumeIntegralArray<jboolean>}, |
| {(char *)"consumeBytes", (char *)"(I)[B", |
| (void *)&ConsumeIntegralArray<jbyte>}, |
| {(char *)"consumeShorts", (char *)"(I)[S", |
| (void *)&ConsumeIntegralArray<jshort>}, |
| {(char *)"consumeInts", (char *)"(I)[I", |
| (void *)&ConsumeIntegralArray<jint>}, |
| {(char *)"consumeLongs", (char *)"(I)[J", |
| (void *)&ConsumeIntegralArray<jlong>}, |
| {(char *)"consumeRemainingAsBytes", (char *)"()[B", |
| (void *)&ConsumeRemainingAsArray<jbyte>}, |
| {(char *)"remainingBytes", (char *)"()I", (void *)&RemainingBytes}, |
| }; |
| const jint kNumFuzzedDataMethods = |
| sizeof(kFuzzedDataMethods) / sizeof(kFuzzedDataMethods[0]); |
| } // namespace |
| |
| namespace jazzer { |
| |
| void SetUpFuzzedDataProvider(JNIEnv &env) { |
| jclass fuzzed_data_provider_class = |
| env.FindClass(kFuzzedDataProviderImplClass); |
| if (env.ExceptionCheck()) { |
| env.ExceptionDescribe(); |
| throw std::runtime_error("failed to find FuzzedDataProviderImpl class"); |
| } |
| env.RegisterNatives(fuzzed_data_provider_class, kFuzzedDataMethods, |
| kNumFuzzedDataMethods); |
| if (env.ExceptionCheck()) { |
| env.ExceptionDescribe(); |
| throw std::runtime_error( |
| "could not register native callbacks for FuzzedDataProvider"); |
| } |
| } |
| |
| void FeedFuzzedDataProvider(const uint8_t *data, std::size_t size) { |
| gDataPtr = data; |
| gRemainingBytes = size; |
| } |
| } // namespace jazzer |