c++/src/kj/encoding.h - toolchain/capnproto - Git at Google

 // Copyright (c) 2017 Cloudflare, Inc. and contributors
 // Licensed under the MIT License:
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.

 #pragma once
 // Functions for encoding/decoding bytes and text in common formats, including:
 // - UTF-{8,16,32}
 // - Hex
 // - URI encoding
 // - Base64

 #include "string.h"

 KJ_BEGIN_HEADER

 namespace kj {

 template <typename ResultType>
 struct EncodingResult: public ResultType {
   // Equivalent to ResultType (a String or wide-char array) for all intents and purposes, except
   // that the bool `hadErrors` can be inspected to see if any errors were encountered in the input.
   // Each encoding/decoding function that returns this type will "work around" errors in some way,
   // so an application doesn't strictly have to check for errors. E.g. the Unicode functions
   // replace errors with U+FFFD in the output.
   //
   // Through magic, KJ_IF_MAYBE() and KJ_{REQUIRE,ASSERT}_NONNULL() work on EncodingResult<T>
   // exactly if it were a Maybe<T> that is null in case of errors.

   inline EncodingResult(ResultType&& result, bool hadErrors)
       : ResultType(kj::mv(result)), hadErrors(hadErrors) {}

   const bool hadErrors;
 };

 template <typename T>
 inline auto KJ_STRINGIFY(const EncodingResult<T>& value)
     -> decltype(toCharSequence(implicitCast<const T&>(value))) {
   return toCharSequence(implicitCast<const T&>(value));
 }

 EncodingResult<Array<char16_t>> encodeUtf16(ArrayPtr<const char> text, bool nulTerminate = false);
 EncodingResult<Array<char32_t>> encodeUtf32(ArrayPtr<const char> text, bool nulTerminate = false);
 // Convert UTF-8 text (which KJ strings use) to UTF-16 or UTF-32.
 //
 // If `nulTerminate` is true, an extra NUL character will be added to the end of the output.
 //
 // The returned arrays are in platform-native endianness (otherwise they wouldn't really be
 // char16_t / char32_t).
 //
 // Note that the KJ Unicode encoding and decoding functions actually implement
 // [WTF-8 encoding](http://simonsapin.github.io/wtf-8/), which affects how invalid input is
 // handled. See comments on decodeUtf16() for more info.

 EncodingResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16);
 EncodingResult<String> decodeUtf32(ArrayPtr<const char32_t> utf32);
 // Convert UTF-16 or UTF-32 to UTF-8 (which KJ strings use).
 //
 // The input should NOT include a NUL terminator; any NUL characters in the input array will be
 // preserved in the output.
 //
 // The input must be in platform-native endianness. BOMs are NOT recognized by these functions.
 //
 // Note that the KJ Unicode encoding and decoding functions actually implement
 // [WTF-8 encoding](http://simonsapin.github.io/wtf-8/). This means that if you start with an array
 // of char16_t and you pass it through any number of conversions to other Unicode encodings,
 // eventually returning it to UTF-16, all the while ignoring `hadErrors`, you will end up with
 // exactly the same char16_t array you started with, *even if* the array is not valid UTF-16. This
 // is useful because many real-world systems that were designed for UCS-2 (plain 16-bit Unicode)
 // and later "upgraded" to UTF-16 do not enforce that their UTF-16 is well-formed. For example,
 // file names on Windows NT are encoded using 16-bit characters, without enforcing that the
 // character sequence is valid UTF-16. It is important that programs on Windows be able to handle
 // such filenames, even if they choose to convert the name to UTF-8 for internal processing.
 //
 // Specifically, KJ's Unicode handling allows unpaired surrogate code points to round-trip through
 // UTF-8 and UTF-32. Unpaired surrogates will be flagged as an error (setting `hadErrors` in the
 // result), but will NOT be replaced with the Unicode replacement character as other erroneous
 // sequences would be, but rather encoded as an invalid surrogate codepoint in the target encoding.
 //
 // KJ makes the following guarantees about invalid input:
 // - A round trip from UTF-16 to other encodings and back will produce exactly the original input,
 //   with every leg of the trip raising the `hadErrors` flag if the original input was not valid.
 // - A round trip from UTF-8 or UTF-32 to other encodings and back will either produce exactly
 //   the original input, or will have replaced some invalid sequences with the Unicode replacement
 //   character, U+FFFD. No code units will ever be removed unless they are replaced with U+FFFD,
 //   and no code units will ever be added except to encode U+FFFD. If the original input was not
 //   valid, the `hadErrors` flag will be raised on the first leg of the trip, and will also be
 //   raised on subsequent legs unless all invalid sequences were replaced with U+FFFD (which, after
 //   all, is a valid code point).

 EncodingResult<Array<wchar_t>> encodeWideString(
     ArrayPtr<const char> text, bool nulTerminate = false);
 EncodingResult<String> decodeWideString(ArrayPtr<const wchar_t> wide);
 // Encode / decode strings of wchar_t, aka "wide strings". Unfortunately, different platforms have
 // different definitions for wchar_t. For example, on Windows they are 16-bit and encode UTF-16,
 // but on Linux they are 32-bit and encode UTF-32. Some platforms even define wchar_t as 8-bit,
 // encoding UTF-8 (e.g. BeOS did this).
 //
 // KJ assumes that wide strings use the UTF encoding that corresponds to the size of wchar_t on
 // the target platform. So, these functions are simple aliases for encodeUtf*/decodeUtf*, above
 // (or simply make a copy if wchar_t is 8 bits).

 String encodeHex(ArrayPtr<const byte> bytes);
 EncodingResult<Array<byte>> decodeHex(ArrayPtr<const char> text);
 // Encode/decode bytes as hex strings.

 String encodeUriComponent(ArrayPtr<const byte> bytes);
 String encodeUriComponent(ArrayPtr<const char> bytes);
 EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text);
 // Encode/decode URI components using % escapes for characters listed as "reserved" in RFC 2396.
 // This is the same behavior as JavaScript's `encodeURIComponent()`.
 //
 // See https://tools.ietf.org/html/rfc2396#section-2.3

 String encodeUriFragment(ArrayPtr<const byte> bytes);
 String encodeUriFragment(ArrayPtr<const char> bytes);
 // Encode URL fragment components using the fragment percent encode set defined by the WHATWG URL
 // specification. Use decodeUriComponent() to decode.
 //
 // Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
 //   decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
 //
 // See https://url.spec.whatwg.org/#fragment-percent-encode-set

 String encodeUriPath(ArrayPtr<const byte> bytes);
 String encodeUriPath(ArrayPtr<const char> bytes);
 // Encode URL path components (not entire paths!) using the path percent encode set defined by the
 // WHATWG URL specification. Use decodeUriComponent() to decode.
 //
 // Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
 //   decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
 //
 // Quirk: This percent-encodes '/' and '\' characters as well, which are not actually in the set
 //   defined by the WHATWG URL spec. Since a conforming URL implementation will only ever call this
 //   function on individual path components, and never entire paths, augmenting the character set to
 //   include these separators allows this function to be used to implement a URL class that stores
 //   its path components in percent-decoded form.
 //
 // See https://url.spec.whatwg.org/#path-percent-encode-set

 String encodeUriUserInfo(ArrayPtr<const byte> bytes);
 String encodeUriUserInfo(ArrayPtr<const char> bytes);
 // Encode URL userinfo components using the userinfo percent encode set defined by the WHATWG URL
 // specification. Use decodeUriComponent() to decode.
 //
 // Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
 //   decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
 //
 // See https://url.spec.whatwg.org/#userinfo-percent-encode-set

 String encodeWwwForm(ArrayPtr<const byte> bytes);
 String encodeWwwForm(ArrayPtr<const char> bytes);
 EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text);
 // Encode/decode URI components using % escapes and '+' (for spaces) according to the
 // application/x-www-form-urlencoded format defined by the WHATWG URL specification.
 //
 // Note: Like the fragment, path, and userinfo percent-encoding functions above, this function is
 //   not idempotent: we percent-encode '%' signs. However, in this particular case the spec happens
 //   to agree with us!
 //
 // See https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer

 struct DecodeUriOptions {
   // Parameter to `decodeBinaryUriComponent()`.

   // This struct is intentionally convertible from bool, in order to maintain backwards
   // compatibility with code written when `decodeBinaryUriComponent()` took a boolean second
   // parameter.
   DecodeUriOptions(bool nulTerminate = false, bool plusToSpace = false)
       : nulTerminate(nulTerminate), plusToSpace(plusToSpace) {}

   bool nulTerminate;
   // Append a terminal NUL byte.

   bool plusToSpace;
   // Convert '+' to ' ' characters before percent decoding. Used to decode
   // application/x-www-form-urlencoded text, such as query strings.
 };
 EncodingResult<Array<byte>> decodeBinaryUriComponent(
     ArrayPtr<const char> text, DecodeUriOptions options = DecodeUriOptions());
 // Decode URI components using % escapes. This is a lower-level interface used to implement both
 // `decodeUriComponent()` and `decodeWwwForm()`

 String encodeCEscape(ArrayPtr<const byte> bytes);
 String encodeCEscape(ArrayPtr<const char> bytes);
 EncodingResult<Array<byte>> decodeBinaryCEscape(
     ArrayPtr<const char> text, bool nulTerminate = false);
 EncodingResult<String> decodeCEscape(ArrayPtr<const char> text);

 String encodeBase64(ArrayPtr<const byte> bytes, bool breakLines = false);
 // Encode the given bytes as base64 text. If `breakLines` is true, line breaks will be inserted
 // into the output every 72 characters (e.g. for encoding e-mail bodies).

 EncodingResult<Array<byte>> decodeBase64(ArrayPtr<const char> text);
 // Decode base64 text. This function reports errors required by the WHATWG HTML/Infra specs: see
 // https://html.spec.whatwg.org/multipage/webappapis.html#atob for details.

 String encodeBase64Url(ArrayPtr<const byte> bytes);
 // Encode the given bytes as URL-safe base64 text. (RFC 4648, section 5)

 // =======================================================================================
 // inline implementation details

 namespace _ {  // private

 template <typename T>
 NullableValue<T> readMaybe(EncodingResult<T>&& value) {
   if (value.hadErrors) {
     return nullptr;
   } else {
     return kj::mv(value);
   }
 }

 template <typename T>
 T* readMaybe(EncodingResult<T>& value) {
   if (value.hadErrors) {
     return nullptr;
   } else {
     return &value;
   }
 }

 template <typename T>
 const T* readMaybe(const EncodingResult<T>& value) {
   if (value.hadErrors) {
     return nullptr;
   } else {
     return &value;
   }
 }

 String encodeCEscapeImpl(ArrayPtr<const byte> bytes, bool isBinary);

 }  // namespace _ (private)

 inline String encodeUriComponent(ArrayPtr<const char> text) {
   return encodeUriComponent(text.asBytes());
 }
 inline EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text) {
   auto result = decodeBinaryUriComponent(text, DecodeUriOptions { /*.nulTerminate=*/true });
   return { String(result.releaseAsChars()), result.hadErrors };
 }

 inline String encodeUriFragment(ArrayPtr<const char> text) {
   return encodeUriFragment(text.asBytes());
 }
 inline String encodeUriPath(ArrayPtr<const char> text) {
   return encodeUriPath(text.asBytes());
 }
 inline String encodeUriUserInfo(ArrayPtr<const char> text) {
   return encodeUriUserInfo(text.asBytes());
 }

 inline String encodeWwwForm(ArrayPtr<const char> text) {
   return encodeWwwForm(text.asBytes());
 }
 inline EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text) {
   auto result = decodeBinaryUriComponent(text, DecodeUriOptions { /*.nulTerminate=*/true,
                                                                   /*.plusToSpace=*/true });
   return { String(result.releaseAsChars()), result.hadErrors };
 }

 inline String encodeCEscape(ArrayPtr<const char> text) {
   return _::encodeCEscapeImpl(text.asBytes(), false);
 }

 inline String encodeCEscape(ArrayPtr<const byte> bytes) {
   return _::encodeCEscapeImpl(bytes, true);
 }

 inline EncodingResult<String> decodeCEscape(ArrayPtr<const char> text) {
   auto result = decodeBinaryCEscape(text, true);
   return { String(result.releaseAsChars()), result.hadErrors };
 }

 // If you pass a string literal to a function taking ArrayPtr<const char>, it'll include the NUL
 // termintator, which is surprising. Let's add overloads that avoid that. In practice this probably
 // only even matters for encoding-test.c++.

 template <size_t s>
 inline EncodingResult<Array<char16_t>> encodeUtf16(const char (&text)[s], bool nulTerminate=false) {
   return encodeUtf16(arrayPtr(text, s - 1), nulTerminate);
 }
 template <size_t s>
 inline EncodingResult<Array<char32_t>> encodeUtf32(const char (&text)[s], bool nulTerminate=false) {
   return encodeUtf32(arrayPtr(text, s - 1), nulTerminate);
 }
 template <size_t s>
 inline EncodingResult<Array<wchar_t>> encodeWideString(
     const char (&text)[s], bool nulTerminate=false) {
   return encodeWideString(arrayPtr(text, s - 1), nulTerminate);
 }
 template <size_t s>
 inline EncodingResult<String> decodeUtf16(const char16_t (&utf16)[s]) {
   return decodeUtf16(arrayPtr(utf16, s - 1));
 }
 template <size_t s>
 inline EncodingResult<String> decodeUtf32(const char32_t (&utf32)[s]) {
   return decodeUtf32(arrayPtr(utf32, s - 1));
 }
 template <size_t s>
 inline EncodingResult<String> decodeWideString(const wchar_t (&utf32)[s]) {
   return decodeWideString(arrayPtr(utf32, s - 1));
 }
 template <size_t s>
 inline EncodingResult<Array<byte>> decodeHex(const char (&text)[s]) {
   return decodeHex(arrayPtr(text, s - 1));
 }
 template <size_t s>
 inline String encodeUriComponent(const char (&text)[s]) {
   return encodeUriComponent(arrayPtr(text, s - 1));
 }
 template <size_t s>
 inline Array<byte> decodeBinaryUriComponent(const char (&text)[s]) {
   return decodeBinaryUriComponent(arrayPtr(text, s - 1));
 }
 template <size_t s>
 inline EncodingResult<String> decodeUriComponent(const char (&text)[s]) {
   return decodeUriComponent(arrayPtr(text, s-1));
 }
 template <size_t s>
 inline String encodeUriFragment(const char (&text)[s]) {
   return encodeUriFragment(arrayPtr(text, s - 1));
 }
 template <size_t s>
 inline String encodeUriPath(const char (&text)[s]) {
   return encodeUriPath(arrayPtr(text, s - 1));
 }
 template <size_t s>
 inline String encodeUriUserInfo(const char (&text)[s]) {
   return encodeUriUserInfo(arrayPtr(text, s - 1));
 }
 template <size_t s>
 inline String encodeWwwForm(const char (&text)[s]) {
   return encodeWwwForm(arrayPtr(text, s - 1));
 }
 template <size_t s>
 inline EncodingResult<String> decodeWwwForm(const char (&text)[s]) {
   return decodeWwwForm(arrayPtr(text, s-1));
 }
 template <size_t s>
 inline String encodeCEscape(const char (&text)[s]) {
   return encodeCEscape(arrayPtr(text, s - 1));
 }
 template <size_t s>
 inline EncodingResult<Array<byte>> decodeBinaryCEscape(const char (&text)[s]) {
   return decodeBinaryCEscape(arrayPtr(text, s - 1));
 }
 template <size_t s>
 inline EncodingResult<String> decodeCEscape(const char (&text)[s]) {
   return decodeCEscape(arrayPtr(text, s-1));
 }
 template <size_t s>
 EncodingResult<Array<byte>> decodeBase64(const char (&text)[s]) {
   return decodeBase64(arrayPtr(text, s - 1));
 }

 #if __cplusplus >= 202000L
 template <size_t s>
 inline EncodingResult<Array<char16_t>> encodeUtf16(const char8_t (&text)[s], bool nulTerminate=false) {
   return encodeUtf16(arrayPtr(reinterpret_cast<const char*>(text), s - 1), nulTerminate);
 }
 template <size_t s>
 inline EncodingResult<Array<char32_t>> encodeUtf32(const char8_t (&text)[s], bool nulTerminate=false) {
   return encodeUtf32(arrayPtr(reinterpret_cast<const char*>(text), s - 1), nulTerminate);
 }
 template <size_t s>
 inline EncodingResult<Array<wchar_t>> encodeWideString(
     const char8_t (&text)[s], bool nulTerminate=false) {
   return encodeWideString(arrayPtr(reinterpret_cast<const char*>(text), s - 1), nulTerminate);
 }
 template <size_t s>
 inline EncodingResult<Array<byte>> decodeHex(const char8_t (&text)[s]) {
   return decodeHex(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 template <size_t s>
 inline String encodeUriComponent(const char8_t (&text)[s]) {
   return encodeUriComponent(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 template <size_t s>
 inline Array<byte> decodeBinaryUriComponent(const char8_t (&text)[s]) {
   return decodeBinaryUriComponent(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 template <size_t s>
 inline EncodingResult<String> decodeUriComponent(const char8_t (&text)[s]) {
   return decodeUriComponent(arrayPtr(reinterpret_cast<const char*>(text), s-1));
 }
 template <size_t s>
 inline String encodeUriFragment(const char8_t (&text)[s]) {
   return encodeUriFragment(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 template <size_t s>
 inline String encodeUriPath(const char8_t (&text)[s]) {
   return encodeUriPath(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 template <size_t s>
 inline String encodeUriUserInfo(const char8_t (&text)[s]) {
   return encodeUriUserInfo(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 template <size_t s>
 inline String encodeWwwForm(const char8_t (&text)[s]) {
   return encodeWwwForm(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 template <size_t s>
 inline EncodingResult<String> decodeWwwForm(const char8_t (&text)[s]) {
   return decodeWwwForm(arrayPtr(reinterpret_cast<const char*>(text), s-1));
 }
 template <size_t s>
 inline String encodeCEscape(const char8_t (&text)[s]) {
   return encodeCEscape(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 template <size_t s>
 inline EncodingResult<Array<byte>> decodeBinaryCEscape(const char8_t (&text)[s]) {
   return decodeBinaryCEscape(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 template <size_t s>
 inline EncodingResult<String> decodeCEscape(const char8_t (&text)[s]) {
   return decodeCEscape(arrayPtr(reinterpret_cast<const char*>(text), s-1));
 }
 template <size_t s>
 EncodingResult<Array<byte>> decodeBase64(const char8_t (&text)[s]) {
   return decodeBase64(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
 }
 #endif

 } // namespace kj

 KJ_END_HEADER
	// Copyright (c) 2017 Cloudflare, Inc. and contributors
	// Licensed under the MIT License:
	//
	// Permission is hereby granted, free of charge, to any person obtaining a copy
	// of this software and associated documentation files (the "Software"), to deal
	// in the Software without restriction, including without limitation the rights
	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	// copies of the Software, and to permit persons to whom the Software is
	// furnished to do so, subject to the following conditions:
	//
	// The above copyright notice and this permission notice shall be included in
	// all copies or substantial portions of the Software.
	//
	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	// THE SOFTWARE.

	#pragma once
	// Functions for encoding/decoding bytes and text in common formats, including:
	// - UTF-{8,16,32}
	// - Hex
	// - URI encoding
	// - Base64

	#include "string.h"

	KJ_BEGIN_HEADER

	namespace kj {

	template <typename ResultType>
	struct EncodingResult: public ResultType {
	// Equivalent to ResultType (a String or wide-char array) for all intents and purposes, except
	// that the bool `hadErrors` can be inspected to see if any errors were encountered in the input.
	// Each encoding/decoding function that returns this type will "work around" errors in some way,
	// so an application doesn't strictly have to check for errors. E.g. the Unicode functions
	// replace errors with U+FFFD in the output.
	//
	// Through magic, KJ_IF_MAYBE() and KJ_{REQUIRE,ASSERT}_NONNULL() work on EncodingResult<T>
	// exactly if it were a Maybe<T> that is null in case of errors.

	inline EncodingResult(ResultType&& result, bool hadErrors)
	: ResultType(kj::mv(result)), hadErrors(hadErrors) {}

	const bool hadErrors;
	};

	template <typename T>
	inline auto KJ_STRINGIFY(const EncodingResult<T>& value)
	-> decltype(toCharSequence(implicitCast<const T&>(value))) {
	return toCharSequence(implicitCast<const T&>(value));
	}

	EncodingResult<Array<char16_t>> encodeUtf16(ArrayPtr<const char> text, bool nulTerminate = false);
	EncodingResult<Array<char32_t>> encodeUtf32(ArrayPtr<const char> text, bool nulTerminate = false);
	// Convert UTF-8 text (which KJ strings use) to UTF-16 or UTF-32.
	//
	// If `nulTerminate` is true, an extra NUL character will be added to the end of the output.
	//
	// The returned arrays are in platform-native endianness (otherwise they wouldn't really be
	// char16_t / char32_t).
	//
	// Note that the KJ Unicode encoding and decoding functions actually implement
	// [WTF-8 encoding](http://simonsapin.github.io/wtf-8/), which affects how invalid input is
	// handled. See comments on decodeUtf16() for more info.

	EncodingResult<String> decodeUtf16(ArrayPtr<const char16_t> utf16);
	EncodingResult<String> decodeUtf32(ArrayPtr<const char32_t> utf32);
	// Convert UTF-16 or UTF-32 to UTF-8 (which KJ strings use).
	//
	// The input should NOT include a NUL terminator; any NUL characters in the input array will be
	// preserved in the output.
	//
	// The input must be in platform-native endianness. BOMs are NOT recognized by these functions.
	//
	// Note that the KJ Unicode encoding and decoding functions actually implement
	// [WTF-8 encoding](http://simonsapin.github.io/wtf-8/). This means that if you start with an array
	// of char16_t and you pass it through any number of conversions to other Unicode encodings,
	// eventually returning it to UTF-16, all the while ignoring `hadErrors`, you will end up with
	// exactly the same char16_t array you started with, even if the array is not valid UTF-16. This
	// is useful because many real-world systems that were designed for UCS-2 (plain 16-bit Unicode)
	// and later "upgraded" to UTF-16 do not enforce that their UTF-16 is well-formed. For example,
	// file names on Windows NT are encoded using 16-bit characters, without enforcing that the
	// character sequence is valid UTF-16. It is important that programs on Windows be able to handle
	// such filenames, even if they choose to convert the name to UTF-8 for internal processing.
	//
	// Specifically, KJ's Unicode handling allows unpaired surrogate code points to round-trip through
	// UTF-8 and UTF-32. Unpaired surrogates will be flagged as an error (setting `hadErrors` in the
	// result), but will NOT be replaced with the Unicode replacement character as other erroneous
	// sequences would be, but rather encoded as an invalid surrogate codepoint in the target encoding.
	//
	// KJ makes the following guarantees about invalid input:
	// - A round trip from UTF-16 to other encodings and back will produce exactly the original input,
	// with every leg of the trip raising the `hadErrors` flag if the original input was not valid.
	// - A round trip from UTF-8 or UTF-32 to other encodings and back will either produce exactly
	// the original input, or will have replaced some invalid sequences with the Unicode replacement
	// character, U+FFFD. No code units will ever be removed unless they are replaced with U+FFFD,
	// and no code units will ever be added except to encode U+FFFD. If the original input was not
	// valid, the `hadErrors` flag will be raised on the first leg of the trip, and will also be
	// raised on subsequent legs unless all invalid sequences were replaced with U+FFFD (which, after
	// all, is a valid code point).

	EncodingResult<Array<wchar_t>> encodeWideString(
	ArrayPtr<const char> text, bool nulTerminate = false);
	EncodingResult<String> decodeWideString(ArrayPtr<const wchar_t> wide);
	// Encode / decode strings of wchar_t, aka "wide strings". Unfortunately, different platforms have
	// different definitions for wchar_t. For example, on Windows they are 16-bit and encode UTF-16,
	// but on Linux they are 32-bit and encode UTF-32. Some platforms even define wchar_t as 8-bit,
	// encoding UTF-8 (e.g. BeOS did this).
	//
	// KJ assumes that wide strings use the UTF encoding that corresponds to the size of wchar_t on
	// the target platform. So, these functions are simple aliases for encodeUtf/decodeUtf, above
	// (or simply make a copy if wchar_t is 8 bits).

	String encodeHex(ArrayPtr<const byte> bytes);
	EncodingResult<Array<byte>> decodeHex(ArrayPtr<const char> text);
	// Encode/decode bytes as hex strings.

	String encodeUriComponent(ArrayPtr<const byte> bytes);
	String encodeUriComponent(ArrayPtr<const char> bytes);
	EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text);
	// Encode/decode URI components using % escapes for characters listed as "reserved" in RFC 2396.
	// This is the same behavior as JavaScript's `encodeURIComponent()`.
	//
	// See https://tools.ietf.org/html/rfc2396#section-2.3

	String encodeUriFragment(ArrayPtr<const byte> bytes);
	String encodeUriFragment(ArrayPtr<const char> bytes);
	// Encode URL fragment components using the fragment percent encode set defined by the WHATWG URL
	// specification. Use decodeUriComponent() to decode.
	//
	// Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
	// decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
	//
	// See https://url.spec.whatwg.org/#fragment-percent-encode-set

	String encodeUriPath(ArrayPtr<const byte> bytes);
	String encodeUriPath(ArrayPtr<const char> bytes);
	// Encode URL path components (not entire paths!) using the path percent encode set defined by the
	// WHATWG URL specification. Use decodeUriComponent() to decode.
	//
	// Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
	// decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
	//
	// Quirk: This percent-encodes '/' and '\' characters as well, which are not actually in the set
	// defined by the WHATWG URL spec. Since a conforming URL implementation will only ever call this
	// function on individual path components, and never entire paths, augmenting the character set to
	// include these separators allows this function to be used to implement a URL class that stores
	// its path components in percent-decoded form.
	//
	// See https://url.spec.whatwg.org/#path-percent-encode-set

	String encodeUriUserInfo(ArrayPtr<const byte> bytes);
	String encodeUriUserInfo(ArrayPtr<const char> bytes);
	// Encode URL userinfo components using the userinfo percent encode set defined by the WHATWG URL
	// specification. Use decodeUriComponent() to decode.
	//
	// Quirk: We also percent-encode the '%' sign itself, because we expect to be called on percent-
	// decoded data. In other words, this function is not idempotent, in contrast to the URL spec.
	//
	// See https://url.spec.whatwg.org/#userinfo-percent-encode-set

	String encodeWwwForm(ArrayPtr<const byte> bytes);
	String encodeWwwForm(ArrayPtr<const char> bytes);
	EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text);
	// Encode/decode URI components using % escapes and '+' (for spaces) according to the
	// application/x-www-form-urlencoded format defined by the WHATWG URL specification.
	//
	// Note: Like the fragment, path, and userinfo percent-encoding functions above, this function is
	// not idempotent: we percent-encode '%' signs. However, in this particular case the spec happens
	// to agree with us!
	//
	// See https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer

	struct DecodeUriOptions {
	// Parameter to `decodeBinaryUriComponent()`.

	// This struct is intentionally convertible from bool, in order to maintain backwards
	// compatibility with code written when `decodeBinaryUriComponent()` took a boolean second
	// parameter.
	DecodeUriOptions(bool nulTerminate = false, bool plusToSpace = false)
	: nulTerminate(nulTerminate), plusToSpace(plusToSpace) {}

	bool nulTerminate;
	// Append a terminal NUL byte.

	bool plusToSpace;
	// Convert '+' to ' ' characters before percent decoding. Used to decode
	// application/x-www-form-urlencoded text, such as query strings.
	};
	EncodingResult<Array<byte>> decodeBinaryUriComponent(
	ArrayPtr<const char> text, DecodeUriOptions options = DecodeUriOptions());
	// Decode URI components using % escapes. This is a lower-level interface used to implement both
	// `decodeUriComponent()` and `decodeWwwForm()`

	String encodeCEscape(ArrayPtr<const byte> bytes);
	String encodeCEscape(ArrayPtr<const char> bytes);
	EncodingResult<Array<byte>> decodeBinaryCEscape(
	ArrayPtr<const char> text, bool nulTerminate = false);
	EncodingResult<String> decodeCEscape(ArrayPtr<const char> text);

	String encodeBase64(ArrayPtr<const byte> bytes, bool breakLines = false);
	// Encode the given bytes as base64 text. If `breakLines` is true, line breaks will be inserted
	// into the output every 72 characters (e.g. for encoding e-mail bodies).

	EncodingResult<Array<byte>> decodeBase64(ArrayPtr<const char> text);
	// Decode base64 text. This function reports errors required by the WHATWG HTML/Infra specs: see
	// https://html.spec.whatwg.org/multipage/webappapis.html#atob for details.

	String encodeBase64Url(ArrayPtr<const byte> bytes);
	// Encode the given bytes as URL-safe base64 text. (RFC 4648, section 5)

	// =======================================================================================
	// inline implementation details

	namespace _ { // private

	template <typename T>
	NullableValue<T> readMaybe(EncodingResult<T>&& value) {
	if (value.hadErrors) {
	return nullptr;
	} else {
	return kj::mv(value);
	}
	}

	template <typename T>
	T* readMaybe(EncodingResult<T>& value) {
	if (value.hadErrors) {
	return nullptr;
	} else {
	return &value;
	}
	}

	template <typename T>
	const T* readMaybe(const EncodingResult<T>& value) {
	if (value.hadErrors) {
	return nullptr;
	} else {
	return &value;
	}
	}

	String encodeCEscapeImpl(ArrayPtr<const byte> bytes, bool isBinary);

	} // namespace _ (private)

	inline String encodeUriComponent(ArrayPtr<const char> text) {
	return encodeUriComponent(text.asBytes());
	}
	inline EncodingResult<String> decodeUriComponent(ArrayPtr<const char> text) {
	auto result = decodeBinaryUriComponent(text, DecodeUriOptions { /.nulTerminate=/true });
	return { String(result.releaseAsChars()), result.hadErrors };
	}

	inline String encodeUriFragment(ArrayPtr<const char> text) {
	return encodeUriFragment(text.asBytes());
	}
	inline String encodeUriPath(ArrayPtr<const char> text) {
	return encodeUriPath(text.asBytes());
	}
	inline String encodeUriUserInfo(ArrayPtr<const char> text) {
	return encodeUriUserInfo(text.asBytes());
	}

	inline String encodeWwwForm(ArrayPtr<const char> text) {
	return encodeWwwForm(text.asBytes());
	}
	inline EncodingResult<String> decodeWwwForm(ArrayPtr<const char> text) {
	auto result = decodeBinaryUriComponent(text, DecodeUriOptions { /.nulTerminate=/true,
	/.plusToSpace=/true });
	return { String(result.releaseAsChars()), result.hadErrors };
	}

	inline String encodeCEscape(ArrayPtr<const char> text) {
	return _::encodeCEscapeImpl(text.asBytes(), false);
	}

	inline String encodeCEscape(ArrayPtr<const byte> bytes) {
	return _::encodeCEscapeImpl(bytes, true);
	}

	inline EncodingResult<String> decodeCEscape(ArrayPtr<const char> text) {
	auto result = decodeBinaryCEscape(text, true);
	return { String(result.releaseAsChars()), result.hadErrors };
	}

	// If you pass a string literal to a function taking ArrayPtr<const char>, it'll include the NUL
	// termintator, which is surprising. Let's add overloads that avoid that. In practice this probably
	// only even matters for encoding-test.c++.

	template <size_t s>
	inline EncodingResult<Array<char16_t>> encodeUtf16(const char (&text)[s], bool nulTerminate=false) {
	return encodeUtf16(arrayPtr(text, s - 1), nulTerminate);
	}
	template <size_t s>
	inline EncodingResult<Array<char32_t>> encodeUtf32(const char (&text)[s], bool nulTerminate=false) {
	return encodeUtf32(arrayPtr(text, s - 1), nulTerminate);
	}
	template <size_t s>
	inline EncodingResult<Array<wchar_t>> encodeWideString(
	const char (&text)[s], bool nulTerminate=false) {
	return encodeWideString(arrayPtr(text, s - 1), nulTerminate);
	}
	template <size_t s>
	inline EncodingResult<String> decodeUtf16(const char16_t (&utf16)[s]) {
	return decodeUtf16(arrayPtr(utf16, s - 1));
	}
	template <size_t s>
	inline EncodingResult<String> decodeUtf32(const char32_t (&utf32)[s]) {
	return decodeUtf32(arrayPtr(utf32, s - 1));
	}
	template <size_t s>
	inline EncodingResult<String> decodeWideString(const wchar_t (&utf32)[s]) {
	return decodeWideString(arrayPtr(utf32, s - 1));
	}
	template <size_t s>
	inline EncodingResult<Array<byte>> decodeHex(const char (&text)[s]) {
	return decodeHex(arrayPtr(text, s - 1));
	}
	template <size_t s>
	inline String encodeUriComponent(const char (&text)[s]) {
	return encodeUriComponent(arrayPtr(text, s - 1));
	}
	template <size_t s>
	inline Array<byte> decodeBinaryUriComponent(const char (&text)[s]) {
	return decodeBinaryUriComponent(arrayPtr(text, s - 1));
	}
	template <size_t s>
	inline EncodingResult<String> decodeUriComponent(const char (&text)[s]) {
	return decodeUriComponent(arrayPtr(text, s-1));
	}
	template <size_t s>
	inline String encodeUriFragment(const char (&text)[s]) {
	return encodeUriFragment(arrayPtr(text, s - 1));
	}
	template <size_t s>
	inline String encodeUriPath(const char (&text)[s]) {
	return encodeUriPath(arrayPtr(text, s - 1));
	}
	template <size_t s>
	inline String encodeUriUserInfo(const char (&text)[s]) {
	return encodeUriUserInfo(arrayPtr(text, s - 1));
	}
	template <size_t s>
	inline String encodeWwwForm(const char (&text)[s]) {
	return encodeWwwForm(arrayPtr(text, s - 1));
	}
	template <size_t s>
	inline EncodingResult<String> decodeWwwForm(const char (&text)[s]) {
	return decodeWwwForm(arrayPtr(text, s-1));
	}
	template <size_t s>
	inline String encodeCEscape(const char (&text)[s]) {
	return encodeCEscape(arrayPtr(text, s - 1));
	}
	template <size_t s>
	inline EncodingResult<Array<byte>> decodeBinaryCEscape(const char (&text)[s]) {
	return decodeBinaryCEscape(arrayPtr(text, s - 1));
	}
	template <size_t s>
	inline EncodingResult<String> decodeCEscape(const char (&text)[s]) {
	return decodeCEscape(arrayPtr(text, s-1));
	}
	template <size_t s>
	EncodingResult<Array<byte>> decodeBase64(const char (&text)[s]) {
	return decodeBase64(arrayPtr(text, s - 1));
	}

	#if __cplusplus >= 202000L
	template <size_t s>
	inline EncodingResult<Array<char16_t>> encodeUtf16(const char8_t (&text)[s], bool nulTerminate=false) {
	return encodeUtf16(arrayPtr(reinterpret_cast<const char*>(text), s - 1), nulTerminate);
	}
	template <size_t s>
	inline EncodingResult<Array<char32_t>> encodeUtf32(const char8_t (&text)[s], bool nulTerminate=false) {
	return encodeUtf32(arrayPtr(reinterpret_cast<const char*>(text), s - 1), nulTerminate);
	}
	template <size_t s>
	inline EncodingResult<Array<wchar_t>> encodeWideString(
	const char8_t (&text)[s], bool nulTerminate=false) {
	return encodeWideString(arrayPtr(reinterpret_cast<const char*>(text), s - 1), nulTerminate);
	}
	template <size_t s>
	inline EncodingResult<Array<byte>> decodeHex(const char8_t (&text)[s]) {
	return decodeHex(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	template <size_t s>
	inline String encodeUriComponent(const char8_t (&text)[s]) {
	return encodeUriComponent(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	template <size_t s>
	inline Array<byte> decodeBinaryUriComponent(const char8_t (&text)[s]) {
	return decodeBinaryUriComponent(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	template <size_t s>
	inline EncodingResult<String> decodeUriComponent(const char8_t (&text)[s]) {
	return decodeUriComponent(arrayPtr(reinterpret_cast<const char*>(text), s-1));
	}
	template <size_t s>
	inline String encodeUriFragment(const char8_t (&text)[s]) {
	return encodeUriFragment(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	template <size_t s>
	inline String encodeUriPath(const char8_t (&text)[s]) {
	return encodeUriPath(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	template <size_t s>
	inline String encodeUriUserInfo(const char8_t (&text)[s]) {
	return encodeUriUserInfo(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	template <size_t s>
	inline String encodeWwwForm(const char8_t (&text)[s]) {
	return encodeWwwForm(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	template <size_t s>
	inline EncodingResult<String> decodeWwwForm(const char8_t (&text)[s]) {
	return decodeWwwForm(arrayPtr(reinterpret_cast<const char*>(text), s-1));
	}
	template <size_t s>
	inline String encodeCEscape(const char8_t (&text)[s]) {
	return encodeCEscape(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	template <size_t s>
	inline EncodingResult<Array<byte>> decodeBinaryCEscape(const char8_t (&text)[s]) {
	return decodeBinaryCEscape(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	template <size_t s>
	inline EncodingResult<String> decodeCEscape(const char8_t (&text)[s]) {
	return decodeCEscape(arrayPtr(reinterpret_cast<const char*>(text), s-1));
	}
	template <size_t s>
	EncodingResult<Array<byte>> decodeBase64(const char8_t (&text)[s]) {
	return decodeBase64(arrayPtr(reinterpret_cast<const char*>(text), s - 1));
	}
	#endif

	} // namespace kj

	KJ_END_HEADER