c++/src/kj/compat/url.c++ - toolchain/capnproto - Git at Google

 // Copyright (c) 2017 Cloudflare, Inc. and contributors
 // Licensed under the MIT License:
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.

 #include "url.h"
 #include <kj/encoding.h>
 #include <kj/parse/char.h>
 #include <kj/debug.h>
 #include <stdlib.h>

 namespace kj {

 namespace {

 // Basic character classes from which the scheme and host character sets below are built.
 constexpr auto ALPHAS = parse::charRange('a', 'z').orRange('A', 'Z');
 constexpr auto DIGITS = parse::charRange('0', '9');

 // Characters at which split() stops when extracting the authority component of a URL.
 constexpr auto END_AUTHORITY = parse::anyOfChars("/?#");

 // Authority, path, and query components can typically be terminated by the start of a fragment.
 // However, fragments are disallowed in HTTP_REQUEST and HTTP_PROXY_REQUEST contexts. As a quirk, we
 // allow the fragment start character ('#') to live unescaped in path and query components. We do
 // not currently allow it in the authority component, because our parser would reject it as a host
 // character anyway.

 // Returns the set of characters that end a single path component in the given context.
 // '#' is a terminator only for REMOTE_HREF; both request contexts leave it inside the component.
 const parse::CharGroup_& getEndPathPart(Url::Context context) {
   static constexpr auto HREF_TERMINATORS = parse::anyOfChars("/?#");
   static constexpr auto REQUEST_TERMINATORS = parse::anyOfChars("/?");

   switch (context) {
     case Url::REMOTE_HREF:
       return HREF_TERMINATORS;

     case Url::HTTP_PROXY_REQUEST:
     case Url::HTTP_REQUEST:
       return REQUEST_TERMINATORS;
   }

   KJ_UNREACHABLE;
 }

 // Returns the set of characters that end a single query parameter in the given context.
 // '#' is a terminator only for REMOTE_HREF; both request contexts leave it inside the parameter.
 const parse::CharGroup_& getEndQueryPart(Url::Context context) {
   static constexpr auto HREF_TERMINATORS = parse::anyOfChars("&#");
   static constexpr auto REQUEST_TERMINATORS = parse::anyOfChars("&");

   switch (context) {
     case Url::REMOTE_HREF:
       return HREF_TERMINATORS;

     case Url::HTTP_PROXY_REQUEST:
     case Url::HTTP_REQUEST:
       return REQUEST_TERMINATORS;
   }

   KJ_UNREACHABLE;
 }

 // Characters accepted in a scheme. tryParse() applies this to everything after the first
 // character, which it separately requires to be in ALPHAS.
 constexpr auto SCHEME_CHARS = ALPHAS.orGroup(DIGITS).orAny("+-.");
 // Complement of SCHEME_CHARS; tryParseRelative() uses it to stop scanning for a scheme.
 constexpr auto NOT_SCHEME_CHARS = SCHEME_CHARS.invert();

 // Characters accepted in a host name, checked both when parsing and when stringifying.
 constexpr auto HOST_CHARS = ALPHAS.orGroup(DIGITS).orAny(".-:[]_");
 // [] is for ipv6 literals.
 // _ is not allowed in domain names, but the WHATWG URL spec allows it in hostnames, so we do, too.
 // TODO(someday): The URL spec actually allows a lot more than just '_', and requires nameprepping
 //   to Punycode. We'll have to decide how we want to deal with all that.

 // Converts the ASCII letters 'A'..'Z' in `text` to lower case in place. Every other byte is
 // left untouched.
 void toLower(String& text) {
   for (auto& ch: text) {
     if (ch < 'A' || ch > 'Z') continue;
     ch += 'a' - 'A';
   }
 }

 // Looks for the first occurrence of `c` in `text`. If found, returns the characters before it
 // and advances `text` to just past it. If not found, returns nullptr and leaves `text` unchanged.
 Maybe<ArrayPtr<const char>> trySplit(StringPtr& text, char c) {
   KJ_IF_MAYBE(found, text.findFirst(c)) {
     auto at = *found;
     ArrayPtr<const char> head = text.slice(0, at);
     text = text.slice(at + 1);
     return head;
   }

   return nullptr;
 }

 // ArrayPtr flavor of trySplit(): on the first occurrence of `c`, returns the characters before
 // it and advances `text` to just past it. Returns nullptr, with `text` unchanged, if `c` does
 // not occur.
 Maybe<ArrayPtr<const char>> trySplit(ArrayPtr<const char>& text, char c) {
   for (size_t at = 0; at < text.size(); ++at) {
     if (text[at] != c) continue;

     ArrayPtr<const char> head = text.slice(0, at);
     text = text.slice(at + 1, text.size());
     return head;
   }

   return nullptr;
 }

 // Returns the prefix of `text` up to (not including) the first character found in `chars`, and
 // advances `text` so that it begins at that character. When no such character exists, the whole
 // text is returned and `text` becomes empty.
 ArrayPtr<const char> split(StringPtr& text, const parse::CharGroup_& chars) {
   size_t stop = 0;
   while (stop < text.size() && !chars.contains(text[stop])) {
     ++stop;
   }

   if (stop == text.size()) {
     // No terminator present: hand back everything.
     ArrayPtr<const char> everything = text.asArray();
     text = "";
     return everything;
   }

   ArrayPtr<const char> head = text.slice(0, stop);
   text = text.slice(stop);
   return head;
 }

 // Returns `text` as an owned String. When `options.percentDecode` is set the text is run through
 // decodeUriComponent() and `hadErrors` is set to true if decoding reported errors; `hadErrors`
 // is never reset to false. Otherwise the text is copied verbatim.
 String percentDecode(ArrayPtr<const char> text, bool& hadErrors, const Url::Options& options) {
   if (!options.percentDecode) {
     return kj::str(text);
   }

   auto decoded = decodeUriComponent(text);
   if (decoded.hadErrors) {
     hadErrors = true;
   }
   return kj::mv(decoded);
 }

 // Query-string counterpart of percentDecode(): when `options.percentDecode` is set the text is
 // run through decodeWwwForm() and `hadErrors` is set to true if decoding reported errors.
 // Otherwise the text is copied verbatim.
 String percentDecodeQuery(ArrayPtr<const char> text, bool& hadErrors, const Url::Options& options) {
   if (!options.percentDecode) {
     return kj::str(text);
   }

   auto decoded = decodeWwwForm(text);
   if (decoded.hadErrors) {
     hadErrors = true;
   }
   return kj::mv(decoded);
 }

 }  // namespace

 // Out-of-line destructor with an empty body. noexcept(false) allows an exception thrown while
 // destroying a member to propagate out of the destructor.
 Url::~Url() noexcept(false) {}

 // Produces a deep copy of this URL: every owned string is duplicated with kj::str() and
 // `options` is copied as-is.
 Url Url::clone() const {
   // Duplicates one owned string; shared by the optional password and fragment.
   auto dupString = [](const String& original) { return kj::str(original); };

   return {
     kj::str(scheme),
     userInfo.map([&dupString](const UserInfo& info) -> UserInfo {
       return { kj::str(info.username), info.password.map(dupString) };
     }),
     kj::str(host),
     KJ_MAP(component, path) { return kj::str(component); },
     hasTrailingSlash,
     KJ_MAP(param, query) -> QueryParam {
       // A value whose begin() is null stays a default-constructed String rather than becoming
       // an allocated empty string, so toString() still omits the '=' for it.
       if (param.value.begin() == nullptr) {
         return { kj::str(param.name), kj::String() };
       }
       return { kj::str(param.name), kj::str(param.value) };
     },
     fragment.map(dupString),
     options
   };
 }

 // Parses `url` by forwarding all arguments to tryParse() and returns the resulting Url. If
 // tryParse() yields nullptr, KJ_REQUIRE_NONNULL fails with "invalid URL" and the input text.
 Url Url::parse(StringPtr url, Context context, Options options) {
   return KJ_REQUIRE_NONNULL(tryParse(url, context, options), "invalid URL", url);
 }

 // Parses `text` as a URL for the given `context`, returning nullptr if it is not acceptable.
 //
 // For HTTP_REQUEST the text must begin with '/' and scheme/authority parsing is skipped. For the
 // other contexts a "scheme://authority" prefix is required before the path.
 //
 // nullptr is returned when: the scheme is missing or malformed; "//" does not follow the scheme;
 // user info or a fragment is present in a context other than REMOTE_HREF; the decoded host
 // contains a character outside HOST_CHARS; or any component reported a percent-decoding error.
 Maybe<Url> Url::tryParse(StringPtr text, Context context, Options options) {
   Url result;
   result.options = options;
   bool err = false;  // tracks percent-decoding errors

   // Component terminators depend on whether '#' starts a fragment in this context.
   auto& END_PATH_PART = getEndPathPart(context);
   auto& END_QUERY_PART = getEndQueryPart(context);

   if (context == HTTP_REQUEST) {
     if (!text.startsWith("/")) {
       return nullptr;
     }
   } else {
     KJ_IF_MAYBE(scheme, trySplit(text, ':')) {
       result.scheme = kj::str(*scheme);
     } else {
       // missing scheme
       return nullptr;
     }
     toLower(result.scheme);
     // A scheme must be non-empty, start with a letter, and continue with SCHEME_CHARS only.
     if (result.scheme.size() == 0 ||
         !ALPHAS.contains(result.scheme[0]) ||
         !SCHEME_CHARS.containsAll(result.scheme.slice(1))) {
       // bad scheme
       return nullptr;
     }

     if (!text.startsWith("//")) {
       // We require an authority (hostname) part.
       return nullptr;
     }
     text = text.slice(2);

     {
       auto authority = split(text, END_AUTHORITY);

       // Everything before the first '@' is user info; what remains in `authority` is the host.
       KJ_IF_MAYBE(userpass, trySplit(authority, '@')) {
         if (context != REMOTE_HREF) {
           // No user/pass allowed here.
           return nullptr;
         }
         // After a successful split on ':', `*userpass` holds only the password portion.
         KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
           result.userInfo = UserInfo {
             percentDecode(*username, err, options),
             percentDecode(*userpass, err, options)
           };
         } else {
           result.userInfo = UserInfo {
             percentDecode(*userpass, err, options),
             nullptr
           };
         }
       }

       // The host is validated after decoding, then lower-cased.
       result.host = percentDecode(authority, err, options);
       if (!HOST_CHARS.containsAll(result.host)) return nullptr;
       toLower(result.host);
     }
   }

   // Path: consume one "/component" per iteration.
   while (text.startsWith("/")) {
     text = text.slice(1);
     auto part = split(text, END_PATH_PART);
     if (part.size() == 2 && part[0] == '.' && part[1] == '.') {
       // ".." drops the previous component, if there is one.
       if (result.path.size() != 0) {
         result.path.removeLast();
       }
       result.hasTrailingSlash = true;
     } else if ((part.size() == 0 && (!options.allowEmpty || text.size() == 0)) ||
                (part.size() == 1 && part[0] == '.')) {
       // Collapse consecutive slashes and "/./".
       // With allowEmpty, an empty component is kept unless it is the very end of the input.
       result.hasTrailingSlash = true;
     } else {
       result.path.add(percentDecode(part, err, options));
       result.hasTrailingSlash = false;
     }
   }

   // Query: parameters are separated by '&'; the name ends at the first '='.
   if (text.startsWith("?")) {
     do {
       text = text.slice(1);
       auto part = split(text, END_QUERY_PART);

       if (part.size() > 0 || options.allowEmpty) {
         KJ_IF_MAYBE(key, trySplit(part, '=')) {
           result.query.add(QueryParam { percentDecodeQuery(*key, err, options),
                                         percentDecodeQuery(part, err, options) });
         } else {
           // No '=': the value is left as a null String.
           result.query.add(QueryParam { percentDecodeQuery(part, err, options), nullptr });
         }
       }
     } while (text.startsWith("&"));
   }

   if (text.startsWith("#")) {
     if (context != REMOTE_HREF) {
       // No fragment allowed here.
       return nullptr;
     }
     result.fragment = percentDecode(text.slice(1), err, options);
   } else {
     // We should have consumed everything.
     KJ_ASSERT(text.size() == 0);
   }

   // Decoding errors are accumulated above and reported once, here.
   if (err) return nullptr;

   return kj::mv(result);
 }

 // Resolves `url` against this URL via tryParseRelative() and returns the result. If
 // tryParseRelative() yields nullptr, KJ_REQUIRE_NONNULL fails with "invalid relative URL" and
 // the input text.
 Url Url::parseRelative(StringPtr url) const {
   return KJ_REQUIRE_NONNULL(tryParseRelative(url), "invalid relative URL", url);
 }

 // Resolves `text` as a reference relative to this URL and returns the resulting URL, or nullptr
 // if the new host contains a character outside HOST_CHARS or any component fails
 // percent-decoding.
 //
 // Components absent from `text` are inherited from this URL: the scheme and the authority are
 // copied when not given; the path is copied only when `text` supplies neither an authority nor
 // a path; the query is copied only when `text` supplies no authority, path, or query. The
 // fragment is not inherited, except that an empty `text` yields a full clone of this URL.
 //
 // The reference is always parsed with REMOTE_HREF delimiters. A scheme given in `text` is
 // lower-cased, matching what tryParse() does with schemes.
 //
 // NOTE(review): unlike tryParse(), a scheme that is empty or starts with a non-letter is
 // accepted here, and `options.allowEmpty` is not consulted, so empty path components and empty
 // query parameters in `text` are always dropped -- confirm that both are intended.
 Maybe<Url> Url::tryParseRelative(StringPtr text) const {
   if (text.size() == 0) return clone();

   Url result;
   result.options = options;
   bool err = false;  // tracks percent-decoding errors

   auto& END_PATH_PART = getEndPathPart(Url::REMOTE_HREF);
   auto& END_QUERY_PART = getEndQueryPart(Url::REMOTE_HREF);

   // scheme
   {
     bool gotScheme = false;
     for (auto i: kj::indices(text)) {
       if (text[i] == ':') {
         // Only scheme characters precede this ':', so treat the prefix as a scheme.
         result.scheme = kj::str(text.slice(0, i));
         // Normalize case so the result matches what tryParse() would have produced.
         toLower(result.scheme);
         text = text.slice(i + 1);
         gotScheme = true;
         break;
       } else if (NOT_SCHEME_CHARS.contains(text[i])) {
         // no scheme
         break;
       }
     }
     if (!gotScheme) {
       // copy scheme
       result.scheme = kj::str(this->scheme);
     }
   }

   // authority
   bool hadNewAuthority = text.startsWith("//");
   if (hadNewAuthority) {
     text = text.slice(2);

     auto authority = split(text, END_AUTHORITY);

     // Everything before the first '@' is user info; what remains in `authority` is the host.
     KJ_IF_MAYBE(userpass, trySplit(authority, '@')) {
       // After a successful split on ':', `*userpass` holds only the password portion.
       KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
         result.userInfo = UserInfo {
           percentDecode(*username, err, options),
           percentDecode(*userpass, err, options)
         };
       } else {
         result.userInfo = UserInfo {
           percentDecode(*userpass, err, options),
           nullptr
         };
       }
     }

     result.host = percentDecode(authority, err, options);
     if (!HOST_CHARS.containsAll(result.host)) return nullptr;
     toLower(result.host);
   } else {
     // copy authority
     result.host = kj::str(this->host);
     result.userInfo = this->userInfo.map([](const UserInfo& userInfo) {
       return UserInfo {
         kj::str(userInfo.username),
         userInfo.password.map([](const String& password) { return kj::str(password); }),
       };
     });
   }

   // path
   bool hadNewPath = text.size() > 0 && text[0] != '?' && text[0] != '#';
   if (hadNewPath) {
     // There's a new path.

     if (text[0] == '/') {
       // New path is absolute, so don't copy the old path.
       text = text.slice(1);
       result.hasTrailingSlash = true;
     } else if (this->path.size() > 0) {
       // New path is relative, so start from the old path, dropping everything after the last
       // slash.
       auto slice = this->path.slice(0, this->path.size() - (this->hasTrailingSlash ? 0 : 1));
       result.path = KJ_MAP(part, slice) { return kj::str(part); };
       result.hasTrailingSlash = true;
     }

     // Consume one component per iteration; `text` sits just past a '/' at the top of the loop.
     for (;;) {
       auto part = split(text, END_PATH_PART);
       if (part.size() == 2 && part[0] == '.' && part[1] == '.') {
         // ".." drops the previous component, if there is one.
         if (result.path.size() != 0) {
           result.path.removeLast();
         }
         result.hasTrailingSlash = true;
       } else if (part.size() == 0 || (part.size() == 1 && part[0] == '.')) {
         // Collapse consecutive slashes and "/./".
         result.hasTrailingSlash = true;
       } else {
         result.path.add(percentDecode(part, err, options));
         result.hasTrailingSlash = false;
       }

       if (!text.startsWith("/")) break;
       text = text.slice(1);
     }
   } else if (!hadNewAuthority) {
     // copy path
     result.path = KJ_MAP(part, this->path) { return kj::str(part); };
     result.hasTrailingSlash = this->hasTrailingSlash;
   }

   // Query: parameters are separated by '&'; the name ends at the first '='.
   if (text.startsWith("?")) {
     do {
       text = text.slice(1);
       auto part = split(text, END_QUERY_PART);

       if (part.size() > 0) {
         KJ_IF_MAYBE(key, trySplit(part, '=')) {
           result.query.add(QueryParam { percentDecodeQuery(*key, err, options),
                                         percentDecodeQuery(part, err, options) });
         } else {
           // No '=': the value is left as a null String.
           result.query.add(QueryParam { percentDecodeQuery(part, err, options),
                                         nullptr });
         }
       }
     } while (text.startsWith("&"));
   } else if (!hadNewAuthority && !hadNewPath) {
     // copy query
     result.query = KJ_MAP(param, this->query) -> QueryParam {
       // Preserve the "allocated-ness" of `param.value` with this careful copy.
       return { kj::str(param.name), param.value.begin() == nullptr ? kj::String()
                                                                    : kj::str(param.value) };
     };
   }

   if (text.startsWith("#")) {
     result.fragment = percentDecode(text.slice(1), err, options);
   } else {
     // We should have consumed everything.
     KJ_ASSERT(text.size() == 0);
   }

   // Decoding errors are accumulated above and reported once, here.
   if (err) return nullptr;

   return kj::mv(result);
 }

 // Serializes this URL to text for the given `context`.
 //
 // HTTP_REQUEST output omits the scheme and authority, and emits "/" when the path is empty. User
 // info and the fragment are written only for REMOTE_HREF. When `options.percentDecode` is set,
 // each component is re-encoded with the matching encode*() function; otherwise it is emitted
 // verbatim.
 //
 // A host outside HOST_CHARS, or a path component that is ".", "..", or (unless
 // `options.allowEmpty`) empty, triggers a KJ requirement failure; the recovery blocks substitute
 // "invalid-host" or skip the component, respectively.
 String Url::toString(Context context) const {
   Vector<char> chars(128);

   if (context != HTTP_REQUEST) {
     chars.addAll(scheme);
     chars.addAll(StringPtr("://"));

     if (context == REMOTE_HREF) {
       KJ_IF_MAYBE(user, userInfo) {
         chars.addAll(options.percentDecode ? encodeUriUserInfo(user->username)
                                           : kj::str(user->username));
         KJ_IF_MAYBE(pass, user->password) {
           chars.add(':');
           chars.addAll(options.percentDecode ? encodeUriUserInfo(*pass) : kj::str(*pass));
         }
         chars.add('@');
       }
     }

     // RFC3986 specifies that hosts can contain percent-encoding escapes while suggesting that
     // they should only be used for UTF-8 sequences. However, the DNS standard specifies a
     // different way to encode Unicode into domain names and doesn't permit any characters which
     // would need to be escaped. Meanwhile, encodeUriComponent() here would incorrectly try to
     // escape colons and brackets (e.g. around ipv6 literal addresses). So, instead, we throw if
     // the host is invalid.
     if (HOST_CHARS.containsAll(host)) {
       chars.addAll(host);
     } else {
       KJ_FAIL_REQUIRE("invalid hostname when stringifying URL", host) {
         // Recovery path: emit a placeholder host.
         chars.addAll(StringPtr("invalid-host"));
         break;
       }
     }
   }

   for (auto& pathPart: path) {
     // Protect against path injection.
     KJ_REQUIRE((pathPart != "" || options.allowEmpty) && pathPart != "." && pathPart != "..",
                "invalid name in URL path", path) {
       // Recovery path: leave the offending component out of the output.
       continue;
     }
     chars.add('/');
     chars.addAll(options.percentDecode ? encodeUriPath(pathPart) : kj::str(pathPart));
   }
   if (hasTrailingSlash || (path.size() == 0 && context == HTTP_REQUEST)) {
     chars.add('/');
   }

   bool first = true;
   for (auto& param: query) {
     chars.add(first ? '?' : '&');
     first = false;
     chars.addAll(options.percentDecode ? encodeWwwForm(param.name) : kj::str(param.name));
     // A value whose begin() is null is written as a bare name with no '='.
     if (param.value.begin() != nullptr) {
       chars.add('=');
       chars.addAll(options.percentDecode ? encodeWwwForm(param.value) : kj::str(param.value));
     }
   }

   if (context == REMOTE_HREF) {
     KJ_IF_MAYBE(f, fragment) {
       chars.add('#');
       chars.addAll(options.percentDecode ? encodeUriFragment(*f) : kj::str(*f));
     }
   }

   // Append the NUL terminator before handing the buffer to String.
   chars.add('\0');
   return String(chars.releaseAsArray());
 }

 } // namespace kj
	// Copyright (c) 2017 Cloudflare, Inc. and contributors
	// Licensed under the MIT License:
	//
	// Permission is hereby granted, free of charge, to any person obtaining a copy
	// of this software and associated documentation files (the "Software"), to deal
	// in the Software without restriction, including without limitation the rights
	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	// copies of the Software, and to permit persons to whom the Software is
	// furnished to do so, subject to the following conditions:
	//
	// The above copyright notice and this permission notice shall be included in
	// all copies or substantial portions of the Software.
	//
	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
	// THE SOFTWARE.

	#include "url.h"
	#include <kj/encoding.h>
	#include <kj/parse/char.h>
	#include <kj/debug.h>
	#include <stdlib.h>

	namespace kj {

	namespace {

	// Basic character classes from which the scheme and host character sets below are built.
	constexpr auto ALPHAS = parse::charRange('a', 'z').orRange('A', 'Z');
	constexpr auto DIGITS = parse::charRange('0', '9');

	// Characters at which split() stops when extracting the authority component of a URL.
	constexpr auto END_AUTHORITY = parse::anyOfChars("/?#");

	// Authority, path, and query components can typically be terminated by the start of a fragment.
	// However, fragments are disallowed in HTTP_REQUEST and HTTP_PROXY_REQUEST contexts. As a quirk, we
	// allow the fragment start character ('#') to live unescaped in path and query components. We do
	// not currently allow it in the authority component, because our parser would reject it as a host
	// character anyway.

	// Returns the set of characters that end a single path component in the given context.
	// '#' is a terminator only for REMOTE_HREF; both request contexts leave it inside the component.
	const parse::CharGroup_& getEndPathPart(Url::Context context) {
	  static constexpr auto HREF_TERMINATORS = parse::anyOfChars("/?#");
	  static constexpr auto REQUEST_TERMINATORS = parse::anyOfChars("/?");

	  switch (context) {
	    case Url::REMOTE_HREF:
	      return HREF_TERMINATORS;

	    case Url::HTTP_PROXY_REQUEST:
	    case Url::HTTP_REQUEST:
	      return REQUEST_TERMINATORS;
	  }

	  KJ_UNREACHABLE;
	}

	// Returns the set of characters that end a single query parameter in the given context.
	// '#' is a terminator only for REMOTE_HREF; both request contexts leave it inside the parameter.
	const parse::CharGroup_& getEndQueryPart(Url::Context context) {
	  static constexpr auto HREF_TERMINATORS = parse::anyOfChars("&#");
	  static constexpr auto REQUEST_TERMINATORS = parse::anyOfChars("&");

	  switch (context) {
	    case Url::REMOTE_HREF:
	      return HREF_TERMINATORS;

	    case Url::HTTP_PROXY_REQUEST:
	    case Url::HTTP_REQUEST:
	      return REQUEST_TERMINATORS;
	  }

	  KJ_UNREACHABLE;
	}

	// Characters accepted in a scheme. tryParse() applies this to everything after the first
	// character, which it separately requires to be in ALPHAS.
	constexpr auto SCHEME_CHARS = ALPHAS.orGroup(DIGITS).orAny("+-.");
	// Complement of SCHEME_CHARS; tryParseRelative() uses it to stop scanning for a scheme.
	constexpr auto NOT_SCHEME_CHARS = SCHEME_CHARS.invert();

	// Characters accepted in a host name, checked both when parsing and when stringifying.
	constexpr auto HOST_CHARS = ALPHAS.orGroup(DIGITS).orAny(".-:[]_");
	// [] is for ipv6 literals.
	// _ is not allowed in domain names, but the WHATWG URL spec allows it in hostnames, so we do, too.
	// TODO(someday): The URL spec actually allows a lot more than just '_', and requires nameprepping
	//   to Punycode. We'll have to decide how we want to deal with all that.

	// Converts the ASCII letters 'A'..'Z' in `text` to lower case in place. Every other byte is
	// left untouched.
	void toLower(String& text) {
	  for (auto& ch: text) {
	    if (ch < 'A' || ch > 'Z') continue;
	    ch += 'a' - 'A';
	  }
	}

	// Looks for the first occurrence of `c` in `text`. If found, returns the characters before it
	// and advances `text` to just past it. If not found, returns nullptr and leaves `text` unchanged.
	Maybe<ArrayPtr<const char>> trySplit(StringPtr& text, char c) {
	  KJ_IF_MAYBE(found, text.findFirst(c)) {
	    auto at = *found;
	    ArrayPtr<const char> head = text.slice(0, at);
	    text = text.slice(at + 1);
	    return head;
	  }

	  return nullptr;
	}

	// ArrayPtr flavor of trySplit(): on the first occurrence of `c`, returns the characters before
	// it and advances `text` to just past it. Returns nullptr, with `text` unchanged, if `c` does
	// not occur.
	Maybe<ArrayPtr<const char>> trySplit(ArrayPtr<const char>& text, char c) {
	  for (size_t at = 0; at < text.size(); ++at) {
	    if (text[at] != c) continue;

	    ArrayPtr<const char> head = text.slice(0, at);
	    text = text.slice(at + 1, text.size());
	    return head;
	  }

	  return nullptr;
	}

	// Returns the prefix of `text` up to (not including) the first character found in `chars`, and
	// advances `text` so that it begins at that character. When no such character exists, the whole
	// text is returned and `text` becomes empty.
	ArrayPtr<const char> split(StringPtr& text, const parse::CharGroup_& chars) {
	  size_t stop = 0;
	  while (stop < text.size() && !chars.contains(text[stop])) {
	    ++stop;
	  }

	  if (stop == text.size()) {
	    // No terminator present: hand back everything.
	    ArrayPtr<const char> everything = text.asArray();
	    text = "";
	    return everything;
	  }

	  ArrayPtr<const char> head = text.slice(0, stop);
	  text = text.slice(stop);
	  return head;
	}

	// Returns `text` as an owned String. When `options.percentDecode` is set the text is run through
	// decodeUriComponent() and `hadErrors` is set to true if decoding reported errors; `hadErrors`
	// is never reset to false. Otherwise the text is copied verbatim.
	String percentDecode(ArrayPtr<const char> text, bool& hadErrors, const Url::Options& options) {
	  if (!options.percentDecode) {
	    return kj::str(text);
	  }

	  auto decoded = decodeUriComponent(text);
	  if (decoded.hadErrors) {
	    hadErrors = true;
	  }
	  return kj::mv(decoded);
	}

	// Query-string counterpart of percentDecode(): when `options.percentDecode` is set the text is
	// run through decodeWwwForm() and `hadErrors` is set to true if decoding reported errors.
	// Otherwise the text is copied verbatim.
	String percentDecodeQuery(ArrayPtr<const char> text, bool& hadErrors, const Url::Options& options) {
	  if (!options.percentDecode) {
	    return kj::str(text);
	  }

	  auto decoded = decodeWwwForm(text);
	  if (decoded.hadErrors) {
	    hadErrors = true;
	  }
	  return kj::mv(decoded);
	}

	} // namespace

	// Out-of-line destructor with an empty body. noexcept(false) allows an exception thrown while
	// destroying a member to propagate out of the destructor.
	Url::~Url() noexcept(false) {}

	// Produces a deep copy of this URL: every owned string is duplicated with kj::str() and
	// `options` is copied as-is.
	Url Url::clone() const {
	  // Duplicates one owned string; shared by the optional password and fragment.
	  auto dupString = [](const String& original) { return kj::str(original); };

	  return {
	    kj::str(scheme),
	    userInfo.map([&dupString](const UserInfo& info) -> UserInfo {
	      return { kj::str(info.username), info.password.map(dupString) };
	    }),
	    kj::str(host),
	    KJ_MAP(component, path) { return kj::str(component); },
	    hasTrailingSlash,
	    KJ_MAP(param, query) -> QueryParam {
	      // A value whose begin() is null stays a default-constructed String rather than becoming
	      // an allocated empty string, so toString() still omits the '=' for it.
	      if (param.value.begin() == nullptr) {
	        return { kj::str(param.name), kj::String() };
	      }
	      return { kj::str(param.name), kj::str(param.value) };
	    },
	    fragment.map(dupString),
	    options
	  };
	}

	// Parses `url` by forwarding all arguments to tryParse() and returns the resulting Url. If
	// tryParse() yields nullptr, KJ_REQUIRE_NONNULL fails with "invalid URL" and the input text.
	Url Url::parse(StringPtr url, Context context, Options options) {
	return KJ_REQUIRE_NONNULL(tryParse(url, context, options), "invalid URL", url);
	}

	// Parses `text` as a URL for the given `context`, returning nullptr if it is not acceptable.
	//
	// For HTTP_REQUEST the text must begin with '/' and scheme/authority parsing is skipped. For the
	// other contexts a "scheme://authority" prefix is required before the path.
	//
	// nullptr is returned when: the scheme is missing or malformed; "//" does not follow the scheme;
	// user info or a fragment is present in a context other than REMOTE_HREF; the decoded host
	// contains a character outside HOST_CHARS; or any component reported a percent-decoding error.
	Maybe<Url> Url::tryParse(StringPtr text, Context context, Options options) {
	  Url result;
	  result.options = options;
	  bool err = false;  // tracks percent-decoding errors

	  // Component terminators depend on whether '#' starts a fragment in this context.
	  auto& END_PATH_PART = getEndPathPart(context);
	  auto& END_QUERY_PART = getEndQueryPart(context);

	  if (context == HTTP_REQUEST) {
	    if (!text.startsWith("/")) {
	      return nullptr;
	    }
	  } else {
	    KJ_IF_MAYBE(scheme, trySplit(text, ':')) {
	      result.scheme = kj::str(*scheme);
	    } else {
	      // missing scheme
	      return nullptr;
	    }
	    toLower(result.scheme);
	    // A scheme must be non-empty, start with a letter, and continue with SCHEME_CHARS only.
	    if (result.scheme.size() == 0 ||
	        !ALPHAS.contains(result.scheme[0]) ||
	        !SCHEME_CHARS.containsAll(result.scheme.slice(1))) {
	      // bad scheme
	      return nullptr;
	    }

	    if (!text.startsWith("//")) {
	      // We require an authority (hostname) part.
	      return nullptr;
	    }
	    text = text.slice(2);

	    {
	      auto authority = split(text, END_AUTHORITY);

	      // Everything before the first '@' is user info; what remains in `authority` is the host.
	      KJ_IF_MAYBE(userpass, trySplit(authority, '@')) {
	        if (context != REMOTE_HREF) {
	          // No user/pass allowed here.
	          return nullptr;
	        }
	        // After a successful split on ':', `*userpass` holds only the password portion.
	        KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
	          result.userInfo = UserInfo {
	            percentDecode(*username, err, options),
	            percentDecode(*userpass, err, options)
	          };
	        } else {
	          result.userInfo = UserInfo {
	            percentDecode(*userpass, err, options),
	            nullptr
	          };
	        }
	      }

	      // The host is validated after decoding, then lower-cased.
	      result.host = percentDecode(authority, err, options);
	      if (!HOST_CHARS.containsAll(result.host)) return nullptr;
	      toLower(result.host);
	    }
	  }

	  // Path: consume one "/component" per iteration.
	  while (text.startsWith("/")) {
	    text = text.slice(1);
	    auto part = split(text, END_PATH_PART);
	    if (part.size() == 2 && part[0] == '.' && part[1] == '.') {
	      // ".." drops the previous component, if there is one.
	      if (result.path.size() != 0) {
	        result.path.removeLast();
	      }
	      result.hasTrailingSlash = true;
	    } else if ((part.size() == 0 && (!options.allowEmpty || text.size() == 0)) ||
	               (part.size() == 1 && part[0] == '.')) {
	      // Collapse consecutive slashes and "/./".
	      // With allowEmpty, an empty component is kept unless it is the very end of the input.
	      result.hasTrailingSlash = true;
	    } else {
	      result.path.add(percentDecode(part, err, options));
	      result.hasTrailingSlash = false;
	    }
	  }

	  // Query: parameters are separated by '&'; the name ends at the first '='.
	  if (text.startsWith("?")) {
	    do {
	      text = text.slice(1);
	      auto part = split(text, END_QUERY_PART);

	      if (part.size() > 0 || options.allowEmpty) {
	        KJ_IF_MAYBE(key, trySplit(part, '=')) {
	          result.query.add(QueryParam { percentDecodeQuery(*key, err, options),
	                                        percentDecodeQuery(part, err, options) });
	        } else {
	          // No '=': the value is left as a null String.
	          result.query.add(QueryParam { percentDecodeQuery(part, err, options), nullptr });
	        }
	      }
	    } while (text.startsWith("&"));
	  }

	  if (text.startsWith("#")) {
	    if (context != REMOTE_HREF) {
	      // No fragment allowed here.
	      return nullptr;
	    }
	    result.fragment = percentDecode(text.slice(1), err, options);
	  } else {
	    // We should have consumed everything.
	    KJ_ASSERT(text.size() == 0);
	  }

	  // Decoding errors are accumulated above and reported once, here.
	  if (err) return nullptr;

	  return kj::mv(result);
	}

	// Resolves `url` against this URL via tryParseRelative() and returns the result. If
	// tryParseRelative() yields nullptr, KJ_REQUIRE_NONNULL fails with "invalid relative URL" and
	// the input text.
	Url Url::parseRelative(StringPtr url) const {
	return KJ_REQUIRE_NONNULL(tryParseRelative(url), "invalid relative URL", url);
	}

	// Resolves `text` as a reference relative to this URL and returns the resulting URL, or nullptr
	// if the new host contains a character outside HOST_CHARS or any component fails
	// percent-decoding.
	//
	// Components absent from `text` are inherited from this URL: the scheme and the authority are
	// copied when not given; the path is copied only when `text` supplies neither an authority nor
	// a path; the query is copied only when `text` supplies no authority, path, or query. The
	// fragment is not inherited, except that an empty `text` yields a full clone of this URL.
	//
	// The reference is always parsed with REMOTE_HREF delimiters. A scheme given in `text` is
	// lower-cased, matching what tryParse() does with schemes.
	//
	// NOTE(review): unlike tryParse(), a scheme that is empty or starts with a non-letter is
	// accepted here, and `options.allowEmpty` is not consulted, so empty path components and empty
	// query parameters in `text` are always dropped -- confirm that both are intended.
	Maybe<Url> Url::tryParseRelative(StringPtr text) const {
	  if (text.size() == 0) return clone();

	  Url result;
	  result.options = options;
	  bool err = false;  // tracks percent-decoding errors

	  auto& END_PATH_PART = getEndPathPart(Url::REMOTE_HREF);
	  auto& END_QUERY_PART = getEndQueryPart(Url::REMOTE_HREF);

	  // scheme
	  {
	    bool gotScheme = false;
	    for (auto i: kj::indices(text)) {
	      if (text[i] == ':') {
	        // Only scheme characters precede this ':', so treat the prefix as a scheme.
	        result.scheme = kj::str(text.slice(0, i));
	        // Normalize case so the result matches what tryParse() would have produced.
	        toLower(result.scheme);
	        text = text.slice(i + 1);
	        gotScheme = true;
	        break;
	      } else if (NOT_SCHEME_CHARS.contains(text[i])) {
	        // no scheme
	        break;
	      }
	    }
	    if (!gotScheme) {
	      // copy scheme
	      result.scheme = kj::str(this->scheme);
	    }
	  }

	  // authority
	  bool hadNewAuthority = text.startsWith("//");
	  if (hadNewAuthority) {
	    text = text.slice(2);

	    auto authority = split(text, END_AUTHORITY);

	    // Everything before the first '@' is user info; what remains in `authority` is the host.
	    KJ_IF_MAYBE(userpass, trySplit(authority, '@')) {
	      // After a successful split on ':', `*userpass` holds only the password portion.
	      KJ_IF_MAYBE(username, trySplit(*userpass, ':')) {
	        result.userInfo = UserInfo {
	          percentDecode(*username, err, options),
	          percentDecode(*userpass, err, options)
	        };
	      } else {
	        result.userInfo = UserInfo {
	          percentDecode(*userpass, err, options),
	          nullptr
	        };
	      }
	    }

	    result.host = percentDecode(authority, err, options);
	    if (!HOST_CHARS.containsAll(result.host)) return nullptr;
	    toLower(result.host);
	  } else {
	    // copy authority
	    result.host = kj::str(this->host);
	    result.userInfo = this->userInfo.map([](const UserInfo& userInfo) {
	      return UserInfo {
	        kj::str(userInfo.username),
	        userInfo.password.map([](const String& password) { return kj::str(password); }),
	      };
	    });
	  }

	  // path
	  bool hadNewPath = text.size() > 0 && text[0] != '?' && text[0] != '#';
	  if (hadNewPath) {
	    // There's a new path.

	    if (text[0] == '/') {
	      // New path is absolute, so don't copy the old path.
	      text = text.slice(1);
	      result.hasTrailingSlash = true;
	    } else if (this->path.size() > 0) {
	      // New path is relative, so start from the old path, dropping everything after the last
	      // slash.
	      auto slice = this->path.slice(0, this->path.size() - (this->hasTrailingSlash ? 0 : 1));
	      result.path = KJ_MAP(part, slice) { return kj::str(part); };
	      result.hasTrailingSlash = true;
	    }

	    // Consume one component per iteration; `text` sits just past a '/' at the top of the loop.
	    for (;;) {
	      auto part = split(text, END_PATH_PART);
	      if (part.size() == 2 && part[0] == '.' && part[1] == '.') {
	        // ".." drops the previous component, if there is one.
	        if (result.path.size() != 0) {
	          result.path.removeLast();
	        }
	        result.hasTrailingSlash = true;
	      } else if (part.size() == 0 || (part.size() == 1 && part[0] == '.')) {
	        // Collapse consecutive slashes and "/./".
	        result.hasTrailingSlash = true;
	      } else {
	        result.path.add(percentDecode(part, err, options));
	        result.hasTrailingSlash = false;
	      }

	      if (!text.startsWith("/")) break;
	      text = text.slice(1);
	    }
	  } else if (!hadNewAuthority) {
	    // copy path
	    result.path = KJ_MAP(part, this->path) { return kj::str(part); };
	    result.hasTrailingSlash = this->hasTrailingSlash;
	  }

	  // Query: parameters are separated by '&'; the name ends at the first '='.
	  if (text.startsWith("?")) {
	    do {
	      text = text.slice(1);
	      auto part = split(text, END_QUERY_PART);

	      if (part.size() > 0) {
	        KJ_IF_MAYBE(key, trySplit(part, '=')) {
	          result.query.add(QueryParam { percentDecodeQuery(*key, err, options),
	                                        percentDecodeQuery(part, err, options) });
	        } else {
	          // No '=': the value is left as a null String.
	          result.query.add(QueryParam { percentDecodeQuery(part, err, options),
	                                        nullptr });
	        }
	      }
	    } while (text.startsWith("&"));
	  } else if (!hadNewAuthority && !hadNewPath) {
	    // copy query
	    result.query = KJ_MAP(param, this->query) -> QueryParam {
	      // Preserve the "allocated-ness" of `param.value` with this careful copy.
	      return { kj::str(param.name), param.value.begin() == nullptr ? kj::String()
	                                                                   : kj::str(param.value) };
	    };
	  }

	  if (text.startsWith("#")) {
	    result.fragment = percentDecode(text.slice(1), err, options);
	  } else {
	    // We should have consumed everything.
	    KJ_ASSERT(text.size() == 0);
	  }

	  // Decoding errors are accumulated above and reported once, here.
	  if (err) return nullptr;

	  return kj::mv(result);
	}

	// Serializes this URL to text for the given `context`.
	//
	// HTTP_REQUEST output omits the scheme and authority, and emits "/" when the path is empty. User
	// info and the fragment are written only for REMOTE_HREF. When `options.percentDecode` is set,
	// each component is re-encoded with the matching encode*() function; otherwise it is emitted
	// verbatim.
	//
	// A host outside HOST_CHARS, or a path component that is ".", "..", or (unless
	// `options.allowEmpty`) empty, triggers a KJ requirement failure; the recovery blocks substitute
	// "invalid-host" or skip the component, respectively.
	String Url::toString(Context context) const {
	  Vector<char> chars(128);

	  if (context != HTTP_REQUEST) {
	    chars.addAll(scheme);
	    chars.addAll(StringPtr("://"));

	    if (context == REMOTE_HREF) {
	      KJ_IF_MAYBE(user, userInfo) {
	        chars.addAll(options.percentDecode ? encodeUriUserInfo(user->username)
	                                          : kj::str(user->username));
	        KJ_IF_MAYBE(pass, user->password) {
	          chars.add(':');
	          // `pass` is a pointer bound by KJ_IF_MAYBE, so it must be dereferenced.
	          chars.addAll(options.percentDecode ? encodeUriUserInfo(*pass) : kj::str(*pass));
	        }
	        chars.add('@');
	      }
	    }

	    // RFC3986 specifies that hosts can contain percent-encoding escapes while suggesting that
	    // they should only be used for UTF-8 sequences. However, the DNS standard specifies a
	    // different way to encode Unicode into domain names and doesn't permit any characters which
	    // would need to be escaped. Meanwhile, encodeUriComponent() here would incorrectly try to
	    // escape colons and brackets (e.g. around ipv6 literal addresses). So, instead, we throw if
	    // the host is invalid.
	    if (HOST_CHARS.containsAll(host)) {
	      chars.addAll(host);
	    } else {
	      KJ_FAIL_REQUIRE("invalid hostname when stringifying URL", host) {
	        // Recovery path: emit a placeholder host.
	        chars.addAll(StringPtr("invalid-host"));
	        break;
	      }
	    }
	  }

	  for (auto& pathPart: path) {
	    // Protect against path injection.
	    KJ_REQUIRE((pathPart != "" || options.allowEmpty) && pathPart != "." && pathPart != "..",
	               "invalid name in URL path", path) {
	      // Recovery path: leave the offending component out of the output.
	      continue;
	    }
	    chars.add('/');
	    chars.addAll(options.percentDecode ? encodeUriPath(pathPart) : kj::str(pathPart));
	  }
	  if (hasTrailingSlash || (path.size() == 0 && context == HTTP_REQUEST)) {
	    chars.add('/');
	  }

	  bool first = true;
	  for (auto& param: query) {
	    chars.add(first ? '?' : '&');
	    first = false;
	    chars.addAll(options.percentDecode ? encodeWwwForm(param.name) : kj::str(param.name));
	    // A value whose begin() is null is written as a bare name with no '='.
	    if (param.value.begin() != nullptr) {
	      chars.add('=');
	      chars.addAll(options.percentDecode ? encodeWwwForm(param.value) : kj::str(param.value));
	    }
	  }

	  if (context == REMOTE_HREF) {
	    KJ_IF_MAYBE(f, fragment) {
	      chars.add('#');
	      // `f` is a pointer bound by KJ_IF_MAYBE, so it must be dereferenced.
	      chars.addAll(options.percentDecode ? encodeUriFragment(*f) : kj::str(*f));
	    }
	  }

	  // Append the NUL terminator before handing the buffer to String.
	  chars.add('\0');
	  return String(chars.releaseAsArray());
	}

	} // namespace kj