Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 1 | // Copyright 2001-2010 The RE2 Authors. All Rights Reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Paul Wankadia | 89a5907 | 2016-07-26 21:09:42 +1000 | [diff] [blame] | 5 | #ifndef RE2_STRINGPIECE_H_ |
| 6 | #define RE2_STRINGPIECE_H_ |
| 7 | |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 8 | // A string-like object that points to a sized piece of memory. |
| 9 | // |
| 10 | // Functions or methods may use const StringPiece& parameters to accept either |
| 11 | // a "const char*" or a "string" value that will be implicitly converted to |
| 12 | // a StringPiece. The implicit conversion means that it is often appropriate |
| 13 | // to include this .h file in other files rather than forward-declaring |
| 14 | // StringPiece as would be appropriate for most other Google classes. |
| 15 | // |
| 16 | // Systematic usage of StringPiece is encouraged as it will reduce unnecessary |
| 17 | // conversions from "const char*" to "string" and back again. |
| 18 | // |
| 19 | // |
| 20 | // Arghh! I wish C++ literals were "string". |
| 21 | |
Paul Wankadia | 070cf98 | 2016-08-22 20:32:08 +1000 | [diff] [blame] | 22 | #include <stddef.h> |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 23 | #include <string.h> |
Paul Wankadia | 3806a87 | 2015-06-24 15:50:08 +1000 | [diff] [blame] | 24 | #include <algorithm> |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 25 | #include <iosfwd> |
Paul Wankadia | 070cf98 | 2016-08-22 20:32:08 +1000 | [diff] [blame] | 26 | #include <iterator> |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 27 | #include <string> |
| 28 | |
| 29 | namespace re2 { |
| 30 | |
// A non-owning, read-only view of a run of chars: a pointer plus a length.
//
// Only the pointer and the size are stored; the bytes themselves are never
// copied by this class, so the memory being viewed must remain valid for as
// long as the StringPiece is used. The viewed bytes are not required to be
// NUL-terminated. A default-constructed StringPiece has a null data pointer
// and a size of zero.
class StringPiece {
 public:
  typedef std::char_traits<char> traits_type;
  typedef char value_type;
  typedef char* pointer;
  typedef const char* const_pointer;
  typedef char& reference;
  typedef const char& const_reference;
  typedef const char* const_iterator;
  typedef const_iterator iterator;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef const_reverse_iterator reverse_iterator;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  // Largest value of size_type; used below as the default "no limit" /
  // "from the end" argument of substr() and rfind().
  static const size_type npos = static_cast<size_type>(-1);

  // The single-argument constructors are deliberately non-explicit so that
  // callers can pass a "const char*" or a "std::string" wherever a
  // "StringPiece" is expected.

  // Creates an empty piece with a null data pointer.
  StringPiece()
      : data_(nullptr), size_(0) {}
  // Views the contents of `str`.
  StringPiece(const std::string& str)
      : data_(str.data()), size_(str.size()) {}
  // Views the NUL-terminated string `str`; a null `str` yields size zero.
  StringPiece(const char* str)
      : data_(str), size_(str == nullptr ? 0 : strlen(str)) {}
  // Views exactly `len` bytes starting at `str`.
  StringPiece(const char* str, size_type len)
      : data_(str), size_(len) {}

  // Forward and reverse iteration over the viewed bytes.
  const_iterator begin() const { return data_; }
  const_iterator end() const { return data_ + size_; }
  const_reverse_iterator rbegin() const {
    return const_reverse_iterator(data_ + size_);
  }
  const_reverse_iterator rend() const {
    return const_reverse_iterator(data_);
  }

  // Number of bytes viewed; length() is a synonym for size().
  size_type size() const { return size_; }
  size_type length() const { return size_; }
  bool empty() const { return size_ == 0; }

  // Unchecked element access: `i` is not validated against size().
  const_reference operator[](size_type i) const { return data_[i]; }
  // Pointer to the first viewed byte; may be null.
  const_pointer data() const { return data_; }

  // Drops the first `n` bytes from the view. No bounds check is performed
  // here, so `n` must not exceed size().
  void remove_prefix(size_type n) {
    data_ += n;
    size_ -= n;
  }

  // Drops the last `n` bytes from the view. No bounds check is performed
  // here, so `n` must not exceed size().
  void remove_suffix(size_type n) {
    size_ -= n;
  }

  // Re-points this piece at the NUL-terminated string `str`; a null `str`
  // yields size zero.
  void set(const char* str) {
    data_ = str;
    size_ = str == nullptr ? 0 : strlen(str);
  }

  // Re-points this piece at exactly `len` bytes starting at `str`.
  void set(const char* str, size_type len) {
    data_ = str;
    size_ = len;
  }

  // Converts to `std::basic_string`. A piece with a null data pointer
  // converts to an empty string.
  template <typename A>
  explicit operator std::basic_string<char, traits_type, A>() const {
    if (!data_) return {};
    return std::basic_string<char, traits_type, A>(data_, size_);
  }

  // Returns a std::string holding a copy of the viewed bytes. As with the
  // conversion operator above, a null data pointer produces an empty string
  // rather than being handed to the std::string constructor.
  std::string as_string() const {
    if (data_ == nullptr) return std::string();
    return std::string(data_, size_);
  }

  // Same as as_string(). Many other string-like interfaces name the routine
  // that converts to a C++ string "ToString", so both spellings are offered;
  // as_string() remains for existing code.
  std::string ToString() const {
    return as_string();
  }

  // Replaces the contents of `*target` with a copy of the viewed bytes.
  // A piece with a null data pointer leaves `*target` empty.
  void CopyToString(std::string* target) const {
    if (data_ == nullptr) {
      target->clear();
      return;
    }
    target->assign(data_, size_);
  }

  // Appends a copy of the viewed bytes to `*target`. A piece with a null
  // data pointer appends nothing.
  void AppendToString(std::string* target) const {
    if (data_ == nullptr) return;
    target->append(data_, size_);
  }

  // Declared here, defined out of line (not in this header).
  size_type copy(char* buf, size_type n, size_type pos = 0) const;
  StringPiece substr(size_type pos = 0, size_type n = npos) const;

  // Three-way bytewise comparison. Returns -1, 0 or 1 according to whether
  // this piece orders before, equal to, or after `x`. The common prefix is
  // compared with memcmp(); if it is equal, the shorter piece orders first.
  int compare(const StringPiece& x) const {
    size_type min_size = std::min(size(), x.size());
    // memcmp() is skipped for a zero-length prefix so that a null data
    // pointer is never passed to it.
    if (min_size > 0) {
      int r = memcmp(data(), x.data(), min_size);
      if (r < 0) return -1;
      if (r > 0) return 1;
    }
    if (size() < x.size()) return -1;
    if (size() > x.size()) return 1;
    return 0;
  }

  // Does "this" start with "x"? An empty "x" always matches.
  bool starts_with(const StringPiece& x) const {
    return x.empty() ||
           (size() >= x.size() && memcmp(data(), x.data(), x.size()) == 0);
  }

  // Does "this" end with "x"? An empty "x" always matches.
  bool ends_with(const StringPiece& x) const {
    return x.empty() ||
           (size() >= x.size() &&
            memcmp(data() + (size() - x.size()), x.data(), x.size()) == 0);
  }

  // True when find(s) reports a position other than npos.
  bool contains(const StringPiece& s) const {
    return find(s) != npos;
  }

  // Search functions; declared here, defined out of line (not in this
  // header).
  size_type find(const StringPiece& s, size_type pos = 0) const;
  size_type find(char c, size_type pos = 0) const;
  size_type rfind(const StringPiece& s, size_type pos = npos) const;
  size_type rfind(char c, size_type pos = npos) const;

 private:
  const_pointer data_;  // First viewed byte; not owned, may be null.
  size_type size_;      // Number of viewed bytes.
};
| 163 | |
Russ Cox | ff5573b | 2011-05-01 15:26:33 -0400 | [diff] [blame] | 164 | inline bool operator==(const StringPiece& x, const StringPiece& y) { |
Paul Wankadia | 606f78e | 2017-01-12 23:55:21 +1100 | [diff] [blame] | 165 | StringPiece::size_type len = x.size(); |
| 166 | if (len != y.size()) return false; |
| 167 | return x.data() == y.data() || len == 0 || |
| 168 | memcmp(x.data(), y.data(), len) == 0; |
Russ Cox | ff5573b | 2011-05-01 15:26:33 -0400 | [diff] [blame] | 169 | } |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 170 | |
Russ Cox | ff5573b | 2011-05-01 15:26:33 -0400 | [diff] [blame] | 171 | inline bool operator!=(const StringPiece& x, const StringPiece& y) { |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 172 | return !(x == y); |
| 173 | } |
| 174 | |
Russ Cox | ff5573b | 2011-05-01 15:26:33 -0400 | [diff] [blame] | 175 | inline bool operator<(const StringPiece& x, const StringPiece& y) { |
Paul Wankadia | 606f78e | 2017-01-12 23:55:21 +1100 | [diff] [blame] | 176 | StringPiece::size_type min_size = std::min(x.size(), y.size()); |
| 177 | int r = min_size == 0 ? 0 : memcmp(x.data(), y.data(), min_size); |
| 178 | return (r < 0) || (r == 0 && x.size() < y.size()); |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 179 | } |
| 180 | |
Russ Cox | ff5573b | 2011-05-01 15:26:33 -0400 | [diff] [blame] | 181 | inline bool operator>(const StringPiece& x, const StringPiece& y) { |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 182 | return y < x; |
| 183 | } |
| 184 | |
Russ Cox | ff5573b | 2011-05-01 15:26:33 -0400 | [diff] [blame] | 185 | inline bool operator<=(const StringPiece& x, const StringPiece& y) { |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 186 | return !(x > y); |
| 187 | } |
| 188 | |
Russ Cox | ff5573b | 2011-05-01 15:26:33 -0400 | [diff] [blame] | 189 | inline bool operator>=(const StringPiece& x, const StringPiece& y) { |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 190 | return !(x < y); |
| 191 | } |
| 192 | |
// Allow StringPiece to be logged: stream insertion of `p` into `o`.
// Only declared in this header; the definition is not part of this file.
// std::ostream is usable here as a reference type because <iosfwd> is
// included above.
std::ostream& operator<<(std::ostream& o, const StringPiece& p);
| 195 | |
| 196 | } // namespace re2 |
Russ Cox | 0a38cba | 2010-03-02 17:17:51 -0800 | [diff] [blame] | 197 | |
Paul Wankadia | 89a5907 | 2016-07-26 21:09:42 +1000 | [diff] [blame] | 198 | #endif // RE2_STRINGPIECE_H_ |