/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
2 *
3 * Additional changes are licensed under the same terms as NGINX and
4 * copyright Joyent, Inc. and other Node contributors. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24#include "http_parser.h"
25#include <assert.h>
26#include <stddef.h>
27#include <ctype.h>
28#include <stdlib.h>
29#include <string.h>
30#include <limits.h>
31
32#ifndef ULLONG_MAX
33# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
34#endif
35
36#ifndef MIN
37# define MIN(a,b) ((a) < (b) ? (a) : (b))
38#endif
39
40#ifndef ARRAY_SIZE
41# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
42#endif
43
44#ifndef BIT_AT
45# define BIT_AT(a, i) \
46 (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
47 (1 << ((unsigned int) (i) & 7))))
48#endif
49
50#ifndef ELEM_AT
51# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
52#endif
53
54#define SET_ERRNO(e) \
55do { \
56 parser->http_errno = (e); \
57} while(0)
58
59
60/* Run the notify callback FOR, returning ER if it fails */
61#define CALLBACK_NOTIFY_(FOR, ER) \
62do { \
63 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
64 \
65 if (settings->on_##FOR) { \
66 if (0 != settings->on_##FOR(parser)) { \
67 SET_ERRNO(HPE_CB_##FOR); \
68 } \
69 \
70 /* We either errored above or got paused; get out */ \
71 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
72 return (ER); \
73 } \
74 } \
75} while (0)
76
77/* Run the notify callback FOR and consume the current byte */
78#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
79
80/* Run the notify callback FOR and don't consume the current byte */
81#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
82
83/* Run data callback FOR with LEN bytes, returning ER if it fails */
84#define CALLBACK_DATA_(FOR, LEN, ER) \
85do { \
86 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
87 \
88 if (FOR##_mark) { \
89 if (settings->on_##FOR) { \
90 if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \
91 SET_ERRNO(HPE_CB_##FOR); \
92 } \
93 \
94 /* We either errored above or got paused; get out */ \
95 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \
96 return (ER); \
97 } \
98 } \
99 FOR##_mark = NULL; \
100 } \
101} while (0)
102
103/* Run the data callback FOR and consume the current byte */
104#define CALLBACK_DATA(FOR) \
105 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
106
107/* Run the data callback FOR and don't consume the current byte */
108#define CALLBACK_DATA_NOADVANCE(FOR) \
109 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
110
111/* Set the mark FOR; non-destructive if mark is already set */
112#define MARK(FOR) \
113do { \
114 if (!FOR##_mark) { \
115 FOR##_mark = p; \
116 } \
117} while (0)
118
119
120#define PROXY_CONNECTION "proxy-connection"
121#define CONNECTION "connection"
122#define CONTENT_LENGTH "content-length"
123#define TRANSFER_ENCODING "transfer-encoding"
124#define UPGRADE "upgrade"
125#define CHUNKED "chunked"
126#define KEEP_ALIVE "keep-alive"
127#define CLOSE "close"
128
129
130static const char *method_strings[] =
131 {
132#define XX(num, name, string) #string,
133 HTTP_METHOD_MAP(XX)
134#undef XX
135 };
136
137
138/* Tokens as defined by rfc 2616. Also lowercases them.
139 * token = 1*<any CHAR except CTLs or separators>
140 * separators = "(" | ")" | "<" | ">" | "@"
141 * | "," | ";" | ":" | "\" | <">
142 * | "/" | "[" | "]" | "?" | "="
143 * | "{" | "}" | SP | HT
144 */
145static const char tokens[256] = {
146/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
147 0, 0, 0, 0, 0, 0, 0, 0,
148/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
149 0, 0, 0, 0, 0, 0, 0, 0,
150/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
151 0, 0, 0, 0, 0, 0, 0, 0,
152/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
153 0, 0, 0, 0, 0, 0, 0, 0,
154/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
155 0, '!', 0, '#', '$', '%', '&', '\'',
156/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
157 0, 0, '*', '+', 0, '-', '.', 0,
158/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
159 '0', '1', '2', '3', '4', '5', '6', '7',
160/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
161 '8', '9', 0, 0, 0, 0, 0, 0,
162/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
163 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
164/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
165 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
166/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
167 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
168/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
169 'x', 'y', 'z', 0, 0, 0, '^', '_',
170/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
171 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
172/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
173 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
174/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
175 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
176/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
177 'x', 'y', 'z', 0, '|', 0, '~', 0 };
178
179
180static const int8_t unhex[256] =
181 {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
182 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
183 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
184 , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
185 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
186 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
187 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
188 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
189 };
190
191
192#if HTTP_PARSER_STRICT
193# define T(v) 0
194#else
195# define T(v) v
196#endif
197
198
199static const uint8_t normal_url_char[32] = {
200/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
201 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
202/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
203 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
204/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
205 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
206/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
207 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
208/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
209 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
210/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
211 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
212/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
213 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
214/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
215 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
216/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
217 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
218/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
219 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
220/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
221 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
222/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
223 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
224/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
225 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
226/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
227 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
228/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
229 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
230/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
231 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
232
233#undef T
234
235enum state
236 { s_dead = 1 /* important that this is > 0 */
237
238 , s_start_req_or_res
239 , s_res_or_resp_H
240 , s_start_res
241 , s_res_H
242 , s_res_HT
243 , s_res_HTT
244 , s_res_HTTP
245 , s_res_first_http_major
246 , s_res_http_major
247 , s_res_first_http_minor
248 , s_res_http_minor
249 , s_res_first_status_code
250 , s_res_status_code
251 , s_res_status
252 , s_res_line_almost_done
253
254 , s_start_req
255
256 , s_req_method
257 , s_req_spaces_before_url
258 , s_req_schema
259 , s_req_schema_slash
260 , s_req_schema_slash_slash
261 , s_req_server_start
262 , s_req_server
263 , s_req_server_with_at
264 , s_req_path
265 , s_req_query_string_start
266 , s_req_query_string
267 , s_req_fragment_start
268 , s_req_fragment
269 , s_req_http_start
270 , s_req_http_H
271 , s_req_http_HT
272 , s_req_http_HTT
273 , s_req_http_HTTP
274 , s_req_first_http_major
275 , s_req_http_major
276 , s_req_first_http_minor
277 , s_req_http_minor
278 , s_req_line_almost_done
279
280 , s_header_field_start
281 , s_header_field
282 , s_header_value_start
283 , s_header_value
284 , s_header_value_lws
285
286 , s_header_almost_done
287
288 , s_chunk_size_start
289 , s_chunk_size
290 , s_chunk_parameters
291 , s_chunk_size_almost_done
292
293 , s_headers_almost_done
294 , s_headers_done
295
296 /* Important: 's_headers_done' must be the last 'header' state. All
297 * states beyond this must be 'body' states. It is used for overflow
298 * checking. See the PARSING_HEADER() macro.
299 */
300
301 , s_chunk_data
302 , s_chunk_data_almost_done
303 , s_chunk_data_done
304
305 , s_body_identity
306 , s_body_identity_eof
307
308 , s_message_done
309 };
310
311
312#define PARSING_HEADER(state) (state <= s_headers_done)
313
314
315enum header_states
316 { h_general = 0
317 , h_C
318 , h_CO
319 , h_CON
320
321 , h_matching_connection
322 , h_matching_proxy_connection
323 , h_matching_content_length
324 , h_matching_transfer_encoding
325 , h_matching_upgrade
326
327 , h_connection
328 , h_content_length
329 , h_transfer_encoding
330 , h_upgrade
331
332 , h_matching_transfer_encoding_chunked
333 , h_matching_connection_keep_alive
334 , h_matching_connection_close
335
336 , h_transfer_encoding_chunked
337 , h_connection_keep_alive
338 , h_connection_close
339 };
340
341enum http_host_state
342 {
343 s_http_host_dead = 1
344 , s_http_userinfo_start
345 , s_http_userinfo
346 , s_http_host_start
347 , s_http_host_v6_start
348 , s_http_host
349 , s_http_host_v6
350 , s_http_host_v6_end
351 , s_http_host_port_start
352 , s_http_host_port
353};
354
355/* Macros for character classes; depends on strict-mode */
356#define CR '\r'
357#define LF '\n'
358#define LOWER(c) (unsigned char)(c | 0x20)
359#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
360#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
361#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
362#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
363#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
364 (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
365 (c) == ')')
366#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
367 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
368 (c) == '$' || (c) == ',')
369
370#if HTTP_PARSER_STRICT
371#define TOKEN(c) (tokens[(unsigned char)c])
372#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
373#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
374#else
375#define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
376#define IS_URL_CHAR(c) \
377 (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
378#define IS_HOST_CHAR(c) \
379 (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
380#endif
381
382
383#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
384
385
386#if HTTP_PARSER_STRICT
387# define STRICT_CHECK(cond) \
388do { \
389 if (cond) { \
390 SET_ERRNO(HPE_STRICT); \
391 goto error; \
392 } \
393} while (0)
394# define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
395#else
396# define STRICT_CHECK(cond)
397# define NEW_MESSAGE() start_state
398#endif
399
400
401/* Map errno values to strings for human-readable output */
402#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
403static struct {
404 const char *name;
405 const char *description;
406} http_strerror_tab[] = {
407 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
408};
409#undef HTTP_STRERROR_GEN
410
411int http_message_needs_eof(const http_parser *parser);
412
413/* Our URL parser.
414 *
415 * This is designed to be shared by http_parser_execute() for URL validation,
416 * hence it has a state transition + byte-for-byte interface. In addition, it
417 * is meant to be embedded in http_parser_parse_url(), which does the dirty
418 * work of turning state transitions URL components for its API.
419 *
420 * This function should only be invoked with non-space characters. It is
421 * assumed that the caller cares about (and can detect) the transition between
422 * URL and non-URL states by looking for these.
423 */
424static enum state
425parse_url_char(enum state s, const char ch)
426{
427 if (ch == ' ' || ch == '\r' || ch == '\n') {
428 return s_dead;
429 }
430
431#if HTTP_PARSER_STRICT
432 if (ch == '\t' || ch == '\f') {
433 return s_dead;
434 }
435#endif
436
437 switch (s) {
438 case s_req_spaces_before_url:
439 /* Proxied requests are followed by scheme of an absolute URI (alpha).
440 * All methods except CONNECT are followed by '/' or '*'.
441 */
442
443 if (ch == '/' || ch == '*') {
444 return s_req_path;
445 }
446
447 /* The schema must start with an alpha character. After that, it may
448 * consist of digits, '+', '-' or '.', followed by a ':'.
449 */
450 if (IS_ALPHA(ch)) {
451 return s_req_schema;
452 }
453
454 break;
455
456 case s_req_schema:
457 if (IS_ALPHANUM(ch) || ch == '+' || ch == '-' || ch == '.') {
458 return s;
459 }
460
461 if (ch == ':') {
462 return s_req_schema_slash;
463 }
464
465 break;
466
467 case s_req_schema_slash:
468 if (ch == '/') {
469 return s_req_schema_slash_slash;
470 }
471
472 break;
473
474 case s_req_schema_slash_slash:
475 if (ch == '/') {
476 return s_req_server_start;
477 }
478
479 break;
480
481 case s_req_server_with_at:
482 if (ch == '@') {
483 return s_dead;
484 }
485
486 /* FALLTHROUGH */
487 case s_req_server_start:
488 case s_req_server:
489 if (ch == '/') {
490 return s_req_path;
491 }
492
493 if (ch == '?') {
494 return s_req_query_string_start;
495 }
496
497 if (ch == '@') {
498 return s_req_server_with_at;
499 }
500
501 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
502 return s_req_server;
503 }
504
505 break;
506
507 case s_req_path:
508 if (IS_URL_CHAR(ch)) {
509 return s;
510 }
511
512 switch (ch) {
513 case '?':
514 return s_req_query_string_start;
515
516 case '#':
517 return s_req_fragment_start;
518 }
519
520 break;
521
522 case s_req_query_string_start:
523 case s_req_query_string:
524 if (IS_URL_CHAR(ch)) {
525 return s_req_query_string;
526 }
527
528 switch (ch) {
529 case '?':
530 /* allow extra '?' in query string */
531 return s_req_query_string;
532
533 case '#':
534 return s_req_fragment_start;
535 }
536
537 break;
538
539 case s_req_fragment_start:
540 if (IS_URL_CHAR(ch)) {
541 return s_req_fragment;
542 }
543
544 switch (ch) {
545 case '?':
546 return s_req_fragment;
547
548 case '#':
549 return s;
550 }
551
552 break;
553
554 case s_req_fragment:
555 if (IS_URL_CHAR(ch)) {
556 return s;
557 }
558
559 switch (ch) {
560 case '?':
561 case '#':
562 return s;
563 }
564
565 break;
566
567 default:
568 break;
569 }
570
571 /* We should never fall out of the switch above unless there's an error */
572 return s_dead;
573}
574
575size_t http_parser_execute (http_parser *parser,
576 const http_parser_settings *settings,
577 const char *data,
578 size_t len)
579{
580 char c, ch;
581 int8_t unhex_val;
582 const char *p = data;
583 const char *header_field_mark = 0;
584 const char *header_value_mark = 0;
585 const char *url_mark = 0;
586 const char *body_mark = 0;
587
588 /* We're in an error state. Don't bother doing anything. */
589 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
590 return 0;
591 }
592
593 if (len == 0) {
594 switch (parser->state) {
595 case s_body_identity_eof:
596 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
597 * we got paused.
598 */
599 CALLBACK_NOTIFY_NOADVANCE(message_complete);
600 return 0;
601
602 case s_dead:
603 case s_start_req_or_res:
604 case s_start_res:
605 case s_start_req:
606 return 0;
607
608 default:
609 SET_ERRNO(HPE_INVALID_EOF_STATE);
610 return 1;
611 }
612 }
613
614
615 if (parser->state == s_header_field)
616 header_field_mark = data;
617 if (parser->state == s_header_value)
618 header_value_mark = data;
619 switch (parser->state) {
620 case s_req_path:
621 case s_req_schema:
622 case s_req_schema_slash:
623 case s_req_schema_slash_slash:
624 case s_req_server_start:
625 case s_req_server:
626 case s_req_server_with_at:
627 case s_req_query_string_start:
628 case s_req_query_string:
629 case s_req_fragment_start:
630 case s_req_fragment:
631 url_mark = data;
632 break;
633 }
634
635 for (p=data; p != data + len; p++) {
636 ch = *p;
637
638 if (PARSING_HEADER(parser->state)) {
639 ++parser->nread;
640 /* Buffer overflow attack */
641 if (parser->nread > HTTP_MAX_HEADER_SIZE) {
642 SET_ERRNO(HPE_HEADER_OVERFLOW);
643 goto error;
644 }
645 }
646
647 reexecute_byte:
648 switch (parser->state) {
649
650 case s_dead:
651 /* this state is used after a 'Connection: close' message
652 * the parser will error out if it reads another message
653 */
654 if (ch == CR || ch == LF)
655 break;
656
657 SET_ERRNO(HPE_CLOSED_CONNECTION);
658 goto error;
659
660 case s_start_req_or_res:
661 {
662 if (ch == CR || ch == LF)
663 break;
664 parser->flags = 0;
665 parser->content_length = ULLONG_MAX;
666
667 if (ch == 'H') {
668 parser->state = s_res_or_resp_H;
669
670 CALLBACK_NOTIFY(message_begin);
671 } else {
672 parser->type = HTTP_REQUEST;
673 parser->state = s_start_req;
674 goto reexecute_byte;
675 }
676
677 break;
678 }
679
680 case s_res_or_resp_H:
681 if (ch == 'T') {
682 parser->type = HTTP_RESPONSE;
683 parser->state = s_res_HT;
684 } else {
685 if (ch != 'E') {
686 SET_ERRNO(HPE_INVALID_CONSTANT);
687 goto error;
688 }
689
690 parser->type = HTTP_REQUEST;
691 parser->method = HTTP_HEAD;
692 parser->index = 2;
693 parser->state = s_req_method;
694 }
695 break;
696
697 case s_start_res:
698 {
699 parser->flags = 0;
700 parser->content_length = ULLONG_MAX;
701
702 switch (ch) {
703 case 'H':
704 parser->state = s_res_H;
705 break;
706
707 case CR:
708 case LF:
709 break;
710
711 default:
712 SET_ERRNO(HPE_INVALID_CONSTANT);
713 goto error;
714 }
715
716 CALLBACK_NOTIFY(message_begin);
717 break;
718 }
719
720 case s_res_H:
721 STRICT_CHECK(ch != 'T');
722 parser->state = s_res_HT;
723 break;
724
725 case s_res_HT:
726 STRICT_CHECK(ch != 'T');
727 parser->state = s_res_HTT;
728 break;
729
730 case s_res_HTT:
731 STRICT_CHECK(ch != 'P');
732 parser->state = s_res_HTTP;
733 break;
734
735 case s_res_HTTP:
736 STRICT_CHECK(ch != '/');
737 parser->state = s_res_first_http_major;
738 break;
739
740 case s_res_first_http_major:
741 if (ch < '0' || ch > '9') {
742 SET_ERRNO(HPE_INVALID_VERSION);
743 goto error;
744 }
745
746 parser->http_major = ch - '0';
747 parser->state = s_res_http_major;
748 break;
749
750 /* major HTTP version or dot */
751 case s_res_http_major:
752 {
753 if (ch == '.') {
754 parser->state = s_res_first_http_minor;
755 break;
756 }
757
758 if (!IS_NUM(ch)) {
759 SET_ERRNO(HPE_INVALID_VERSION);
760 goto error;
761 }
762
763 parser->http_major *= 10;
764 parser->http_major += ch - '0';
765
766 if (parser->http_major > 999) {
767 SET_ERRNO(HPE_INVALID_VERSION);
768 goto error;
769 }
770
771 break;
772 }
773
774 /* first digit of minor HTTP version */
775 case s_res_first_http_minor:
776 if (!IS_NUM(ch)) {
777 SET_ERRNO(HPE_INVALID_VERSION);
778 goto error;
779 }
780
781 parser->http_minor = ch - '0';
782 parser->state = s_res_http_minor;
783 break;
784
785 /* minor HTTP version or end of request line */
786 case s_res_http_minor:
787 {
788 if (ch == ' ') {
789 parser->state = s_res_first_status_code;
790 break;
791 }
792
793 if (!IS_NUM(ch)) {
794 SET_ERRNO(HPE_INVALID_VERSION);
795 goto error;
796 }
797
798 parser->http_minor *= 10;
799 parser->http_minor += ch - '0';
800
801 if (parser->http_minor > 999) {
802 SET_ERRNO(HPE_INVALID_VERSION);
803 goto error;
804 }
805
806 break;
807 }
808
809 case s_res_first_status_code:
810 {
811 if (!IS_NUM(ch)) {
812 if (ch == ' ') {
813 break;
814 }
815
816 SET_ERRNO(HPE_INVALID_STATUS);
817 goto error;
818 }
819 parser->status_code = ch - '0';
820 parser->state = s_res_status_code;
821 break;
822 }
823
824 case s_res_status_code:
825 {
826 if (!IS_NUM(ch)) {
827 switch (ch) {
828 case ' ':
829 parser->state = s_res_status;
830 break;
831 case CR:
832 parser->state = s_res_line_almost_done;
833 break;
834 case LF:
835 parser->state = s_header_field_start;
836 break;
837 default:
838 SET_ERRNO(HPE_INVALID_STATUS);
839 goto error;
840 }
841 break;
842 }
843
844 parser->status_code *= 10;
845 parser->status_code += ch - '0';
846
847 if (parser->status_code > 999) {
848 SET_ERRNO(HPE_INVALID_STATUS);
849 goto error;
850 }
851
852 break;
853 }
854
855 case s_res_status:
856 /* the human readable status. e.g. "NOT FOUND"
857 * we are not humans so just ignore this */
858 if (ch == CR) {
859 parser->state = s_res_line_almost_done;
860 break;
861 }
862
863 if (ch == LF) {
864 parser->state = s_header_field_start;
865 break;
866 }
867 break;
868
869 case s_res_line_almost_done:
870 STRICT_CHECK(ch != LF);
871 parser->state = s_header_field_start;
872 break;
873
874 case s_start_req:
875 {
876 if (ch == CR || ch == LF)
877 break;
878 parser->flags = 0;
879 parser->content_length = ULLONG_MAX;
880
881 if (!IS_ALPHA(ch)) {
882 SET_ERRNO(HPE_INVALID_METHOD);
883 goto error;
884 }
885
886 parser->method = (enum http_method) 0;
887 parser->index = 1;
888 switch (ch) {
889 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
890 case 'D': parser->method = HTTP_DELETE; break;
891 case 'G': parser->method = HTTP_GET; break;
892 case 'H': parser->method = HTTP_HEAD; break;
893 case 'L': parser->method = HTTP_LOCK; break;
894 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break;
895 case 'N': parser->method = HTTP_NOTIFY; break;
896 case 'O': parser->method = HTTP_OPTIONS; break;
897 case 'P': parser->method = HTTP_POST;
898 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
899 break;
900 case 'R': parser->method = HTTP_REPORT; break;
901 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
902 case 'T': parser->method = HTTP_TRACE; break;
903 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break;
904 default:
905 SET_ERRNO(HPE_INVALID_METHOD);
906 goto error;
907 }
908 parser->state = s_req_method;
909
910 CALLBACK_NOTIFY(message_begin);
911
912 break;
913 }
914
915 case s_req_method:
916 {
917 const char *matcher;
918 if (ch == '\0') {
919 SET_ERRNO(HPE_INVALID_METHOD);
920 goto error;
921 }
922
923 matcher = method_strings[parser->method];
924 if (ch == ' ' && matcher[parser->index] == '\0') {
925 parser->state = s_req_spaces_before_url;
926 } else if (ch == matcher[parser->index]) {
927 ; /* nada */
928 } else if (parser->method == HTTP_CONNECT) {
929 if (parser->index == 1 && ch == 'H') {
930 parser->method = HTTP_CHECKOUT;
931 } else if (parser->index == 2 && ch == 'P') {
932 parser->method = HTTP_COPY;
933 } else {
934 goto error;
935 }
936 } else if (parser->method == HTTP_MKCOL) {
937 if (parser->index == 1 && ch == 'O') {
938 parser->method = HTTP_MOVE;
939 } else if (parser->index == 1 && ch == 'E') {
940 parser->method = HTTP_MERGE;
941 } else if (parser->index == 1 && ch == '-') {
942 parser->method = HTTP_MSEARCH;
943 } else if (parser->index == 2 && ch == 'A') {
944 parser->method = HTTP_MKACTIVITY;
945 } else {
946 goto error;
947 }
948 } else if (parser->method == HTTP_SUBSCRIBE) {
949 if (parser->index == 1 && ch == 'E') {
950 parser->method = HTTP_SEARCH;
951 } else {
952 goto error;
953 }
954 } else if (parser->index == 1 && parser->method == HTTP_POST) {
955 if (ch == 'R') {
956 parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
957 } else if (ch == 'U') {
958 parser->method = HTTP_PUT; /* or HTTP_PURGE */
959 } else if (ch == 'A') {
960 parser->method = HTTP_PATCH;
961 } else {
962 goto error;
963 }
964 } else if (parser->index == 2) {
965 if (parser->method == HTTP_PUT) {
966 if (ch == 'R') parser->method = HTTP_PURGE;
967 } else if (parser->method == HTTP_UNLOCK) {
968 if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE;
969 }
970 } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
971 parser->method = HTTP_PROPPATCH;
972 } else {
973 SET_ERRNO(HPE_INVALID_METHOD);
974 goto error;
975 }
976
977 ++parser->index;
978 break;
979 }
980
981 case s_req_spaces_before_url:
982 {
983 if (ch == ' ') break;
984
985 MARK(url);
986 if (parser->method == HTTP_CONNECT) {
987 parser->state = s_req_server_start;
988 }
989
990 parser->state = parse_url_char((enum state)parser->state, ch);
991 if (parser->state == s_dead) {
992 SET_ERRNO(HPE_INVALID_URL);
993 goto error;
994 }
995
996 break;
997 }
998
999 case s_req_schema:
1000 case s_req_schema_slash:
1001 case s_req_schema_slash_slash:
1002 case s_req_server_start:
1003 {
1004 switch (ch) {
1005 /* No whitespace allowed here */
1006 case ' ':
1007 case CR:
1008 case LF:
1009 SET_ERRNO(HPE_INVALID_URL);
1010 goto error;
1011 default:
1012 parser->state = parse_url_char((enum state)parser->state, ch);
1013 if (parser->state == s_dead) {
1014 SET_ERRNO(HPE_INVALID_URL);
1015 goto error;
1016 }
1017 }
1018
1019 break;
1020 }
1021
1022 case s_req_server:
1023 case s_req_server_with_at:
1024 case s_req_path:
1025 case s_req_query_string_start:
1026 case s_req_query_string:
1027 case s_req_fragment_start:
1028 case s_req_fragment:
1029 {
1030 switch (ch) {
1031 case ' ':
1032 parser->state = s_req_http_start;
1033 CALLBACK_DATA(url);
1034 break;
1035 case CR:
1036 case LF:
1037 parser->http_major = 0;
1038 parser->http_minor = 9;
1039 parser->state = (ch == CR) ?
1040 s_req_line_almost_done :
1041 s_header_field_start;
1042 CALLBACK_DATA(url);
1043 break;
1044 default:
1045 parser->state = parse_url_char((enum state)parser->state, ch);
1046 if (parser->state == s_dead) {
1047 SET_ERRNO(HPE_INVALID_URL);
1048 goto error;
1049 }
1050 }
1051 break;
1052 }
1053
1054 case s_req_http_start:
1055 switch (ch) {
1056 case 'H':
1057 parser->state = s_req_http_H;
1058 break;
1059 case ' ':
1060 break;
1061 default:
1062 SET_ERRNO(HPE_INVALID_CONSTANT);
1063 goto error;
1064 }
1065 break;
1066
1067 case s_req_http_H:
1068 STRICT_CHECK(ch != 'T');
1069 parser->state = s_req_http_HT;
1070 break;
1071
1072 case s_req_http_HT:
1073 STRICT_CHECK(ch != 'T');
1074 parser->state = s_req_http_HTT;
1075 break;
1076
1077 case s_req_http_HTT:
1078 STRICT_CHECK(ch != 'P');
1079 parser->state = s_req_http_HTTP;
1080 break;
1081
1082 case s_req_http_HTTP:
1083 STRICT_CHECK(ch != '/');
1084 parser->state = s_req_first_http_major;
1085 break;
1086
1087 /* first digit of major HTTP version */
1088 case s_req_first_http_major:
1089 if (ch < '1' || ch > '9') {
1090 SET_ERRNO(HPE_INVALID_VERSION);
1091 goto error;
1092 }
1093
1094 parser->http_major = ch - '0';
1095 parser->state = s_req_http_major;
1096 break;
1097
1098 /* major HTTP version or dot */
1099 case s_req_http_major:
1100 {
1101 if (ch == '.') {
1102 parser->state = s_req_first_http_minor;
1103 break;
1104 }
1105
1106 if (!IS_NUM(ch)) {
1107 SET_ERRNO(HPE_INVALID_VERSION);
1108 goto error;
1109 }
1110
1111 parser->http_major *= 10;
1112 parser->http_major += ch - '0';
1113
1114 if (parser->http_major > 999) {
1115 SET_ERRNO(HPE_INVALID_VERSION);
1116 goto error;
1117 }
1118
1119 break;
1120 }
1121
1122 /* first digit of minor HTTP version */
1123 case s_req_first_http_minor:
1124 if (!IS_NUM(ch)) {
1125 SET_ERRNO(HPE_INVALID_VERSION);
1126 goto error;
1127 }
1128
1129 parser->http_minor = ch - '0';
1130 parser->state = s_req_http_minor;
1131 break;
1132
1133 /* minor HTTP version or end of request line */
1134 case s_req_http_minor:
1135 {
1136 if (ch == CR) {
1137 parser->state = s_req_line_almost_done;
1138 break;
1139 }
1140
1141 if (ch == LF) {
1142 parser->state = s_header_field_start;
1143 break;
1144 }
1145
1146 /* XXX allow spaces after digit? */
1147
1148 if (!IS_NUM(ch)) {
1149 SET_ERRNO(HPE_INVALID_VERSION);
1150 goto error;
1151 }
1152
1153 parser->http_minor *= 10;
1154 parser->http_minor += ch - '0';
1155
1156 if (parser->http_minor > 999) {
1157 SET_ERRNO(HPE_INVALID_VERSION);
1158 goto error;
1159 }
1160
1161 break;
1162 }
1163
1164 /* end of request line */
1165 case s_req_line_almost_done:
1166 {
1167 if (ch != LF) {
1168 SET_ERRNO(HPE_LF_EXPECTED);
1169 goto error;
1170 }
1171
1172 parser->state = s_header_field_start;
1173 break;
1174 }
1175
1176 case s_header_field_start:
1177 {
1178 if (ch == CR) {
1179 parser->state = s_headers_almost_done;
1180 break;
1181 }
1182
1183 if (ch == LF) {
1184 /* they might be just sending \n instead of \r\n so this would be
1185 * the second \n to denote the end of headers*/
1186 parser->state = s_headers_almost_done;
1187 goto reexecute_byte;
1188 }
1189
1190 c = TOKEN(ch);
1191
1192 if (!c) {
1193 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1194 goto error;
1195 }
1196
1197 MARK(header_field);
1198
1199 parser->index = 0;
1200 parser->state = s_header_field;
1201
1202 switch (c) {
1203 case 'c':
1204 parser->header_state = h_C;
1205 break;
1206
1207 case 'p':
1208 parser->header_state = h_matching_proxy_connection;
1209 break;
1210
1211 case 't':
1212 parser->header_state = h_matching_transfer_encoding;
1213 break;
1214
1215 case 'u':
1216 parser->header_state = h_matching_upgrade;
1217 break;
1218
1219 default:
1220 parser->header_state = h_general;
1221 break;
1222 }
1223 break;
1224 }
1225
1226 case s_header_field:
1227 {
1228 c = TOKEN(ch);
1229
1230 if (c) {
1231 switch (parser->header_state) {
1232 case h_general:
1233 break;
1234
1235 case h_C:
1236 parser->index++;
1237 parser->header_state = (c == 'o' ? h_CO : h_general);
1238 break;
1239
1240 case h_CO:
1241 parser->index++;
1242 parser->header_state = (c == 'n' ? h_CON : h_general);
1243 break;
1244
1245 case h_CON:
1246 parser->index++;
1247 switch (c) {
1248 case 'n':
1249 parser->header_state = h_matching_connection;
1250 break;
1251 case 't':
1252 parser->header_state = h_matching_content_length;
1253 break;
1254 default:
1255 parser->header_state = h_general;
1256 break;
1257 }
1258 break;
1259
1260 /* connection */
1261
1262 case h_matching_connection:
1263 parser->index++;
1264 if (parser->index > sizeof(CONNECTION)-1
1265 || c != CONNECTION[parser->index]) {
1266 parser->header_state = h_general;
1267 } else if (parser->index == sizeof(CONNECTION)-2) {
1268 parser->header_state = h_connection;
1269 }
1270 break;
1271
1272 /* proxy-connection */
1273
1274 case h_matching_proxy_connection:
1275 parser->index++;
1276 if (parser->index > sizeof(PROXY_CONNECTION)-1
1277 || c != PROXY_CONNECTION[parser->index]) {
1278 parser->header_state = h_general;
1279 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
1280 parser->header_state = h_connection;
1281 }
1282 break;
1283
1284 /* content-length */
1285
1286 case h_matching_content_length:
1287 parser->index++;
1288 if (parser->index > sizeof(CONTENT_LENGTH)-1
1289 || c != CONTENT_LENGTH[parser->index]) {
1290 parser->header_state = h_general;
1291 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
1292 parser->header_state = h_content_length;
1293 }
1294 break;
1295
1296 /* transfer-encoding */
1297
1298 case h_matching_transfer_encoding:
1299 parser->index++;
1300 if (parser->index > sizeof(TRANSFER_ENCODING)-1
1301 || c != TRANSFER_ENCODING[parser->index]) {
1302 parser->header_state = h_general;
1303 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
1304 parser->header_state = h_transfer_encoding;
1305 }
1306 break;
1307
1308 /* upgrade */
1309
1310 case h_matching_upgrade:
1311 parser->index++;
1312 if (parser->index > sizeof(UPGRADE)-1
1313 || c != UPGRADE[parser->index]) {
1314 parser->header_state = h_general;
1315 } else if (parser->index == sizeof(UPGRADE)-2) {
1316 parser->header_state = h_upgrade;
1317 }
1318 break;
1319
1320 case h_connection:
1321 case h_content_length:
1322 case h_transfer_encoding:
1323 case h_upgrade:
1324 if (ch != ' ') parser->header_state = h_general;
1325 break;
1326
1327 default:
1328 assert(0 && "Unknown header_state");
1329 break;
1330 }
1331 break;
1332 }
1333
1334 if (ch == ':') {
1335 parser->state = s_header_value_start;
1336 CALLBACK_DATA(header_field);
1337 break;
1338 }
1339
1340 if (ch == CR) {
1341 parser->state = s_header_almost_done;
1342 CALLBACK_DATA(header_field);
1343 break;
1344 }
1345
1346 if (ch == LF) {
1347 parser->state = s_header_field_start;
1348 CALLBACK_DATA(header_field);
1349 break;
1350 }
1351
1352 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
1353 goto error;
1354 }
1355
1356 case s_header_value_start:
1357 {
1358 if (ch == ' ' || ch == '\t') break;
1359
1360 MARK(header_value);
1361
1362 parser->state = s_header_value;
1363 parser->index = 0;
1364
1365 if (ch == CR) {
1366 parser->header_state = h_general;
1367 parser->state = s_header_almost_done;
1368 CALLBACK_DATA(header_value);
1369 break;
1370 }
1371
1372 if (ch == LF) {
1373 parser->state = s_header_field_start;
1374 CALLBACK_DATA(header_value);
1375 break;
1376 }
1377
1378 c = LOWER(ch);
1379
1380 switch (parser->header_state) {
1381 case h_upgrade:
1382 parser->flags |= F_UPGRADE;
1383 parser->header_state = h_general;
1384 break;
1385
1386 case h_transfer_encoding:
1387 /* looking for 'Transfer-Encoding: chunked' */
1388 if ('c' == c) {
1389 parser->header_state = h_matching_transfer_encoding_chunked;
1390 } else {
1391 parser->header_state = h_general;
1392 }
1393 break;
1394
1395 case h_content_length:
1396 if (!IS_NUM(ch)) {
1397 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1398 goto error;
1399 }
1400
1401 parser->content_length = ch - '0';
1402 break;
1403
1404 case h_connection:
1405 /* looking for 'Connection: keep-alive' */
1406 if (c == 'k') {
1407 parser->header_state = h_matching_connection_keep_alive;
1408 /* looking for 'Connection: close' */
1409 } else if (c == 'c') {
1410 parser->header_state = h_matching_connection_close;
1411 } else {
1412 parser->header_state = h_general;
1413 }
1414 break;
1415
1416 default:
1417 parser->header_state = h_general;
1418 break;
1419 }
1420 break;
1421 }
1422
1423 case s_header_value:
1424 {
1425
1426 if (ch == CR) {
1427 parser->state = s_header_almost_done;
1428 CALLBACK_DATA(header_value);
1429 break;
1430 }
1431
1432 if (ch == LF) {
1433 parser->state = s_header_almost_done;
1434 CALLBACK_DATA_NOADVANCE(header_value);
1435 goto reexecute_byte;
1436 }
1437
1438 c = LOWER(ch);
1439
1440 switch (parser->header_state) {
1441 case h_general:
1442 break;
1443
1444 case h_connection:
1445 case h_transfer_encoding:
1446 assert(0 && "Shouldn't get here.");
1447 break;
1448
1449 case h_content_length:
1450 {
1451 uint64_t t;
1452
1453 if (ch == ' ') break;
1454
1455 if (!IS_NUM(ch)) {
1456 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1457 goto error;
1458 }
1459
1460 t = parser->content_length;
1461 t *= 10;
1462 t += ch - '0';
1463
1464 /* Overflow? */
1465 if (t < parser->content_length || t == ULLONG_MAX) {
1466 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1467 goto error;
1468 }
1469
1470 parser->content_length = t;
1471 break;
1472 }
1473
1474 /* Transfer-Encoding: chunked */
1475 case h_matching_transfer_encoding_chunked:
1476 parser->index++;
1477 if (parser->index > sizeof(CHUNKED)-1
1478 || c != CHUNKED[parser->index]) {
1479 parser->header_state = h_general;
1480 } else if (parser->index == sizeof(CHUNKED)-2) {
1481 parser->header_state = h_transfer_encoding_chunked;
1482 }
1483 break;
1484
1485 /* looking for 'Connection: keep-alive' */
1486 case h_matching_connection_keep_alive:
1487 parser->index++;
1488 if (parser->index > sizeof(KEEP_ALIVE)-1
1489 || c != KEEP_ALIVE[parser->index]) {
1490 parser->header_state = h_general;
1491 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
1492 parser->header_state = h_connection_keep_alive;
1493 }
1494 break;
1495
1496 /* looking for 'Connection: close' */
1497 case h_matching_connection_close:
1498 parser->index++;
1499 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
1500 parser->header_state = h_general;
1501 } else if (parser->index == sizeof(CLOSE)-2) {
1502 parser->header_state = h_connection_close;
1503 }
1504 break;
1505
1506 case h_transfer_encoding_chunked:
1507 case h_connection_keep_alive:
1508 case h_connection_close:
1509 if (ch != ' ') parser->header_state = h_general;
1510 break;
1511
1512 default:
1513 parser->state = s_header_value;
1514 parser->header_state = h_general;
1515 break;
1516 }
1517 break;
1518 }
1519
1520 case s_header_almost_done:
1521 {
1522 STRICT_CHECK(ch != LF);
1523
1524 parser->state = s_header_value_lws;
1525
1526 switch (parser->header_state) {
1527 case h_connection_keep_alive:
1528 parser->flags |= F_CONNECTION_KEEP_ALIVE;
1529 break;
1530 case h_connection_close:
1531 parser->flags |= F_CONNECTION_CLOSE;
1532 break;
1533 case h_transfer_encoding_chunked:
1534 parser->flags |= F_CHUNKED;
1535 break;
1536 default:
1537 break;
1538 }
1539
1540 break;
1541 }
1542
1543 case s_header_value_lws:
1544 {
1545 if (ch == ' ' || ch == '\t')
1546 parser->state = s_header_value_start;
1547 else
1548 {
1549 parser->state = s_header_field_start;
1550 goto reexecute_byte;
1551 }
1552 break;
1553 }
1554
1555 case s_headers_almost_done:
1556 {
1557 STRICT_CHECK(ch != LF);
1558
1559 if (parser->flags & F_TRAILING) {
1560 /* End of a chunked request */
1561 parser->state = NEW_MESSAGE();
1562 CALLBACK_NOTIFY(message_complete);
1563 break;
1564 }
1565
1566 parser->state = s_headers_done;
1567
1568 /* Set this here so that on_headers_complete() callbacks can see it */
1569 parser->upgrade =
1570 (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT);
1571
1572 /* Here we call the headers_complete callback. This is somewhat
1573 * different than other callbacks because if the user returns 1, we
1574 * will interpret that as saying that this message has no body. This
1575 * is needed for the annoying case of receiving a response to a HEAD
1576 * request.
1577 *
1578 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
1579 * we have to simulate it by handling a change in errno below.
1580 */
1581 if (settings->on_headers_complete) {
1582 switch (settings->on_headers_complete(parser)) {
1583 case 0:
1584 break;
1585
1586 case 1:
1587 parser->flags |= F_SKIPBODY;
1588 break;
1589
1590 default:
1591 SET_ERRNO(HPE_CB_headers_complete);
1592 return p - data; /* Error */
1593 }
1594 }
1595
1596 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
1597 return p - data;
1598 }
1599
1600 goto reexecute_byte;
1601 }
1602
1603 case s_headers_done:
1604 {
1605 STRICT_CHECK(ch != LF);
1606
1607 parser->nread = 0;
1608
1609 /* Exit, the rest of the connect is in a different protocol. */
1610 if (parser->upgrade) {
1611 parser->state = NEW_MESSAGE();
1612 CALLBACK_NOTIFY(message_complete);
1613 return (p - data) + 1;
1614 }
1615
1616 if (parser->flags & F_SKIPBODY) {
1617 parser->state = NEW_MESSAGE();
1618 CALLBACK_NOTIFY(message_complete);
1619 } else if (parser->flags & F_CHUNKED) {
1620 /* chunked encoding - ignore Content-Length header */
1621 parser->state = s_chunk_size_start;
1622 } else {
1623 if (parser->content_length == 0) {
1624 /* Content-Length header given but zero: Content-Length: 0\r\n */
1625 parser->state = NEW_MESSAGE();
1626 CALLBACK_NOTIFY(message_complete);
1627 } else if (parser->content_length != ULLONG_MAX) {
1628 /* Content-Length header given and non-zero */
1629 parser->state = s_body_identity;
1630 } else {
1631 if (parser->type == HTTP_REQUEST ||
1632 !http_message_needs_eof(parser)) {
1633 /* Assume content-length 0 - read the next */
1634 parser->state = NEW_MESSAGE();
1635 CALLBACK_NOTIFY(message_complete);
1636 } else {
1637 /* Read body until EOF */
1638 parser->state = s_body_identity_eof;
1639 }
1640 }
1641 }
1642
1643 break;
1644 }
1645
1646 case s_body_identity:
1647 {
1648 uint64_t to_read = MIN(parser->content_length,
1649 (uint64_t) ((data + len) - p));
1650
1651 assert(parser->content_length != 0
1652 && parser->content_length != ULLONG_MAX);
1653
1654 /* The difference between advancing content_length and p is because
1655 * the latter will automatically advance on the next loop iteration.
1656 * Further, if content_length ends up at 0, we want to see the last
1657 * byte again for our message complete callback.
1658 */
1659 MARK(body);
1660 parser->content_length -= to_read;
1661 p += to_read - 1;
1662
1663 if (parser->content_length == 0) {
1664 parser->state = s_message_done;
1665
1666 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
1667 *
1668 * The alternative to doing this is to wait for the next byte to
1669 * trigger the data callback, just as in every other case. The
1670 * problem with this is that this makes it difficult for the test
1671 * harness to distinguish between complete-on-EOF and
1672 * complete-on-length. It's not clear that this distinction is
1673 * important for applications, but let's keep it for now.
1674 */
1675 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
1676 goto reexecute_byte;
1677 }
1678
1679 break;
1680 }
1681
1682 /* read until EOF */
1683 case s_body_identity_eof:
1684 MARK(body);
1685 p = data + len - 1;
1686
1687 break;
1688
1689 case s_message_done:
1690 parser->state = NEW_MESSAGE();
1691 CALLBACK_NOTIFY(message_complete);
1692 break;
1693
1694 case s_chunk_size_start:
1695 {
1696 assert(parser->nread == 1);
1697 assert(parser->flags & F_CHUNKED);
1698
1699 unhex_val = unhex[(unsigned char)ch];
1700 if (unhex_val == -1) {
1701 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1702 goto error;
1703 }
1704
1705 parser->content_length = unhex_val;
1706 parser->state = s_chunk_size;
1707 break;
1708 }
1709
1710 case s_chunk_size:
1711 {
1712 uint64_t t;
1713
1714 assert(parser->flags & F_CHUNKED);
1715
1716 if (ch == CR) {
1717 parser->state = s_chunk_size_almost_done;
1718 break;
1719 }
1720
1721 unhex_val = unhex[(unsigned char)ch];
1722
1723 if (unhex_val == -1) {
1724 if (ch == ';' || ch == ' ') {
1725 parser->state = s_chunk_parameters;
1726 break;
1727 }
1728
1729 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
1730 goto error;
1731 }
1732
1733 t = parser->content_length;
1734 t *= 16;
1735 t += unhex_val;
1736
1737 /* Overflow? */
1738 if (t < parser->content_length || t == ULLONG_MAX) {
1739 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
1740 goto error;
1741 }
1742
1743 parser->content_length = t;
1744 break;
1745 }
1746
1747 case s_chunk_parameters:
1748 {
1749 assert(parser->flags & F_CHUNKED);
1750 /* just ignore this. TODO check for overflow */
1751 if (ch == CR) {
1752 parser->state = s_chunk_size_almost_done;
1753 break;
1754 }
1755 break;
1756 }
1757
1758 case s_chunk_size_almost_done:
1759 {
1760 assert(parser->flags & F_CHUNKED);
1761 STRICT_CHECK(ch != LF);
1762
1763 parser->nread = 0;
1764
1765 if (parser->content_length == 0) {
1766 parser->flags |= F_TRAILING;
1767 parser->state = s_header_field_start;
1768 } else {
1769 parser->state = s_chunk_data;
1770 }
1771 break;
1772 }
1773
1774 case s_chunk_data:
1775 {
1776 uint64_t to_read = MIN(parser->content_length,
1777 (uint64_t) ((data + len) - p));
1778
1779 assert(parser->flags & F_CHUNKED);
1780 assert(parser->content_length != 0
1781 && parser->content_length != ULLONG_MAX);
1782
1783 /* See the explanation in s_body_identity for why the content
1784 * length and data pointers are managed this way.
1785 */
1786 MARK(body);
1787 parser->content_length -= to_read;
1788 p += to_read - 1;
1789
1790 if (parser->content_length == 0) {
1791 parser->state = s_chunk_data_almost_done;
1792 }
1793
1794 break;
1795 }
1796
1797 case s_chunk_data_almost_done:
1798 assert(parser->flags & F_CHUNKED);
1799 assert(parser->content_length == 0);
1800 STRICT_CHECK(ch != CR);
1801 parser->state = s_chunk_data_done;
1802 CALLBACK_DATA(body);
1803 break;
1804
1805 case s_chunk_data_done:
1806 assert(parser->flags & F_CHUNKED);
1807 STRICT_CHECK(ch != LF);
1808 parser->nread = 0;
1809 parser->state = s_chunk_size_start;
1810 break;
1811
1812 default:
1813 assert(0 && "unhandled state");
1814 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
1815 goto error;
1816 }
1817 }
1818
1819 /* Run callbacks for any marks that we have leftover after we ran out of
1820 * bytes. There should be at most one of these set, so it's OK to invoke
1821 * them in series (unset marks will not result in callbacks).
1822 *
1823 * We use the NOADVANCE() variety of callbacks here because 'p' has already
1824 * overflowed 'data' and this allows us to correct for the off-by-one that
1825 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
1826 * value that's in-bounds).
1827 */
1828
1829 assert(((header_field_mark ? 1 : 0) +
1830 (header_value_mark ? 1 : 0) +
1831 (url_mark ? 1 : 0) +
1832 (body_mark ? 1 : 0)) <= 1);
1833
1834 CALLBACK_DATA_NOADVANCE(header_field);
1835 CALLBACK_DATA_NOADVANCE(header_value);
1836 CALLBACK_DATA_NOADVANCE(url);
1837 CALLBACK_DATA_NOADVANCE(body);
1838
1839 return len;
1840
1841error:
1842 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
1843 SET_ERRNO(HPE_UNKNOWN);
1844 }
1845
1846 return (p - data);
1847}
1848
1849
1850/* Does the parser need to see an EOF to find the end of the message? */
1851int
1852http_message_needs_eof (const http_parser *parser)
1853{
1854 if (parser->type == HTTP_REQUEST) {
1855 return 0;
1856 }
1857
1858 /* See RFC 2616 section 4.4 */
1859 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
1860 parser->status_code == 204 || /* No Content */
1861 parser->status_code == 304 || /* Not Modified */
1862 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
1863 return 0;
1864 }
1865
1866 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
1867 return 0;
1868 }
1869
1870 return 1;
1871}
1872
1873
1874int
1875http_should_keep_alive (const http_parser *parser)
1876{
1877 if (parser->http_major > 0 && parser->http_minor > 0) {
1878 /* HTTP/1.1 */
1879 if (parser->flags & F_CONNECTION_CLOSE) {
1880 return 0;
1881 }
1882 } else {
1883 /* HTTP/1.0 or earlier */
1884 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
1885 return 0;
1886 }
1887 }
1888
1889 return !http_message_needs_eof(parser);
1890}
1891
1892
1893const char *
1894http_method_str (enum http_method m)
1895{
1896 return ELEM_AT(method_strings, m, "<unknown>");
1897}
1898
1899
1900void
1901http_parser_init (http_parser *parser, enum http_parser_type t)
1902{
1903 void *data = parser->data; /* preserve application data */
1904 memset(parser, 0, sizeof(*parser));
1905 parser->data = data;
1906 parser->type = t;
1907 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
1908 parser->http_errno = HPE_OK;
1909}
1910
1911const char *
1912http_errno_name(enum http_errno err) {
1913 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1914 return http_strerror_tab[err].name;
1915}
1916
1917const char *
1918http_errno_description(enum http_errno err) {
1919 assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0])));
1920 return http_strerror_tab[err].description;
1921}
1922
1923static enum http_host_state
1924http_parse_host_char(enum http_host_state s, const char ch) {
1925 switch(s) {
1926 case s_http_userinfo:
1927 case s_http_userinfo_start:
1928 if (ch == '@') {
1929 return s_http_host_start;
1930 }
1931
1932 if (IS_USERINFO_CHAR(ch)) {
1933 return s_http_userinfo;
1934 }
1935 break;
1936
1937 case s_http_host_start:
1938 if (ch == '[') {
1939 return s_http_host_v6_start;
1940 }
1941
1942 if (IS_HOST_CHAR(ch)) {
1943 return s_http_host;
1944 }
1945
1946 break;
1947
1948 case s_http_host:
1949 if (IS_HOST_CHAR(ch)) {
1950 return s_http_host;
1951 }
1952
1953 /* FALLTHROUGH */
1954 case s_http_host_v6_end:
1955 if (ch == ':') {
1956 return s_http_host_port_start;
1957 }
1958
1959 break;
1960
1961 case s_http_host_v6:
1962 if (ch == ']') {
1963 return s_http_host_v6_end;
1964 }
1965
1966 /* FALLTHROUGH */
1967 case s_http_host_v6_start:
1968 if (IS_HEX(ch) || ch == ':') {
1969 return s_http_host_v6;
1970 }
1971
1972 break;
1973
1974 case s_http_host_port:
1975 case s_http_host_port_start:
1976 if (IS_NUM(ch)) {
1977 return s_http_host_port;
1978 }
1979
1980 break;
1981
1982 default:
1983 break;
1984 }
1985 return s_http_host_dead;
1986}
1987
1988static int
1989http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
1990 enum http_host_state s;
1991
1992 const char *p;
1993 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
1994
Chih-Hung Hsieh43f06942019-12-19 15:01:08 -08001995 if (buflen > UINT16_MAX)
1996 return 1;
1997
Inna Palantff3f07a2019-07-11 16:15:26 -07001998 u->field_data[UF_HOST].len = 0;
1999
2000 s = found_at ? s_http_userinfo_start : s_http_host_start;
2001
2002 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
2003 enum http_host_state new_s = http_parse_host_char(s, *p);
2004
2005 if (new_s == s_http_host_dead) {
2006 return 1;
2007 }
2008
2009 switch(new_s) {
2010 case s_http_host:
2011 if (s != s_http_host) {
Chih-Hung Hsieh43f06942019-12-19 15:01:08 -08002012 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
Inna Palantff3f07a2019-07-11 16:15:26 -07002013 }
2014 u->field_data[UF_HOST].len++;
2015 break;
2016
2017 case s_http_host_v6:
2018 if (s != s_http_host_v6) {
Chih-Hung Hsieh43f06942019-12-19 15:01:08 -08002019 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
Inna Palantff3f07a2019-07-11 16:15:26 -07002020 }
2021 u->field_data[UF_HOST].len++;
2022 break;
2023
2024 case s_http_host_port:
2025 if (s != s_http_host_port) {
Chih-Hung Hsieh43f06942019-12-19 15:01:08 -08002026 u->field_data[UF_PORT].off = (uint16_t)(p - buf);
Inna Palantff3f07a2019-07-11 16:15:26 -07002027 u->field_data[UF_PORT].len = 0;
2028 u->field_set |= (1 << UF_PORT);
2029 }
2030 u->field_data[UF_PORT].len++;
2031 break;
2032
2033 case s_http_userinfo:
2034 if (s != s_http_userinfo) {
Chih-Hung Hsieh43f06942019-12-19 15:01:08 -08002035 u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
Inna Palantff3f07a2019-07-11 16:15:26 -07002036 u->field_data[UF_USERINFO].len = 0;
2037 u->field_set |= (1 << UF_USERINFO);
2038 }
2039 u->field_data[UF_USERINFO].len++;
2040 break;
2041
2042 default:
2043 break;
2044 }
2045 s = new_s;
2046 }
2047
2048 /* Make sure we don't end somewhere unexpected */
2049 switch (s) {
2050 case s_http_host_start:
2051 case s_http_host_v6_start:
2052 case s_http_host_v6:
Inna Palantff3f07a2019-07-11 16:15:26 -07002053 case s_http_userinfo:
2054 case s_http_userinfo_start:
2055 return 1;
2056 default:
2057 break;
2058 }
2059
2060 return 0;
2061}
2062
2063int
2064http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
2065 struct http_parser_url *u)
2066{
2067 enum state s;
2068 const char *p;
2069 enum http_parser_url_fields uf, old_uf;
2070 int found_at = 0;
2071
Chih-Hung Hsieh43f06942019-12-19 15:01:08 -08002072 if (buflen > UINT16_MAX)
2073 return 1;
2074
Inna Palantff3f07a2019-07-11 16:15:26 -07002075 u->port = u->field_set = 0;
2076 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
2077 uf = old_uf = UF_MAX;
2078
2079 for (p = buf; p < buf + buflen; p++) {
2080 s = parse_url_char(s, *p);
2081
2082 /* Figure out the next field that we're operating on */
2083 switch (s) {
2084 case s_dead:
2085 return 1;
2086
2087 /* Skip delimeters */
2088 case s_req_schema_slash:
2089 case s_req_schema_slash_slash:
2090 case s_req_server_start:
2091 case s_req_query_string_start:
2092 case s_req_fragment_start:
2093 continue;
2094
2095 case s_req_schema:
2096 uf = UF_SCHEMA;
2097 break;
2098
2099 case s_req_server_with_at:
2100 found_at = 1;
2101
2102 /* FALLTROUGH */
2103 case s_req_server:
2104 uf = UF_HOST;
2105 break;
2106
2107 case s_req_path:
2108 uf = UF_PATH;
2109 break;
2110
2111 case s_req_query_string:
2112 uf = UF_QUERY;
2113 break;
2114
2115 case s_req_fragment:
2116 uf = UF_FRAGMENT;
2117 break;
2118
2119 default:
2120 assert(!"Unexpected state");
2121 return 1;
2122 }
2123
2124 /* Nothing's changed; soldier on */
2125 if (uf == old_uf) {
2126 u->field_data[uf].len++;
2127 continue;
2128 }
2129
Chih-Hung Hsieh43f06942019-12-19 15:01:08 -08002130 u->field_data[uf].off = (uint16_t)(p - buf);
Inna Palantff3f07a2019-07-11 16:15:26 -07002131 u->field_data[uf].len = 1;
2132
2133 u->field_set |= (1 << uf);
2134 old_uf = uf;
2135 }
2136
2137 /* host must be present if there is a schema */
2138 /* parsing http:///toto will fail */
2139 if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) {
2140 if (http_parse_host(buf, u, found_at) != 0) {
2141 return 1;
2142 }
2143 }
2144
2145 /* CONNECT requests can only contain "hostname:port" */
2146 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
2147 return 1;
2148 }
2149
2150 if (u->field_set & (1 << UF_PORT)) {
2151 /* Don't bother with endp; we've already validated the string */
2152 unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
2153
2154 /* Ports have a max value of 2^16 */
2155 if (v > 0xffff) {
2156 return 1;
2157 }
2158
2159 u->port = (uint16_t) v;
2160 }
2161
2162 return 0;
2163}
2164
2165void
2166http_parser_pause(http_parser *parser, int paused) {
2167 /* Users should only be pausing/unpausing a parser that is not in an error
2168 * state. In non-debug builds, there's not much that we can do about this
2169 * other than ignore it.
2170 */
2171 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
2172 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
2173 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
2174 } else {
2175 assert(0 && "Attempting to pause parser in error state");
2176 }
2177}
2178
2179int
2180http_body_is_final(const struct http_parser *parser) {
2181 return parser->state == s_message_done;
2182}