1 /* Copyright Joyent, Inc. and other Node contributors.
\r
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
\r
4 * of this software and associated documentation files (the "Software"), to
\r
5 * deal in the Software without restriction, including without limitation the
\r
6 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
\r
7 * sell copies of the Software, and to permit persons to whom the Software is
\r
8 * furnished to do so, subject to the following conditions:
\r
10 * The above copyright notice and this permission notice shall be included in
\r
11 * all copies or substantial portions of the Software.
\r
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
\r
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
\r
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
\r
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
\r
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
\r
18 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
\r
21 #include "http_parser.h"
\r
/* Upper bound that COUNT_HEADER_SIZE compares nread against (exceeding it
 * sets HPE_HEADER_OVERFLOW); initialised from HTTP_MAX_HEADER_SIZE. */
28 static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE;
\r
31 # define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */
\r
/* Smaller of a and b. The selected argument is evaluated twice, so do not
 * pass expressions with side effects. */
35 # define MIN(a,b) ((a) < (b) ? (a) : (b))
\r
/* Element count of a: size of the whole object divided by the size of its
 * first element. Only correct for a real array, not for a pointer. */
39 # define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
\r
/* BIT_AT(a, i): test bit i of the byte array a used as a bitmap.
 * Byte a[i >> 3] is selected and masked with 1 << (i & 7); the leading !!
 * collapses the result to 0 or 1. */
43 # define BIT_AT(a, i) \
\r
44 (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
\r
45 (1 << ((unsigned int) (i) & 7))))
\r
/* ELEM_AT(a, i, v): a[i] when i, converted to unsigned int, is below
 * ARRAY_SIZE(a); otherwise the fallback value v. */
49 # define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? (a)[(i)] : (v))
\r
52 #define SET_ERRNO(e) \
\r
54 parser->nread = nread; \
\r
55 parser->http_errno = (e); \
\r
/* Read and write the parser state through a variable named p_state that
 * must exist in the enclosing scope. UPDATE_STATE casts V to enum state;
 * note that its expansion already ends with ';'. */
58 #define CURRENT_STATE() p_state
\r
59 #define UPDATE_STATE(V) p_state = (enum state) (V);
\r
62 parser->nread = nread; \
\r
63 parser->state = CURRENT_STATE(); \
\r
66 #define REEXECUTE() \
\r
71 # define LIKELY(X) __builtin_expect(!!(X), 1)
\r
72 # define UNLIKELY(X) __builtin_expect(!!(X), 0)
\r
74 # define LIKELY(X) (X)
\r
75 # define UNLIKELY(X) (X)
\r
79 /* Run the notify callback FOR, returning ER if it fails */
\r
80 #define CALLBACK_NOTIFY_(FOR, ER) \
\r
82 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
\r
84 if (LIKELY(settings->on_##FOR)) { \
\r
85 parser->state = CURRENT_STATE(); \
\r
86 if (UNLIKELY(0 != settings->on_##FOR(parser))) { \
\r
87 SET_ERRNO(HPE_CB_##FOR); \
\r
89 UPDATE_STATE(parser->state); \
\r
91 /* We either errored above or got paused; get out */ \
\r
92 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
\r
98 /* Run the notify callback FOR and consume the current byte */
\r
99 #define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1)
\r
101 /* Run the notify callback FOR and don't consume the current byte */
\r
102 #define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data)
\r
104 /* Run data callback FOR with LEN bytes, returning ER if it fails */
\r
105 #define CALLBACK_DATA_(FOR, LEN, ER) \
\r
107 assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \
\r
109 if (FOR##_mark) { \
\r
110 if (LIKELY(settings->on_##FOR)) { \
\r
111 parser->state = CURRENT_STATE(); \
\r
112 if (UNLIKELY(0 != \
\r
113 settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \
\r
114 SET_ERRNO(HPE_CB_##FOR); \
\r
116 UPDATE_STATE(parser->state); \
\r
118 /* We either errored above or got paused; get out */ \
\r
119 if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \
\r
123 FOR##_mark = NULL; \
\r
127 /* Run the data callback FOR and consume the current byte */
\r
128 #define CALLBACK_DATA(FOR) \
\r
129 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1)
\r
131 /* Run the data callback FOR and don't consume the current byte */
\r
132 #define CALLBACK_DATA_NOADVANCE(FOR) \
\r
133 CALLBACK_DATA_(FOR, p - FOR##_mark, p - data)
\r
135 /* Set the mark FOR; non-destructive if mark is already set */
\r
136 #define MARK(FOR) \
\r
138 if (!FOR##_mark) { \
\r
143 /* Don't allow the total size of the HTTP headers (including the status
\r
144 * line) to exceed max_header_size. This check is here to protect
\r
145 * embedders against denial-of-service attacks where the attacker feeds
\r
146 * us a never-ending header that the embedder keeps buffering.
\r
148 * This check is arguably the responsibility of embedders but we're doing
\r
149 * it on the embedder's behalf because most won't bother and this way we
\r
150 * make the web a little safer. max_header_size is still far bigger
\r
151 * than any reasonable request or response so this should never affect
\r
152 * day-to-day operation.
\r
154 #define COUNT_HEADER_SIZE(V) \
\r
156 nread += (uint32_t)(V); \
\r
157 if (UNLIKELY(nread > max_header_size)) { \
\r
158 SET_ERRNO(HPE_HEADER_OVERFLOW); \
\r
/* Lower-case header names and header values that the header-state code
 * matches one character at a time, indexing the literal with parser->index
 * (for example `c != CONNECTION[parser->index]`). */
164 #define PROXY_CONNECTION "proxy-connection"
\r
165 #define CONNECTION "connection"
\r
166 #define CONTENT_LENGTH "content-length"
\r
167 #define TRANSFER_ENCODING "transfer-encoding"
\r
168 #define UPGRADE "upgrade"
\r
169 #define CHUNKED "chunked"
\r
170 #define KEEP_ALIVE "keep-alive"
\r
171 #define CLOSE "close"
\r
174 static const char *method_strings[] =
\r
176 #define XX(num, name, string) #string,
\r
177 HTTP_METHOD_MAP(XX)
\r
182 /* Tokens as defined by RFC 2616. Also lowercases them.
\r
183 * token = 1*<any CHAR except CTLs or separators>
\r
184 * separators = "(" | ")" | "<" | ">" | "@"
\r
185 * | "," | ";" | ":" | "\" | <">
\r
186 * | "/" | "[" | "]" | "?" | "="
\r
187 * | "{" | "}" | SP | HT
\r
/* tokens: lookup table indexed by byte value (read through TOKEN and
 * STRICT_TOKEN). An entry of 0 marks a byte that is not a token character;
 * any other entry is the character to use, with 'A'-'Z' stored as 'a'-'z'
 * so that a lookup also lowercases. Space (32) is stored as ' ' here;
 * STRICT_TOKEN rejects a space before consulting the table. Only bytes
 * 0-127 are listed; the rest of the 256 entries are zero-initialized. */
189 static const char tokens[256] = {
\r
190 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
\r
191 0, 0, 0, 0, 0, 0, 0, 0,
\r
192 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
\r
193 0, 0, 0, 0, 0, 0, 0, 0,
\r
194 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
\r
195 0, 0, 0, 0, 0, 0, 0, 0,
\r
196 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
\r
197 0, 0, 0, 0, 0, 0, 0, 0,
\r
198 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
\r
199 ' ', '!', 0, '#', '$', '%', '&', '\'',
\r
200 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
\r
201 0, 0, '*', '+', 0, '-', '.', 0,
\r
202 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
\r
203 '0', '1', '2', '3', '4', '5', '6', '7',
\r
204 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
\r
205 '8', '9', 0, 0, 0, 0, 0, 0,
\r
206 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
\r
207 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
\r
208 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
\r
209 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
\r
210 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
\r
211 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
\r
212 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
\r
213 'x', 'y', 'z', 0, 0, 0, '^', '_',
\r
214 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
\r
215 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
\r
216 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
\r
217 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
\r
218 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
\r
219 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
\r
220 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
\r
221 'x', 'y', 'z', 0, '|', 0, '~', 0 };
\r
224 static const int8_t unhex[256] =
\r
225 {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
\r
226 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
\r
227 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
\r
228 , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
\r
229 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
\r
230 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
\r
231 ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
\r
232 ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
\r
236 #if HTTP_PARSER_STRICT
\r
/* normal_url_char: 256-bit bitmap packed into 32 bytes, one row of eight
 * characters per byte; read with BIT_AT through IS_URL_CHAR. Within a row
 * the k-th character (k = 0..7) owns bit value 1 << k, so a 0 in that
 * position excludes the character. Excluded in the rows listed: every
 * control character 0-31 except ht (9) and np (12), which go through T();
 * space (32), '#' (35), '?' (63) and del (127).
 * NOTE(review): T() is defined outside this excerpt and looks like it
 * depends on HTTP_PARSER_STRICT - confirm.
 * Sixteen rows are listed; the remaining sixteen bytes (characters
 * 128-255) are zero-initialized. */
243 static const uint8_t normal_url_char[32] = {
\r
244 /* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
\r
245 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
\r
246 /* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
\r
247 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
\r
248 /* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
\r
249 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
\r
250 /* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
\r
251 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
\r
252 /* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
\r
253 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
\r
254 /* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
\r
255 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
\r
256 /* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
\r
257 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
\r
258 /* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
\r
259 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
\r
260 /* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
\r
261 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
\r
262 /* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
\r
263 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
\r
264 /* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
\r
265 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
\r
266 /* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
\r
267 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
\r
268 /* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
\r
269 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
\r
270 /* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
\r
271 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
\r
272 /* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
\r
273 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
\r
274 /* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
\r
275 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
\r
280 { s_dead = 1 /* important that this is > 0 */
\r
282 , s_start_req_or_res
\r
293 , s_res_first_status_code
\r
294 , s_res_status_code
\r
295 , s_res_status_start
\r
297 , s_res_line_almost_done
\r
302 , s_req_spaces_before_url
\r
304 , s_req_schema_slash
\r
305 , s_req_schema_slash_slash
\r
306 , s_req_server_start
\r
308 , s_req_server_with_at
\r
310 , s_req_query_string_start
\r
311 , s_req_query_string
\r
312 , s_req_fragment_start
\r
325 , s_req_line_almost_done
\r
327 , s_header_field_start
\r
329 , s_header_value_discard_ws
\r
330 , s_header_value_discard_ws_almost_done
\r
331 , s_header_value_discard_lws
\r
332 , s_header_value_start
\r
334 , s_header_value_lws
\r
336 , s_header_almost_done
\r
338 , s_chunk_size_start
\r
340 , s_chunk_parameters
\r
341 , s_chunk_size_almost_done
\r
343 , s_headers_almost_done
\r
346 /* Important: 's_headers_done' must be the last 'header' state. All
\r
347 * states beyond this must be 'body' states. It is used for overflow
\r
348 * checking. See the PARSING_HEADER() macro.
\r
352 , s_chunk_data_almost_done
\r
353 , s_chunk_data_done
\r
356 , s_body_identity_eof
\r
362 #define PARSING_HEADER(state) (state <= s_headers_done)
\r
371 , h_matching_connection
\r
372 , h_matching_proxy_connection
\r
373 , h_matching_content_length
\r
374 , h_matching_transfer_encoding
\r
375 , h_matching_upgrade
\r
379 , h_content_length_num
\r
380 , h_content_length_ws
\r
381 , h_transfer_encoding
\r
384 , h_matching_transfer_encoding_chunked
\r
385 , h_matching_connection_token_start
\r
386 , h_matching_connection_keep_alive
\r
387 , h_matching_connection_close
\r
388 , h_matching_connection_upgrade
\r
389 , h_matching_connection_token
\r
391 , h_transfer_encoding_chunked
\r
392 , h_connection_keep_alive
\r
393 , h_connection_close
\r
394 , h_connection_upgrade
\r
397 enum http_host_state
\r
399 s_http_host_dead = 1
\r
400 , s_http_userinfo_start
\r
402 , s_http_host_start
\r
403 , s_http_host_v6_start
\r
406 , s_http_host_v6_end
\r
407 , s_http_host_v6_zone_start
\r
408 , s_http_host_v6_zone
\r
409 , s_http_host_port_start
\r
413 /* Macros for character classes; depends on strict-mode */
\r
/* ASCII character-class helpers.
 * LOWER sets bit 0x20, which maps 'A'-'Z' onto 'a'-'z'; other bytes are
 * altered as well, so the result is only meaningful inside range checks
 * such as IS_ALPHA and IS_HEX. LOWER does not parenthesize its argument and
 * the IS_* macros evaluate c more than once: pass a plain variable. */
416 #define LOWER(c) (unsigned char)(c | 0x20)
\r
417 #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
\r
418 #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
\r
419 #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
\r
420 #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
\r
421 #define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
\r
422 (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
\r
/* True for alphanumerics, IS_MARK characters, '%' and the punctuation
 * ; : & = + $ ,  -- parse_url_char accepts these (plus '[' and ']') as
 * characters that keep it in s_req_server. */
424 #define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
\r
425 (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
\r
426 (c) == '$' || (c) == ',')
\r
/* tokens[] lookup that yields 0 for a space, which the table itself
 * accepts. The argument is used unparenthesized: pass a plain variable. */
428 #define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c])
\r
430 #if HTTP_PARSER_STRICT
\r
431 #define TOKEN(c) STRICT_TOKEN(c)
\r
432 #define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
\r
433 #define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
\r
435 #define TOKEN(c) tokens[(unsigned char)c]
\r
436 #define IS_URL_CHAR(c) \
\r
437 (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
\r
438 #define IS_HOST_CHAR(c) \
\r
439 (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
\r
443 * Verify that a char is a valid visible (printable) US-ASCII
\r
444 * character or %x80-FF
\r
/* Accepts CR, LF, byte value 9 (horizontal tab) and every byte above 31
 * other than 127; bytes 0x80-0xFF pass because of the unsigned char cast.
 * CR and LF are defined outside this excerpt. ch is evaluated several
 * times and is not parenthesized. */
446 #define IS_HEADER_CHAR(ch) \
\r
447 (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127))
\r
/* State a new message starts in: s_start_req when parser->type is
 * HTTP_REQUEST, otherwise s_start_res. Needs a variable named parser in
 * scope at the point of use. */
449 #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
\r
452 #if HTTP_PARSER_STRICT
\r
453 # define STRICT_CHECK(cond) \
\r
456 SET_ERRNO(HPE_STRICT); \
\r
460 # define NEW_MESSAGE() (http_should_keep_alive(parser) ? start_state : s_dead)
\r
462 # define STRICT_CHECK(cond)
\r
463 # define NEW_MESSAGE() start_state
\r
467 /* Map errno values to strings for human-readable output */
\r
468 #define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s },
\r
471 const char *description;
\r
472 } http_strerror_tab[] = {
\r
473 HTTP_ERRNO_MAP(HTTP_STRERROR_GEN)
\r
475 #undef HTTP_STRERROR_GEN
\r
477 int http_message_needs_eof(const http_parser *parser);
\r
481 * This is designed to be shared by http_parser_execute() for URL validation,
\r
482 * hence it has a state transition + byte-for-byte interface. In addition, it
\r
483 * is meant to be embedded in http_parser_parse_url(), which does the dirty
\r
484 * work of turning state transitions into URL components for its API.
\r
486 * This function should only be invoked with non-space characters. It is
\r
487 * assumed that the caller cares about (and can detect) the transition between
\r
488 * URL and non-URL states by looking for these.
\r
491 parse_url_char(enum state s, const char ch)
\r
493 if (ch == ' ' || ch == '\r' || ch == '\n') {
\r
497 #if HTTP_PARSER_STRICT
\r
498 if (ch == '\t' || ch == '\f') {
\r
504 case s_req_spaces_before_url:
\r
505 /* Proxied requests are followed by scheme of an absolute URI (alpha).
\r
506 * All methods except CONNECT are followed by '/' or '*'.
\r
509 if (ch == '/' || ch == '*') {
\r
513 if (IS_ALPHA(ch)) {
\r
514 return s_req_schema;
\r
520 if (IS_ALPHA(ch)) {
\r
525 return s_req_schema_slash;
\r
530 case s_req_schema_slash:
\r
532 return s_req_schema_slash_slash;
\r
537 case s_req_schema_slash_slash:
\r
539 return s_req_server_start;
\r
544 case s_req_server_with_at:
\r
550 case s_req_server_start:
\r
557 return s_req_query_string_start;
\r
561 return s_req_server_with_at;
\r
564 if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
\r
565 return s_req_server;
\r
571 if (IS_URL_CHAR(ch)) {
\r
577 return s_req_query_string_start;
\r
580 return s_req_fragment_start;
\r
585 case s_req_query_string_start:
\r
586 case s_req_query_string:
\r
587 if (IS_URL_CHAR(ch)) {
\r
588 return s_req_query_string;
\r
593 /* allow extra '?' in query string */
\r
594 return s_req_query_string;
\r
597 return s_req_fragment_start;
\r
602 case s_req_fragment_start:
\r
603 if (IS_URL_CHAR(ch)) {
\r
604 return s_req_fragment;
\r
609 return s_req_fragment;
\r
617 case s_req_fragment:
\r
618 if (IS_URL_CHAR(ch)) {
\r
634 /* We should never fall out of the switch above unless there's an error */
\r
638 size_t http_parser_execute (http_parser *parser,
\r
639 const http_parser_settings *settings,
\r
645 const char *p = data;
\r
646 const char *header_field_mark = 0;
\r
647 const char *header_value_mark = 0;
\r
648 const char *url_mark = 0;
\r
649 const char *body_mark = 0;
\r
650 const char *status_mark = 0;
\r
651 enum state p_state = (enum state) parser->state;
\r
652 const unsigned int lenient = parser->lenient_http_headers;
\r
653 uint32_t nread = parser->nread;
\r
655 /* We're in an error state. Don't bother doing anything. */
\r
656 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
\r
661 switch (CURRENT_STATE()) {
\r
662 case s_body_identity_eof:
\r
663 /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if
\r
666 CALLBACK_NOTIFY_NOADVANCE(message_complete);
\r
670 case s_start_req_or_res:
\r
676 SET_ERRNO(HPE_INVALID_EOF_STATE);
\r
682 if (CURRENT_STATE() == s_header_field)
\r
683 header_field_mark = data;
\r
684 if (CURRENT_STATE() == s_header_value)
\r
685 header_value_mark = data;
\r
686 switch (CURRENT_STATE()) {
\r
689 case s_req_schema_slash:
\r
690 case s_req_schema_slash_slash:
\r
691 case s_req_server_start:
\r
693 case s_req_server_with_at:
\r
694 case s_req_query_string_start:
\r
695 case s_req_query_string:
\r
696 case s_req_fragment_start:
\r
697 case s_req_fragment:
\r
701 status_mark = data;
\r
707 for (p=data; p != data + len; p++) {
\r
710 if (PARSING_HEADER(CURRENT_STATE()))
\r
711 COUNT_HEADER_SIZE(1);
\r
714 switch (CURRENT_STATE()) {
\r
717 /* this state is used after a 'Connection: close' message
\r
718 * the parser will error out if it reads another message
\r
720 if (LIKELY(ch == CR || ch == LF))
\r
723 SET_ERRNO(HPE_CLOSED_CONNECTION);
\r
726 case s_start_req_or_res:
\r
728 if (ch == CR || ch == LF)
\r
731 parser->content_length = ULLONG_MAX;
\r
734 UPDATE_STATE(s_res_or_resp_H);
\r
736 CALLBACK_NOTIFY(message_begin);
\r
738 parser->type = HTTP_REQUEST;
\r
739 UPDATE_STATE(s_start_req);
\r
746 case s_res_or_resp_H:
\r
748 parser->type = HTTP_RESPONSE;
\r
749 UPDATE_STATE(s_res_HT);
\r
751 if (UNLIKELY(ch != 'E')) {
\r
752 SET_ERRNO(HPE_INVALID_CONSTANT);
\r
756 parser->type = HTTP_REQUEST;
\r
757 parser->method = HTTP_HEAD;
\r
759 UPDATE_STATE(s_req_method);
\r
765 if (ch == CR || ch == LF)
\r
768 parser->content_length = ULLONG_MAX;
\r
771 UPDATE_STATE(s_res_H);
\r
773 SET_ERRNO(HPE_INVALID_CONSTANT);
\r
777 CALLBACK_NOTIFY(message_begin);
\r
782 STRICT_CHECK(ch != 'T');
\r
783 UPDATE_STATE(s_res_HT);
\r
787 STRICT_CHECK(ch != 'T');
\r
788 UPDATE_STATE(s_res_HTT);
\r
792 STRICT_CHECK(ch != 'P');
\r
793 UPDATE_STATE(s_res_HTTP);
\r
797 STRICT_CHECK(ch != '/');
\r
798 UPDATE_STATE(s_res_http_major);
\r
801 case s_res_http_major:
\r
802 if (UNLIKELY(!IS_NUM(ch))) {
\r
803 SET_ERRNO(HPE_INVALID_VERSION);
\r
807 parser->http_major = ch - '0';
\r
808 UPDATE_STATE(s_res_http_dot);
\r
811 case s_res_http_dot:
\r
813 if (UNLIKELY(ch != '.')) {
\r
814 SET_ERRNO(HPE_INVALID_VERSION);
\r
818 UPDATE_STATE(s_res_http_minor);
\r
822 case s_res_http_minor:
\r
823 if (UNLIKELY(!IS_NUM(ch))) {
\r
824 SET_ERRNO(HPE_INVALID_VERSION);
\r
828 parser->http_minor = ch - '0';
\r
829 UPDATE_STATE(s_res_http_end);
\r
832 case s_res_http_end:
\r
834 if (UNLIKELY(ch != ' ')) {
\r
835 SET_ERRNO(HPE_INVALID_VERSION);
\r
839 UPDATE_STATE(s_res_first_status_code);
\r
843 case s_res_first_status_code:
\r
850 SET_ERRNO(HPE_INVALID_STATUS);
\r
853 parser->status_code = ch - '0';
\r
854 UPDATE_STATE(s_res_status_code);
\r
858 case s_res_status_code:
\r
863 UPDATE_STATE(s_res_status_start);
\r
867 UPDATE_STATE(s_res_status_start);
\r
871 SET_ERRNO(HPE_INVALID_STATUS);
\r
877 parser->status_code *= 10;
\r
878 parser->status_code += ch - '0';
\r
880 if (UNLIKELY(parser->status_code > 999)) {
\r
881 SET_ERRNO(HPE_INVALID_STATUS);
\r
888 case s_res_status_start:
\r
891 UPDATE_STATE(s_res_status);
\r
894 if (ch == CR || ch == LF)
\r
902 UPDATE_STATE(s_res_line_almost_done);
\r
903 CALLBACK_DATA(status);
\r
908 UPDATE_STATE(s_header_field_start);
\r
909 CALLBACK_DATA(status);
\r
915 case s_res_line_almost_done:
\r
916 STRICT_CHECK(ch != LF);
\r
917 UPDATE_STATE(s_header_field_start);
\r
922 if (ch == CR || ch == LF)
\r
925 parser->content_length = ULLONG_MAX;
\r
927 if (UNLIKELY(!IS_ALPHA(ch))) {
\r
928 SET_ERRNO(HPE_INVALID_METHOD);
\r
932 parser->method = (enum http_method) 0;
\r
935 case 'A': parser->method = HTTP_ACL; break;
\r
936 case 'B': parser->method = HTTP_BIND; break;
\r
937 case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
\r
938 case 'D': parser->method = HTTP_DELETE; break;
\r
939 case 'G': parser->method = HTTP_GET; break;
\r
940 case 'H': parser->method = HTTP_HEAD; break;
\r
941 case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
\r
942 case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
\r
943 case 'N': parser->method = HTTP_NOTIFY; break;
\r
944 case 'O': parser->method = HTTP_OPTIONS; break;
\r
945 case 'P': parser->method = HTTP_POST;
\r
946 /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
\r
948 case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
\r
949 case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break;
\r
950 case 'T': parser->method = HTTP_TRACE; break;
\r
951 case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
\r
953 SET_ERRNO(HPE_INVALID_METHOD);
\r
956 UPDATE_STATE(s_req_method);
\r
958 CALLBACK_NOTIFY(message_begin);
\r
965 const char *matcher;
\r
966 if (UNLIKELY(ch == '\0')) {
\r
967 SET_ERRNO(HPE_INVALID_METHOD);
\r
971 matcher = method_strings[parser->method];
\r
972 if (ch == ' ' && matcher[parser->index] == '\0') {
\r
973 UPDATE_STATE(s_req_spaces_before_url);
\r
974 } else if (ch == matcher[parser->index]) {
\r
976 } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') {
\r
978 switch (parser->method << 16 | parser->index << 8 | ch) {
\r
979 #define XX(meth, pos, ch, new_meth) \
\r
980 case (HTTP_##meth << 16 | pos << 8 | ch): \
\r
981 parser->method = HTTP_##new_meth; break;
\r
983 XX(POST, 1, 'U', PUT)
\r
984 XX(POST, 1, 'A', PATCH)
\r
985 XX(POST, 1, 'R', PROPFIND)
\r
986 XX(PUT, 2, 'R', PURGE)
\r
987 XX(CONNECT, 1, 'H', CHECKOUT)
\r
988 XX(CONNECT, 2, 'P', COPY)
\r
989 XX(MKCOL, 1, 'O', MOVE)
\r
990 XX(MKCOL, 1, 'E', MERGE)
\r
991 XX(MKCOL, 1, '-', MSEARCH)
\r
992 XX(MKCOL, 2, 'A', MKACTIVITY)
\r
993 XX(MKCOL, 3, 'A', MKCALENDAR)
\r
994 XX(SUBSCRIBE, 1, 'E', SEARCH)
\r
995 XX(SUBSCRIBE, 1, 'O', SOURCE)
\r
996 XX(REPORT, 2, 'B', REBIND)
\r
997 XX(PROPFIND, 4, 'P', PROPPATCH)
\r
998 XX(LOCK, 1, 'I', LINK)
\r
999 XX(UNLOCK, 2, 'S', UNSUBSCRIBE)
\r
1000 XX(UNLOCK, 2, 'B', UNBIND)
\r
1001 XX(UNLOCK, 3, 'I', UNLINK)
\r
1004 SET_ERRNO(HPE_INVALID_METHOD);
\r
1008 SET_ERRNO(HPE_INVALID_METHOD);
\r
1016 case s_req_spaces_before_url:
\r
1018 if (ch == ' ') break;
\r
1021 if (parser->method == HTTP_CONNECT) {
\r
1022 UPDATE_STATE(s_req_server_start);
\r
1025 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
\r
1026 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
\r
1027 SET_ERRNO(HPE_INVALID_URL);
\r
1034 case s_req_schema:
\r
1035 case s_req_schema_slash:
\r
1036 case s_req_schema_slash_slash:
\r
1037 case s_req_server_start:
\r
1040 /* No whitespace allowed here */
\r
1044 SET_ERRNO(HPE_INVALID_URL);
\r
1047 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
\r
1048 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
\r
1049 SET_ERRNO(HPE_INVALID_URL);
\r
1057 case s_req_server:
\r
1058 case s_req_server_with_at:
\r
1060 case s_req_query_string_start:
\r
1061 case s_req_query_string:
\r
1062 case s_req_fragment_start:
\r
1063 case s_req_fragment:
\r
1067 UPDATE_STATE(s_req_http_start);
\r
1068 CALLBACK_DATA(url);
\r
1072 parser->http_major = 0;
\r
1073 parser->http_minor = 9;
\r
1074 UPDATE_STATE((ch == CR) ?
\r
1075 s_req_line_almost_done :
\r
1076 s_header_field_start);
\r
1077 CALLBACK_DATA(url);
\r
1080 UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch));
\r
1081 if (UNLIKELY(CURRENT_STATE() == s_dead)) {
\r
1082 SET_ERRNO(HPE_INVALID_URL);
\r
1089 case s_req_http_start:
\r
1094 UPDATE_STATE(s_req_http_H);
\r
1097 if (parser->method == HTTP_SOURCE) {
\r
1098 UPDATE_STATE(s_req_http_I);
\r
1101 /* fall through */
\r
1103 SET_ERRNO(HPE_INVALID_CONSTANT);
\r
1108 case s_req_http_H:
\r
1109 STRICT_CHECK(ch != 'T');
\r
1110 UPDATE_STATE(s_req_http_HT);
\r
1113 case s_req_http_HT:
\r
1114 STRICT_CHECK(ch != 'T');
\r
1115 UPDATE_STATE(s_req_http_HTT);
\r
1118 case s_req_http_HTT:
\r
1119 STRICT_CHECK(ch != 'P');
\r
1120 UPDATE_STATE(s_req_http_HTTP);
\r
1123 case s_req_http_I:
\r
1124 STRICT_CHECK(ch != 'C');
\r
1125 UPDATE_STATE(s_req_http_IC);
\r
1128 case s_req_http_IC:
\r
1129 STRICT_CHECK(ch != 'E');
\r
1130 UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". */
\r
1133 case s_req_http_HTTP:
\r
1134 STRICT_CHECK(ch != '/');
\r
1135 UPDATE_STATE(s_req_http_major);
\r
1138 case s_req_http_major:
\r
1139 if (UNLIKELY(!IS_NUM(ch))) {
\r
1140 SET_ERRNO(HPE_INVALID_VERSION);
\r
1144 parser->http_major = ch - '0';
\r
1145 UPDATE_STATE(s_req_http_dot);
\r
1148 case s_req_http_dot:
\r
1150 if (UNLIKELY(ch != '.')) {
\r
1151 SET_ERRNO(HPE_INVALID_VERSION);
\r
1155 UPDATE_STATE(s_req_http_minor);
\r
1159 case s_req_http_minor:
\r
1160 if (UNLIKELY(!IS_NUM(ch))) {
\r
1161 SET_ERRNO(HPE_INVALID_VERSION);
\r
1165 parser->http_minor = ch - '0';
\r
1166 UPDATE_STATE(s_req_http_end);
\r
1169 case s_req_http_end:
\r
1172 UPDATE_STATE(s_req_line_almost_done);
\r
1177 UPDATE_STATE(s_header_field_start);
\r
1181 SET_ERRNO(HPE_INVALID_VERSION);
\r
1186 /* end of request line */
\r
1187 case s_req_line_almost_done:
\r
1189 if (UNLIKELY(ch != LF)) {
\r
1190 SET_ERRNO(HPE_LF_EXPECTED);
\r
1194 UPDATE_STATE(s_header_field_start);
\r
1198 case s_header_field_start:
\r
1201 UPDATE_STATE(s_headers_almost_done);
\r
1206 /* they might be just sending \n instead of \r\n so this would be
\r
1207 * the second \n to denote the end of headers*/
\r
1208 UPDATE_STATE(s_headers_almost_done);
\r
1214 if (UNLIKELY(!c)) {
\r
1215 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
\r
1219 MARK(header_field);
\r
1221 parser->index = 0;
\r
1222 UPDATE_STATE(s_header_field);
\r
1226 parser->header_state = h_C;
\r
1230 parser->header_state = h_matching_proxy_connection;
\r
1234 parser->header_state = h_matching_transfer_encoding;
\r
1238 parser->header_state = h_matching_upgrade;
\r
1242 parser->header_state = h_general;
\r
1248 case s_header_field:
\r
1250 const char* start = p;
\r
1251 for (; p != data + len; p++) {
\r
1258 switch (parser->header_state) {
\r
1260 size_t left = data + len - p;
\r
1261 const char* pe = p + MIN(left, max_header_size);
\r
1262 while (p+1 < pe && TOKEN(p[1])) {
\r
1270 parser->header_state = (c == 'o' ? h_CO : h_general);
\r
1275 parser->header_state = (c == 'n' ? h_CON : h_general);
\r
1282 parser->header_state = h_matching_connection;
\r
1285 parser->header_state = h_matching_content_length;
\r
1288 parser->header_state = h_general;
\r
1295 case h_matching_connection:
\r
1297 if (parser->index > sizeof(CONNECTION)-1
\r
1298 || c != CONNECTION[parser->index]) {
\r
1299 parser->header_state = h_general;
\r
1300 } else if (parser->index == sizeof(CONNECTION)-2) {
\r
1301 parser->header_state = h_connection;
\r
1305 /* proxy-connection */
\r
1307 case h_matching_proxy_connection:
\r
1309 if (parser->index > sizeof(PROXY_CONNECTION)-1
\r
1310 || c != PROXY_CONNECTION[parser->index]) {
\r
1311 parser->header_state = h_general;
\r
1312 } else if (parser->index == sizeof(PROXY_CONNECTION)-2) {
\r
1313 parser->header_state = h_connection;
\r
1317 /* content-length */
\r
1319 case h_matching_content_length:
\r
1321 if (parser->index > sizeof(CONTENT_LENGTH)-1
\r
1322 || c != CONTENT_LENGTH[parser->index]) {
\r
1323 parser->header_state = h_general;
\r
1324 } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
\r
1325 parser->header_state = h_content_length;
\r
1329 /* transfer-encoding */
\r
1331 case h_matching_transfer_encoding:
\r
1333 if (parser->index > sizeof(TRANSFER_ENCODING)-1
\r
1334 || c != TRANSFER_ENCODING[parser->index]) {
\r
1335 parser->header_state = h_general;
\r
1336 } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) {
\r
1337 parser->header_state = h_transfer_encoding;
\r
1343 case h_matching_upgrade:
\r
1345 if (parser->index > sizeof(UPGRADE)-1
\r
1346 || c != UPGRADE[parser->index]) {
\r
1347 parser->header_state = h_general;
\r
1348 } else if (parser->index == sizeof(UPGRADE)-2) {
\r
1349 parser->header_state = h_upgrade;
\r
1353 case h_connection:
\r
1354 case h_content_length:
\r
1355 case h_transfer_encoding:
\r
1357 if (ch != ' ') parser->header_state = h_general;
\r
1361 assert(0 && "Unknown header_state");
\r
1366 if (p == data + len) {
\r
1368 COUNT_HEADER_SIZE(p - start);
\r
1372 COUNT_HEADER_SIZE(p - start);
\r
1375 UPDATE_STATE(s_header_value_discard_ws);
\r
1376 CALLBACK_DATA(header_field);
\r
1380 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
\r
1384 case s_header_value_discard_ws:
\r
1385 if (ch == ' ' || ch == '\t') break;
\r
1388 UPDATE_STATE(s_header_value_discard_ws_almost_done);
\r
1393 UPDATE_STATE(s_header_value_discard_lws);
\r
1397 /* fall through */
\r
1399 case s_header_value_start:
\r
1401 MARK(header_value);
\r
1403 UPDATE_STATE(s_header_value);
\r
1404 parser->index = 0;
\r
1408 switch (parser->header_state) {
\r
1410 parser->flags |= F_UPGRADE;
\r
1411 parser->header_state = h_general;
\r
1414 case h_transfer_encoding:
\r
1415 /* looking for 'Transfer-Encoding: chunked' */
\r
1417 parser->header_state = h_matching_transfer_encoding_chunked;
\r
1419 parser->header_state = h_general;
\r
1423 case h_content_length:
\r
1424 if (UNLIKELY(!IS_NUM(ch))) {
\r
1425 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
\r
1429 if (parser->flags & F_CONTENTLENGTH) {
\r
1430 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
\r
1434 parser->flags |= F_CONTENTLENGTH;
\r
1435 parser->content_length = ch - '0';
\r
1436 parser->header_state = h_content_length_num;
\r
1439 /* when obsolete line folding is encountered for content length
\r
1440 * continue to the s_header_value state */
\r
1441 case h_content_length_ws:
\r
1444 case h_connection:
\r
1445 /* looking for 'Connection: keep-alive' */
\r
1447 parser->header_state = h_matching_connection_keep_alive;
\r
1448 /* looking for 'Connection: close' */
\r
1449 } else if (c == 'c') {
\r
1450 parser->header_state = h_matching_connection_close;
\r
1451 } else if (c == 'u') {
\r
1452 parser->header_state = h_matching_connection_upgrade;
\r
1454 parser->header_state = h_matching_connection_token;
\r
1458 /* Multi-value `Connection` header */
\r
1459 case h_matching_connection_token_start:
\r
1463 parser->header_state = h_general;
\r
1469 case s_header_value:
\r
1471 const char* start = p;
\r
1472 enum header_states h_state = (enum header_states) parser->header_state;
\r
1473 for (; p != data + len; p++) {
\r
1476 UPDATE_STATE(s_header_almost_done);
\r
1477 parser->header_state = h_state;
\r
1478 CALLBACK_DATA(header_value);
\r
1483 UPDATE_STATE(s_header_almost_done);
\r
1484 COUNT_HEADER_SIZE(p - start);
\r
1485 parser->header_state = h_state;
\r
1486 CALLBACK_DATA_NOADVANCE(header_value);
\r
1490 if (!lenient && !IS_HEADER_CHAR(ch)) {
\r
1491 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
\r
1497 switch (h_state) {
\r
1500 size_t left = data + len - p;
\r
1501 const char* pe = p + MIN(left, max_header_size);
\r
1503 for (; p != pe; p++) {
\r
1505 if (ch == CR || ch == LF) {
\r
1509 if (!lenient && !IS_HEADER_CHAR(ch)) {
\r
1510 SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
\r
1514 if (p == data + len)
\r
1519 case h_connection:
\r
1520 case h_transfer_encoding:
\r
1521 assert(0 && "Shouldn't get here.");
\r
1524 case h_content_length:
\r
1525 if (ch == ' ') break;
\r
1526 h_state = h_content_length_num;
\r
1527 /* fall through */
\r
1529 case h_content_length_num:
\r
1534 h_state = h_content_length_ws;
\r
1538 if (UNLIKELY(!IS_NUM(ch))) {
\r
1539 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
\r
1540 parser->header_state = h_state;
\r
1544 t = parser->content_length;
\r
1548 /* Overflow? Test against a conservative limit for simplicity. */
\r
1549 if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) {
\r
1550 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
\r
1551 parser->header_state = h_state;
\r
1555 parser->content_length = t;
\r
1559 case h_content_length_ws:
\r
1560 if (ch == ' ') break;
\r
1561 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
\r
1562 parser->header_state = h_state;
\r
1565 /* Transfer-Encoding: chunked */
\r
1566 case h_matching_transfer_encoding_chunked:
\r
1568 if (parser->index > sizeof(CHUNKED)-1
\r
1569 || c != CHUNKED[parser->index]) {
\r
1570 h_state = h_general;
\r
1571 } else if (parser->index == sizeof(CHUNKED)-2) {
\r
1572 h_state = h_transfer_encoding_chunked;
\r
1576 case h_matching_connection_token_start:
\r
1577 /* looking for 'Connection: keep-alive' */
\r
1579 h_state = h_matching_connection_keep_alive;
\r
1580 /* looking for 'Connection: close' */
\r
1581 } else if (c == 'c') {
\r
1582 h_state = h_matching_connection_close;
\r
1583 } else if (c == 'u') {
\r
1584 h_state = h_matching_connection_upgrade;
\r
1585 } else if (STRICT_TOKEN(c)) {
\r
1586 h_state = h_matching_connection_token;
\r
1587 } else if (c == ' ' || c == '\t') {
\r
1590 h_state = h_general;
\r
1594 /* looking for 'Connection: keep-alive' */
\r
1595 case h_matching_connection_keep_alive:
\r
1597 if (parser->index > sizeof(KEEP_ALIVE)-1
\r
1598 || c != KEEP_ALIVE[parser->index]) {
\r
1599 h_state = h_matching_connection_token;
\r
1600 } else if (parser->index == sizeof(KEEP_ALIVE)-2) {
\r
1601 h_state = h_connection_keep_alive;
\r
1605 /* looking for 'Connection: close' */
\r
1606 case h_matching_connection_close:
\r
1608 if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) {
\r
1609 h_state = h_matching_connection_token;
\r
1610 } else if (parser->index == sizeof(CLOSE)-2) {
\r
1611 h_state = h_connection_close;
\r
1615 /* looking for 'Connection: upgrade' */
\r
1616 case h_matching_connection_upgrade:
\r
1618 if (parser->index > sizeof(UPGRADE) - 1 ||
\r
1619 c != UPGRADE[parser->index]) {
\r
1620 h_state = h_matching_connection_token;
\r
1621 } else if (parser->index == sizeof(UPGRADE)-2) {
\r
1622 h_state = h_connection_upgrade;
\r
1626 case h_matching_connection_token:
\r
1628 h_state = h_matching_connection_token_start;
\r
1629 parser->index = 0;
\r
1633 case h_transfer_encoding_chunked:
\r
1634 if (ch != ' ') h_state = h_general;
\r
1637 case h_connection_keep_alive:
\r
1638 case h_connection_close:
\r
1639 case h_connection_upgrade:
\r
1641 if (h_state == h_connection_keep_alive) {
\r
1642 parser->flags |= F_CONNECTION_KEEP_ALIVE;
\r
1643 } else if (h_state == h_connection_close) {
\r
1644 parser->flags |= F_CONNECTION_CLOSE;
\r
1645 } else if (h_state == h_connection_upgrade) {
\r
1646 parser->flags |= F_CONNECTION_UPGRADE;
\r
1648 h_state = h_matching_connection_token_start;
\r
1649 parser->index = 0;
\r
1650 } else if (ch != ' ') {
\r
1651 h_state = h_matching_connection_token;
\r
1656 UPDATE_STATE(s_header_value);
\r
1657 h_state = h_general;
\r
1661 parser->header_state = h_state;
\r
1663 if (p == data + len)
\r
1666 COUNT_HEADER_SIZE(p - start);
\r
1670 case s_header_almost_done:
\r
1672 if (UNLIKELY(ch != LF)) {
\r
1673 SET_ERRNO(HPE_LF_EXPECTED);
\r
1677 UPDATE_STATE(s_header_value_lws);
\r
1681 case s_header_value_lws:
\r
1683 if (ch == ' ' || ch == '\t') {
\r
1684 if (parser->header_state == h_content_length_num) {
\r
1685 /* treat obsolete line folding as space */
\r
1686 parser->header_state = h_content_length_ws;
\r
1688 UPDATE_STATE(s_header_value_start);
\r
1692 /* finished the header */
\r
1693 switch (parser->header_state) {
\r
1694 case h_connection_keep_alive:
\r
1695 parser->flags |= F_CONNECTION_KEEP_ALIVE;
\r
1697 case h_connection_close:
\r
1698 parser->flags |= F_CONNECTION_CLOSE;
\r
1700 case h_transfer_encoding_chunked:
\r
1701 parser->flags |= F_CHUNKED;
\r
1703 case h_connection_upgrade:
\r
1704 parser->flags |= F_CONNECTION_UPGRADE;
\r
1710 UPDATE_STATE(s_header_field_start);
\r
1714 case s_header_value_discard_ws_almost_done:
\r
1716 STRICT_CHECK(ch != LF);
\r
1717 UPDATE_STATE(s_header_value_discard_lws);
\r
1721 case s_header_value_discard_lws:
\r
1723 if (ch == ' ' || ch == '\t') {
\r
1724 UPDATE_STATE(s_header_value_discard_ws);
\r
1727 switch (parser->header_state) {
\r
1728 case h_connection_keep_alive:
\r
1729 parser->flags |= F_CONNECTION_KEEP_ALIVE;
\r
1731 case h_connection_close:
\r
1732 parser->flags |= F_CONNECTION_CLOSE;
\r
1734 case h_connection_upgrade:
\r
1735 parser->flags |= F_CONNECTION_UPGRADE;
\r
1737 case h_transfer_encoding_chunked:
\r
1738 parser->flags |= F_CHUNKED;
\r
1740 case h_content_length:
\r
1741 /* do not allow empty content length */
\r
1742 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
\r
1749 /* header value was empty */
\r
1750 MARK(header_value);
\r
1751 UPDATE_STATE(s_header_field_start);
\r
1752 CALLBACK_DATA_NOADVANCE(header_value);
\r
1757 case s_headers_almost_done:
\r
1759 STRICT_CHECK(ch != LF);
\r
1761 if (parser->flags & F_TRAILING) {
\r
1762 /* End of a chunked request */
\r
1763 UPDATE_STATE(s_message_done);
\r
1764 CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
\r
1768 /* Cannot use chunked encoding and a content-length header together
\r
1769 per the HTTP specification. */
\r
1770 if ((parser->flags & F_CHUNKED) &&
\r
1771 (parser->flags & F_CONTENTLENGTH)) {
\r
1772 SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
\r
1776 UPDATE_STATE(s_headers_done);
\r
1778 /* Set this here so that on_headers_complete() callbacks can see it */
\r
1779 if ((parser->flags & F_UPGRADE) &&
\r
1780 (parser->flags & F_CONNECTION_UPGRADE)) {
\r
1781 /* For responses, "Upgrade: foo" and "Connection: upgrade" are
\r
1782 * mandatory only when it is a 101 Switching Protocols response,
\r
1783 * otherwise it is purely informational, to announce support.
\r
1786 (parser->type == HTTP_REQUEST || parser->status_code == 101);
\r
1788 parser->upgrade = (parser->method == HTTP_CONNECT);
\r
1791 /* Here we call the headers_complete callback. This is somewhat
\r
1792 * different than other callbacks because if the user returns 1, we
\r
1793 * will interpret that as saying that this message has no body. This
\r
1794 * is needed for the annoying case of receiving a response to a HEAD
\r
1797 * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so
\r
1798 * we have to simulate it by handling a change in errno below.
\r
1800 if (settings->on_headers_complete) {
\r
1801 switch (settings->on_headers_complete(parser)) {
\r
1806 parser->upgrade = 1;
\r
1808 /* fall through */
\r
1810 parser->flags |= F_SKIPBODY;
\r
1814 SET_ERRNO(HPE_CB_headers_complete);
\r
1815 RETURN(p - data); /* Error */
\r
1819 if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
\r
1826 case s_headers_done:
\r
1829 STRICT_CHECK(ch != LF);
\r
1831 parser->nread = 0;
\r
1834 hasBody = parser->flags & F_CHUNKED ||
\r
1835 (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
\r
1836 if (parser->upgrade && (parser->method == HTTP_CONNECT ||
\r
1837 (parser->flags & F_SKIPBODY) || !hasBody)) {
\r
1838 /* Exit, the rest of the message is in a different protocol. */
\r
1839 UPDATE_STATE(NEW_MESSAGE());
\r
1840 CALLBACK_NOTIFY(message_complete);
\r
1841 RETURN((p - data) + 1);
\r
1844 if (parser->flags & F_SKIPBODY) {
\r
1845 UPDATE_STATE(NEW_MESSAGE());
\r
1846 CALLBACK_NOTIFY(message_complete);
\r
1847 } else if (parser->flags & F_CHUNKED) {
\r
1848 /* chunked encoding - ignore Content-Length header */
\r
1849 UPDATE_STATE(s_chunk_size_start);
\r
1851 if (parser->content_length == 0) {
\r
1852 /* Content-Length header given but zero: Content-Length: 0\r\n */
\r
1853 UPDATE_STATE(NEW_MESSAGE());
\r
1854 CALLBACK_NOTIFY(message_complete);
\r
1855 } else if (parser->content_length != ULLONG_MAX) {
\r
1856 /* Content-Length header given and non-zero */
\r
1857 UPDATE_STATE(s_body_identity);
\r
1859 if (!http_message_needs_eof(parser)) {
\r
1860 /* Assume content-length 0 - read the next */
\r
1861 UPDATE_STATE(NEW_MESSAGE());
\r
1862 CALLBACK_NOTIFY(message_complete);
\r
1864 /* Read body until EOF */
\r
1865 UPDATE_STATE(s_body_identity_eof);
\r
1873 case s_body_identity:
\r
1875 uint64_t to_read = MIN(parser->content_length,
\r
1876 (uint64_t) ((data + len) - p));
\r
1878 assert(parser->content_length != 0
\r
1879 && parser->content_length != ULLONG_MAX);
\r
1881 /* The difference between advancing content_length and p is because
\r
1882 * the latter will automatically advance on the next loop iteration.
\r
1883 * Further, if content_length ends up at 0, we want to see the last
\r
1884 * byte again for our message complete callback.
\r
1887 parser->content_length -= to_read;
\r
1890 if (parser->content_length == 0) {
\r
1891 UPDATE_STATE(s_message_done);
\r
1893 /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte.
\r
1895 * The alternative to doing this is to wait for the next byte to
\r
1896 * trigger the data callback, just as in every other case. The
\r
1897 * problem with this is that this makes it difficult for the test
\r
1898 * harness to distinguish between complete-on-EOF and
\r
1899 * complete-on-length. It's not clear that this distinction is
\r
1900 * important for applications, but let's keep it for now.
\r
1902 CALLBACK_DATA_(body, p - body_mark + 1, p - data);
\r
1909 /* read until EOF */
\r
1910 case s_body_identity_eof:
\r
1912 p = data + len - 1;
\r
1916 case s_message_done:
\r
1917 UPDATE_STATE(NEW_MESSAGE());
\r
1918 CALLBACK_NOTIFY(message_complete);
\r
1919 if (parser->upgrade) {
\r
1920 /* Exit, the rest of the message is in a different protocol. */
\r
1921 RETURN((p - data) + 1);
\r
1925 case s_chunk_size_start:
\r
1927 assert(nread == 1);
\r
1928 assert(parser->flags & F_CHUNKED);
\r
1930 unhex_val = unhex[(unsigned char)ch];
\r
1931 if (UNLIKELY(unhex_val == -1)) {
\r
1932 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
\r
1936 parser->content_length = unhex_val;
\r
1937 UPDATE_STATE(s_chunk_size);
\r
1941 case s_chunk_size:
\r
1945 assert(parser->flags & F_CHUNKED);
\r
1948 UPDATE_STATE(s_chunk_size_almost_done);
\r
1952 unhex_val = unhex[(unsigned char)ch];
\r
1954 if (unhex_val == -1) {
\r
1955 if (ch == ';' || ch == ' ') {
\r
1956 UPDATE_STATE(s_chunk_parameters);
\r
1960 SET_ERRNO(HPE_INVALID_CHUNK_SIZE);
\r
1964 t = parser->content_length;
\r
1968 /* Overflow? Test against a conservative limit for simplicity. */
\r
1969 if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) {
\r
1970 SET_ERRNO(HPE_INVALID_CONTENT_LENGTH);
\r
1974 parser->content_length = t;
\r
1978 case s_chunk_parameters:
\r
1980 assert(parser->flags & F_CHUNKED);
\r
1981 /* chunk parameters are ignored. TODO check for overflow */
\r
1983 UPDATE_STATE(s_chunk_size_almost_done);
\r
1989 case s_chunk_size_almost_done:
\r
1991 assert(parser->flags & F_CHUNKED);
\r
1992 STRICT_CHECK(ch != LF);
\r
1994 parser->nread = 0;
\r
1997 if (parser->content_length == 0) {
\r
1998 parser->flags |= F_TRAILING;
\r
1999 UPDATE_STATE(s_header_field_start);
\r
2001 UPDATE_STATE(s_chunk_data);
\r
2003 CALLBACK_NOTIFY(chunk_header);
\r
2007 case s_chunk_data:
\r
2009 uint64_t to_read = MIN(parser->content_length,
\r
2010 (uint64_t) ((data + len) - p));
\r
2012 assert(parser->flags & F_CHUNKED);
\r
2013 assert(parser->content_length != 0
\r
2014 && parser->content_length != ULLONG_MAX);
\r
2016 /* See the explanation in s_body_identity for why the content
\r
2017 * length and data pointers are managed this way.
\r
2020 parser->content_length -= to_read;
\r
2023 if (parser->content_length == 0) {
\r
2024 UPDATE_STATE(s_chunk_data_almost_done);
\r
2030 case s_chunk_data_almost_done:
\r
2031 assert(parser->flags & F_CHUNKED);
\r
2032 assert(parser->content_length == 0);
\r
2033 STRICT_CHECK(ch != CR);
\r
2034 UPDATE_STATE(s_chunk_data_done);
\r
2035 CALLBACK_DATA(body);
\r
2038 case s_chunk_data_done:
\r
2039 assert(parser->flags & F_CHUNKED);
\r
2040 STRICT_CHECK(ch != LF);
\r
2041 parser->nread = 0;
\r
2043 UPDATE_STATE(s_chunk_size_start);
\r
2044 CALLBACK_NOTIFY(chunk_complete);
\r
2048 assert(0 && "unhandled state");
\r
2049 SET_ERRNO(HPE_INVALID_INTERNAL_STATE);
\r
2054 /* Run callbacks for any marks that we have leftover after we ran out of
\r
2055 * bytes. There should be at most one of these set, so it's OK to invoke
\r
2056 * them in series (unset marks will not result in callbacks).
\r
2058 * We use the NOADVANCE() variety of callbacks here because 'p' has already
\r
2059 * overflowed 'data' and this allows us to correct for the off-by-one that
\r
2060 * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p'
\r
2061 * value that's in-bounds).
\r
2064 assert(((header_field_mark ? 1 : 0) +
\r
2065 (header_value_mark ? 1 : 0) +
\r
2066 (url_mark ? 1 : 0) +
\r
2067 (body_mark ? 1 : 0) +
\r
2068 (status_mark ? 1 : 0)) <= 1);
\r
2070 CALLBACK_DATA_NOADVANCE(header_field);
\r
2071 CALLBACK_DATA_NOADVANCE(header_value);
\r
2072 CALLBACK_DATA_NOADVANCE(url);
\r
2073 CALLBACK_DATA_NOADVANCE(body);
\r
2074 CALLBACK_DATA_NOADVANCE(status);
\r
2079 if (HTTP_PARSER_ERRNO(parser) == HPE_OK) {
\r
2080 SET_ERRNO(HPE_UNKNOWN);
\r
2087 /* Does the parser need to see an EOF to find the end of the message?
 *
 * The visible checks are, in order:
 *   - the parser is handling a request (type == HTTP_REQUEST);
 *   - the response status is 1xx, 204 or 304, or F_SKIPBODY is set;
 *   - the body is framed explicitly: F_CHUNKED is set, or content_length
 *     differs from ULLONG_MAX.
 *
 * NOTE(review): the statements guarded by these conditions and the final
 * return are not visible in this listing. Presumably each check answers
 * "no" and falling through all of them answers "yes" -- confirm against
 * the complete definition.
 */
\r
2089 http_message_needs_eof (const http_parser *parser)
\r
2091 if (parser->type == HTTP_REQUEST) {
\r
2095 /* See RFC 2616 section 4.4 */
\r
2096 if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */
\r
2097 parser->status_code == 204 || /* No Content */
\r
2098 parser->status_code == 304 || /* Not Modified */
\r
2099 parser->flags & F_SKIPBODY) { /* response to a HEAD request */
\r
/* Chunked framing, or a content_length other than the ULLONG_MAX value. */
2103 if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) {
\r
/* Reports whether the connection can stay open after the current message.
 *
 * When http_major and http_minor are both greater than zero the decision
 * looks at F_CONNECTION_CLOSE; otherwise it looks at
 * F_CONNECTION_KEEP_ALIVE. The last statement returns the negation of
 * http_message_needs_eof(): a message that can only end at EOF cannot
 * share its connection.
 *
 * NOTE(review): the bodies of the two flag checks are not visible in this
 * listing; presumably they return 0 (do not keep alive) -- confirm.
 */
2112 http_should_keep_alive (const http_parser *parser)
\r
2114 if (parser->http_major > 0 && parser->http_minor > 0) {
\r
/* An explicit F_CONNECTION_CLOSE flag is checked on this branch. */
2116 if (parser->flags & F_CONNECTION_CLOSE) {
\r
2120 /* HTTP/1.0 or earlier */
\r
/* Here the check is for the absence of F_CONNECTION_KEEP_ALIVE. */
2121 if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) {
\r
2126 return !http_message_needs_eof(parser);
\r
/* Returns the method_strings entry for m.
 * ELEM_AT compares the index (cast to unsigned int) with the table size and
 * yields "<unknown>" when m is outside the table.
 */
2131 http_method_str (enum http_method m)
\r
2133 return ELEM_AT(method_strings, m, "<unknown>");
\r
/* Returns a string for status s.
 * XX turns every HTTP_STATUS_MAP entry into a case label that returns the
 * stringified third macro argument; any other value yields "<unknown>".
 */
2137 http_status_str (enum http_status s)
\r
/* One `case HTTP_STATUS_<name>: return "<string>";` per map entry. */
2140 #define XX(num, name, string) case HTTP_STATUS_##name: return #string;
\r
2141 HTTP_STATUS_MAP(XX)
\r
2143 default: return "<unknown>";
\r
/* Resets *parser for parsing messages of kind t.
 *
 * The whole struct is zeroed, except that the data pointer found on entry
 * is put back afterwards. Because parser->data is read before the memset,
 * it should hold a defined value when this function is called.
 *
 * The starting state is s_start_req for HTTP_REQUEST, s_start_res for
 * HTTP_RESPONSE and s_start_req_or_res for any other value of t.
 *
 * NOTE(review): the embedded line numbers skip one line between the
 * restore of parser->data and the state assignment; a statement may be
 * missing from this listing -- confirm against the complete definition.
 */
2148 http_parser_init (http_parser *parser, enum http_parser_type t)
\r
2150 void *data = parser->data; /* preserve application data */
\r
2151 memset(parser, 0, sizeof(*parser));
\r
2152 parser->data = data;
\r
2154 parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res));
\r
2155 parser->http_errno = HPE_OK;
\r
/* Clears every byte of *settings.
 * The parser tests a callback member (e.g. settings->on_headers_complete)
 * before calling it, so a cleared struct is the "no callbacks" starting
 * point that callers then fill in.
 */
2159 http_parser_settings_init(http_parser_settings *settings)
\r
2161 memset(settings, 0, sizeof(*settings));
\r
/* Returns the `name` member of the http_strerror_tab entry for err.
 * The range check is an assert only; with NDEBUG defined it is compiled
 * out, so err must already be a valid index into http_strerror_tab.
 */
2165 http_errno_name(enum http_errno err) {
\r
2166 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
\r
2167 return http_strerror_tab[err].name;
\r
/* Returns the `description` member of the http_strerror_tab entry for err.
 * The range check is an assert only; with NDEBUG defined it is compiled
 * out, so err must already be a valid index into http_strerror_tab.
 */
2171 http_errno_description(enum http_errno err) {
\r
2172 assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
\r
2173 return http_strerror_tab[err].description;
\r
/* One step of the host state machine: maps (s, ch) to the next state.
 *
 * The last statement returns s_http_host_dead, which is therefore the
 * result whenever none of the earlier returns is taken.
 *
 * NOTE(review): several of the conditions that guard the returns below
 * (for example which character leads to s_http_host_start or
 * s_http_host_port_start) are not visible in this listing, so the comments
 * here describe only the transitions whose guards can be read.
 */
2176 static enum http_host_state
\r
2177 http_parse_host_char(enum http_host_state s, const char ch) {
\r
2179 case s_http_userinfo:
\r
2180 case s_http_userinfo_start:
\r
2182 return s_http_host_start;
\r
/* Any userinfo character keeps (or puts) the machine in s_http_userinfo. */
2185 if (IS_USERINFO_CHAR(ch)) {
\r
2186 return s_http_userinfo;
\r
2190 case s_http_host_start:
\r
2192 return s_http_host_v6_start;
\r
2195 if (IS_HOST_CHAR(ch)) {
\r
2196 return s_http_host;
\r
2202 if (IS_HOST_CHAR(ch)) {
\r
2203 return s_http_host;
\r
2206 /* fall through */
\r
2207 case s_http_host_v6_end:
\r
2209 return s_http_host_port_start;
\r
2214 case s_http_host_v6:
\r
2216 return s_http_host_v6_end;
\r
2219 /* fall through */
\r
2220 case s_http_host_v6_start:
\r
/* Hex digits, ':' and '.' are accepted as part of the bracketed address. */
2221 if (IS_HEX(ch) || ch == ':' || ch == '.') {
\r
2222 return s_http_host_v6;
\r
/* '%' starts a zone id, but only once already in s_http_host_v6. */
2225 if (s == s_http_host_v6 && ch == '%') {
\r
2226 return s_http_host_v6_zone_start;
\r
2230 case s_http_host_v6_zone:
\r
2232 return s_http_host_v6_end;
\r
2235 /* fall through */
\r
2236 case s_http_host_v6_zone_start:
\r
2237 /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
\r
2238 if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
\r
2240 return s_http_host_v6_zone;
\r
2244 case s_http_host_port:
\r
2245 case s_http_host_port_start:
\r
2247 return s_http_host_port;
\r
/* No transition matched: report the dead state. */
2255 return s_http_host_dead;
\r
/* Re-scans the UF_HOST span recorded in *u and splits it into fields.
 *
 * On entry u->field_data[UF_HOST] describes the span of buf to scan; its
 * end (off + len) is captured in buflen before len is reset to 0.
 * found_at selects the starting state: s_http_userinfo_start when non-zero,
 * s_http_host_start otherwise. Every character is passed through
 * http_parse_host_char(). When a field state is entered from a different
 * state the field offset is recorded (cast to uint16_t), and the field
 * length grows by one per character. UF_PORT and UF_USERINFO are added to
 * u->field_set when those fields first appear.
 *
 * NOTE(review): the return statements are not visible in this listing. The
 * caller in this file compares the result with 0, so presumably 0 means
 * success and non-zero means the host text is malformed -- confirm.
 */
2259 http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
\r
2260 enum http_host_state s;
\r
/* End of the span to scan, taken before the host length is reset below. */
2263 size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
\r
2265 assert(u->field_set & (1 << UF_HOST));
\r
/* The host length is rebuilt character by character in the loop. */
2267 u->field_data[UF_HOST].len = 0;
\r
2269 s = found_at ? s_http_userinfo_start : s_http_host_start;
\r
2271 for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
\r
2272 enum http_host_state new_s = http_parse_host_char(s, *p);
\r
/* The state machine rejected this character. */
2274 if (new_s == s_http_host_dead) {
\r
/* First character of the host: remember where it starts. */
2280 if (s != s_http_host) {
\r
2281 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
\r
2283 u->field_data[UF_HOST].len++;
\r
2286 case s_http_host_v6:
\r
2287 if (s != s_http_host_v6) {
\r
2288 u->field_data[UF_HOST].off = (uint16_t)(p - buf);
\r
2290 u->field_data[UF_HOST].len++;
\r
/* Zone-id characters are counted as part of the host field. */
2293 case s_http_host_v6_zone_start:
\r
2294 case s_http_host_v6_zone:
\r
2295 u->field_data[UF_HOST].len++;
\r
2298 case s_http_host_port:
\r
/* First port character: start a fresh UF_PORT field and mark it present. */
2299 if (s != s_http_host_port) {
\r
2300 u->field_data[UF_PORT].off = (uint16_t)(p - buf);
\r
2301 u->field_data[UF_PORT].len = 0;
\r
2302 u->field_set |= (1 << UF_PORT);
\r
2304 u->field_data[UF_PORT].len++;
\r
2307 case s_http_userinfo:
\r
/* First userinfo character: start a fresh UF_USERINFO field. */
2308 if (s != s_http_userinfo) {
\r
2309 u->field_data[UF_USERINFO].off = (uint16_t)(p - buf);
\r
2310 u->field_data[UF_USERINFO].len = 0;
\r
2311 u->field_set |= (1 << UF_USERINFO);
\r
2313 u->field_data[UF_USERINFO].len++;
\r
2322 /* Make sure we don't end somewhere unexpected */
\r
/* NOTE(review): the action taken for the states listed below is not
 * visible in this listing; presumably they are treated as a failure. */
2324 case s_http_host_start:
\r
2325 case s_http_host_v6_start:
\r
2326 case s_http_host_v6:
\r
2327 case s_http_host_v6_zone_start:
\r
2328 case s_http_host_v6_zone:
\r
2329 case s_http_host_port_start:
\r
2330 case s_http_userinfo:
\r
2331 case s_http_userinfo_start:
\r
/* Clears every byte of *u, leaving no field bits set and all offsets,
 * lengths and the port at zero.
 */
2341 http_parser_url_init(struct http_parser_url *u) {
\r
2342 memset(u, 0, sizeof(*u));
\r
/* Splits the URL in buf[0 .. buflen) into the fields recorded in *u.
 *
 * is_connect selects the start state (s_req_server_start instead of
 * s_req_spaces_before_url) and later requires that the set of fields found
 * is exactly UF_HOST plus UF_PORT.
 *
 * Each field is stored as an offset and length into buf (offsets are cast
 * to uint16_t) with one bit per field in u->field_set. When a host field
 * is present it is refined by http_parse_host(); when a port field is
 * present its numeric value is stored in u->port.
 *
 * NOTE(review): the return statements are not visible in this listing.
 * http_parse_host() is compared with 0 here, so presumably 0 means success
 * and non-zero means failure for this function as well -- confirm.
 */
2346 http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
\r
2347 struct http_parser_url *u)
\r
2351 enum http_parser_url_fields uf, old_uf;
\r
2354 if (buflen == 0) {
\r
/* Start from an empty result: no port and no fields seen yet. */
2358 u->port = u->field_set = 0;
\r
2359 s = is_connect ? s_req_server_start : s_req_spaces_before_url;
\r
/* Advance the URL state machine one character at a time. */
2362 for (p = buf; p < buf + buflen; p++) {
\r
2363 s = parse_url_char(s, *p);
\r
2365 /* Figure out the next field that we're operating on */
\r
2370 /* Skip delimiters */
\r
2371 case s_req_schema_slash:
\r
2372 case s_req_schema_slash_slash:
\r
2373 case s_req_server_start:
\r
2374 case s_req_query_string_start:
\r
2375 case s_req_fragment_start:
\r
2378 case s_req_schema:
\r
2382 case s_req_server_with_at:
\r
2385 /* fall through */
\r
2386 case s_req_server:
\r
2394 case s_req_query_string:
\r
2398 case s_req_fragment:
\r
2403 assert(!"Unexpected state");
\r
2407 /* Nothing's changed; soldier on */
\r
2408 if (uf == old_uf) {
\r
2409 u->field_data[uf].len++;
\r
/* A different field begins at this character. */
2413 u->field_data[uf].off = (uint16_t)(p - buf);
\r
2414 u->field_data[uf].len = 1;
\r
2416 u->field_set |= (1 << uf);
\r
2420 /* host must be present if there is a schema */
\r
2421 /* parsing http:///toto will fail */
\r
2422 if ((u->field_set & (1 << UF_SCHEMA)) &&
\r
2423 (u->field_set & (1 << UF_HOST)) == 0) {
\r
/* Split the host span further (userinfo, host, port). */
2427 if (u->field_set & (1 << UF_HOST)) {
\r
2428 if (http_parse_host(buf, u, found_at) != 0) {
\r
2433 /* CONNECT requests can only contain "hostname:port" */
\r
2434 if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
\r
2438 if (u->field_set & (1 << UF_PORT)) {
\r
/* Locate the port text inside buf. */
2445 off = u->field_data[UF_PORT].off;
\r
2446 len = u->field_data[UF_PORT].len;
\r
2447 end = buf + off + len;
\r
2449 /* NOTE: The characters are already validated and are in the [0-9] range */
\r
2450 assert(off + len <= buflen && "Port number overflow");
\r
/* NOTE(review): the loop body is not visible in this listing; presumably
 * it accumulates the decimal digits into v -- confirm. */
2452 for (p = buf + off; p < end; p++) {
\r
2456 /* Ports have a max value of 2^16 - 1 (65535) */
\r
2462 u->port = (uint16_t) v;
\r
/* Sets or clears the paused condition of the parser.
 *
 * Acts only when the current errno is HPE_OK or HPE_PAUSED: a non-zero
 * `paused` stores HPE_PAUSED and zero stores HPE_OK. SET_ERRNO also writes
 * a local named nread back to parser->nread, which is why nread is first
 * loaded from the parser so that the write-back leaves it unchanged.
 * The trailing assert carries the message for the remaining case, a parser
 * that is already in an error state.
 */
2469 http_parser_pause(http_parser *parser, int paused) {
\r
2470 /* Users should only be pausing/unpausing a parser that is not in an error
\r
2471 * state. In non-debug builds, there's not much that we can do about this
\r
2472 * other than ignore it.
\r
2474 if (HTTP_PARSER_ERRNO(parser) == HPE_OK ||
\r
2475 HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) {
\r
2476 uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */
\r
2477 SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK);
\r
2479 assert(0 && "Attempting to pause parser in error state");
\r
/* Returns non-zero when parser->state equals s_message_done, and zero for
 * every other state.
 */
2484 http_body_is_final(const struct http_parser *parser) {
\r
2485 return parser->state == s_message_done;
\r
/* Packs the library version into a single integer: the major number is
 * multiplied by 0x10000, the minor number by 0x100 and the patch number by
 * 1, and the three products are combined with bitwise OR.
 */
2489 http_parser_version(void) {
\r
2490 return HTTP_PARSER_VERSION_MAJOR * 0x10000 |
\r
2491 HTTP_PARSER_VERSION_MINOR * 0x00100 |
\r
2492 HTTP_PARSER_VERSION_PATCH * 0x00001;
\r
/* Replaces the file-level max_header_size limit, which starts out as
 * HTTP_MAX_HEADER_SIZE. The limit is a single static variable, so the new
 * value applies to all parsing done through this file, not to one parser.
 */
2496 http_parser_set_max_header_size(uint32_t size) {
\r
2497 max_header_size = size;
\r