diff options
Diffstat (limited to 'ext/http-parser')
-rw-r--r-- | ext/http-parser/AUTHORS | 5 | ||||
-rw-r--r-- | ext/http-parser/README.md | 246 | ||||
-rw-r--r-- | ext/http-parser/http_parser.c | 121 | ||||
-rw-r--r-- | ext/http-parser/http_parser.h | 9 |
4 files changed, 296 insertions, 85 deletions
diff --git a/ext/http-parser/AUTHORS b/ext/http-parser/AUTHORS index 29cdbb16..5323b685 100644 --- a/ext/http-parser/AUTHORS +++ b/ext/http-parser/AUTHORS @@ -61,3 +61,8 @@ Marc O'Morain <github.com@marcomorain.com> Jeff Pinner <jpinner@twitter.com> Timothy J Fontaine <tjfontaine@gmail.com> Akagi201 <akagi201@gmail.com> +Romain Giraud <giraud.romain@gmail.com> +Jay Satiro <raysatiro@yahoo.com> +Arne Steen <Arne.Steen@gmx.de> +Kjell Schubert <kjell.schubert@gmail.com> +Olivier Mengué <dolmen@cpan.org> diff --git a/ext/http-parser/README.md b/ext/http-parser/README.md new file mode 100644 index 00000000..439b3099 --- /dev/null +++ b/ext/http-parser/README.md @@ -0,0 +1,246 @@ +HTTP Parser +=========== + +[![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser) + +This is a parser for HTTP messages written in C. It parses both requests and +responses. The parser is designed to be used in performance HTTP +applications. It does not make any syscalls nor allocations, it does not +buffer data, it can be interrupted at anytime. Depending on your +architecture, it only requires about 40 bytes of data per message +stream (in a web server that is per connection). + +Features: + + * No dependencies + * Handles persistent streams (keep-alive). + * Decodes chunked encoding. + * Upgrade support + * Defends against buffer overflow attacks. + +The parser extracts the following information from HTTP messages: + + * Header fields and values + * Content-Length + * Request method + * Response status code + * Transfer-Encoding + * HTTP version + * Request URL + * Message body + + +Usage +----- + +One `http_parser` object is used per TCP connection. Initialize the struct +using `http_parser_init()` and set the callbacks. That might look something +like this for a request parser: +```c +http_parser_settings settings; +settings.on_url = my_url_callback; +settings.on_header_field = my_header_field_callback; +/* ... */ + +http_parser *parser = malloc(sizeof(http_parser)); +http_parser_init(parser, HTTP_REQUEST); +parser->data = my_socket; +``` + +When data is received on the socket execute the parser and check for errors. + +```c +size_t len = 80*1024, nparsed; +char buf[len]; +ssize_t recved; + +recved = recv(fd, buf, len, 0); + +if (recved < 0) { + /* Handle error. */ +} + +/* Start up / continue the parser. + * Note we pass recved==0 to signal that EOF has been received. + */ +nparsed = http_parser_execute(parser, &settings, buf, recved); + +if (parser->upgrade) { + /* handle new protocol */ +} else if (nparsed != recved) { + /* Handle error. Usually just close the connection. */ +} +``` + +HTTP needs to know where the end of the stream is. For example, sometimes +servers send responses without Content-Length and expect the client to +consume input (for the body) until EOF. To tell http_parser about EOF, give +`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors +can still be encountered during an EOF, so one must still be prepared +to receive them. + +Scalar valued message information such as `status_code`, `method`, and the +HTTP version are stored in the parser structure. This data is only +temporally stored in `http_parser` and gets reset on each new message. If +this information is needed later, copy it out of the structure during the +`headers_complete` callback. + +The parser decodes the transfer-encoding for both requests and responses +transparently. That is, a chunked encoding is decoded before being sent to +the on_body callback. + + +The Special Problem of Upgrade +------------------------------ + +HTTP supports upgrading the connection to a different protocol. An +increasingly common example of this is the WebSocket protocol which sends +a request like + + GET /demo HTTP/1.1 + Upgrade: WebSocket + Connection: Upgrade + Host: example.com + Origin: http://example.com + WebSocket-Protocol: sample + +followed by non-HTTP data. + +(See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information the +WebSocket protocol.) + +To support this, the parser will treat this as a normal HTTP message without a +body, issuing both on_headers_complete and on_message_complete callbacks. However +http_parser_execute() will stop parsing at the end of the headers and return. + +The user is expected to check if `parser->upgrade` has been set to 1 after +`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied +offset by the return value of `http_parser_execute()`. + + +Callbacks +--------- + +During the `http_parser_execute()` call, the callbacks set in +`http_parser_settings` will be executed. The parser maintains state and +never looks behind, so buffering the data is not necessary. If you need to +save certain data for later usage, you can do that from the callbacks. + +There are two types of callbacks: + +* notification `typedef int (*http_cb) (http_parser*);` + Callbacks: on_message_begin, on_headers_complete, on_message_complete. +* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);` + Callbacks: (requests only) on_url, + (common) on_header_field, on_header_value, on_body; + +Callbacks must return 0 on success. Returning a non-zero value indicates +error to the parser, making it exit immediately. + +For cases where it is necessary to pass local information to/from a callback, +the `http_parser` object's `data` field can be used. +An example of such a case is when using threads to handle a socket connection, +parse a request, and then give a response over that socket. By instantiation +of a thread-local struct containing relevant data (e.g. accepted socket, +allocated memory for callbacks to write into, etc), a parser's callbacks are +able to communicate data between the scope of the thread and the scope of the +callback in a threadsafe manner. This allows http-parser to be used in +multi-threaded contexts. + +Example: +```c + typedef struct { + socket_t sock; + void* buffer; + int buf_len; + } custom_data_t; + + +int my_url_callback(http_parser* parser, const char *at, size_t length) { + /* access to thread local custom_data_t struct. + Use this access save parsed data for later use into thread local + buffer, or communicate over socket + */ + parser->data; + ... + return 0; +} + +... + +void http_parser_thread(socket_t sock) { + int nparsed = 0; + /* allocate memory for user data */ + custom_data_t *my_data = malloc(sizeof(custom_data_t)); + + /* some information for use by callbacks. + * achieves thread -> callback information flow */ + my_data->sock = sock; + + /* instantiate a thread-local parser */ + http_parser *parser = malloc(sizeof(http_parser)); + http_parser_init(parser, HTTP_REQUEST); /* initialise parser */ + /* this custom data reference is accessible through the reference to the + parser supplied to callback functions */ + parser->data = my_data; + + http_parser_settings settings; /* set up callbacks */ + settings.on_url = my_url_callback; + + /* execute parser */ + nparsed = http_parser_execute(parser, &settings, buf, recved); + + ... + /* parsed information copied from callback. + can now perform action on data copied into thread-local memory from callbacks. + achieves callback -> thread information flow */ + my_data->buffer; + ... +} + +``` + +In case you parse HTTP message in chunks (i.e. `read()` request line +from socket, parse, read half headers, parse, etc) your data callbacks +may be called more than once. Http-parser guarantees that data pointer is only +valid for the lifetime of callback. You can also `read()` into a heap allocated +buffer to avoid copying memory around if this fits your application. + +Reading headers may be a tricky task if you read/parse headers partially. +Basically, you need to remember whether last header callback was field or value +and apply the following logic: + + (on_header_field and on_header_value shortened to on_h_*) + ------------------------ ------------ -------------------------------------------- + | State (prev. callback) | Callback | Description/action | + ------------------------ ------------ -------------------------------------------- + | nothing (first call) | on_h_field | Allocate new buffer and copy callback data | + | | | into it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_field | New header started. | + | | | Copy current name,value buffers to headers | + | | | list and allocate new buffer for new name | + ------------------------ ------------ -------------------------------------------- + | field | on_h_field | Previous name continues. Reallocate name | + | | | buffer and append callback data to it | + ------------------------ ------------ -------------------------------------------- + | field | on_h_value | Value for current header started. Allocate | + | | | new buffer and copy callback data to it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_value | Value continues. Reallocate value buffer | + | | | and append callback data to it | + ------------------------ ------------ -------------------------------------------- + + +Parsing URLs +------------ + +A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`. +Users of this library may wish to use it to parse URLs constructed from +consecutive `on_url` callbacks. + +See examples of reading in headers: + +* [partial example](http://gist.github.com/155877) in C +* [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C +* [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript diff --git a/ext/http-parser/http_parser.c b/ext/http-parser/http_parser.c index a113c7f5..3c896ffa 100644 --- a/ext/http-parser/http_parser.c +++ b/ext/http-parser/http_parser.c @@ -123,7 +123,7 @@ do { \ FOR##_mark = NULL; \ } \ } while (0) - + /* Run the data callback FOR and consume the current byte */ #define CALLBACK_DATA(FOR) \ CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) @@ -440,7 +440,7 @@ enum http_host_state * character or %x80-FF **/ #define IS_HEADER_CHAR(ch) \ - (ch == CR || ch == LF || ch == 9 || (ch > 31 && ch != 127)) + (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127)) #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) @@ -1007,89 +1007,40 @@ reexecute: UPDATE_STATE(s_req_spaces_before_url); } else if (ch == matcher[parser->index]) { ; /* nada */ - } else if (parser->method == HTTP_CONNECT) { - if (parser->index == 1 && ch == 'H') { - parser->method = HTTP_CHECKOUT; - } else if (parser->index == 2 && ch == 'P') { - parser->method = HTTP_COPY; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->method == HTTP_MKCOL) { - if (parser->index == 1 && ch == 'O') { - parser->method = HTTP_MOVE; - } else if (parser->index == 1 && ch == 'E') { - parser->method = HTTP_MERGE; - } else if (parser->index == 1 && ch == '-') { - parser->method = HTTP_MSEARCH; - } else if (parser->index == 2 && ch == 'A') { - parser->method = HTTP_MKACTIVITY; - } else if (parser->index == 3 && ch == 'A') { - parser->method = HTTP_MKCALENDAR; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->method == HTTP_SUBSCRIBE) { - if (parser->index == 1 && ch == 'E') { - parser->method = HTTP_SEARCH; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->method == HTTP_REPORT) { - if (parser->index == 2 && ch == 'B') { - parser->method = HTTP_REBIND; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->index == 1) { - if (parser->method == HTTP_POST) { - if (ch == 'R') { - parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ - } else if (ch == 'U') { - parser->method = HTTP_PUT; /* or HTTP_PURGE */ - } else if (ch == 'A') { - parser->method = HTTP_PATCH; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->method == HTTP_LOCK) { - if (ch == 'I') { - parser->method = HTTP_LINK; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } - } else if (parser->index == 2) { - if (parser->method == HTTP_PUT) { - if (ch == 'R') { - parser->method = HTTP_PURGE; - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; - } - } else if (parser->method == HTTP_UNLOCK) { - if (ch == 'S') { - parser->method = HTTP_UNSUBSCRIBE; - } else if(ch == 'B') { - parser->method = HTTP_UNBIND; - } else { + } else if (IS_ALPHA(ch)) { + + switch (parser->method << 16 | parser->index << 8 | ch) { +#define XX(meth, pos, ch, new_meth) \ + case (HTTP_##meth << 16 | pos << 8 | ch): \ + parser->method = HTTP_##new_meth; break; + + XX(POST, 1, 'U', PUT) + XX(POST, 1, 'A', PATCH) + XX(CONNECT, 1, 'H', CHECKOUT) + XX(CONNECT, 2, 'P', COPY) + XX(MKCOL, 1, 'O', MOVE) + XX(MKCOL, 1, 'E', MERGE) + XX(MKCOL, 2, 'A', MKACTIVITY) + XX(MKCOL, 3, 'A', MKCALENDAR) + XX(SUBSCRIBE, 1, 'E', SEARCH) + XX(REPORT, 2, 'B', REBIND) + XX(POST, 1, 'R', PROPFIND) + XX(PROPFIND, 4, 'P', PROPPATCH) + XX(PUT, 2, 'R', PURGE) + XX(LOCK, 1, 'I', LINK) + XX(UNLOCK, 2, 'S', UNSUBSCRIBE) + XX(UNLOCK, 2, 'B', UNBIND) + XX(UNLOCK, 3, 'I', UNLINK) +#undef XX + + default: SET_ERRNO(HPE_INVALID_METHOD); goto error; - } - } else { - SET_ERRNO(HPE_INVALID_METHOD); - goto error; } - } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { - parser->method = HTTP_PROPPATCH; - } else if (parser->index == 3 && parser->method == HTTP_UNLOCK && ch == 'I') { - parser->method = HTTP_UNLINK; + } else if (ch == '-' && + parser->index == 1 && + parser->method == HTTP_MKCOL) { + parser->method = HTTP_MSEARCH; } else { SET_ERRNO(HPE_INVALID_METHOD); goto error; @@ -1861,6 +1812,9 @@ reexecute: case 0: break; + case 2: + parser->upgrade = 1; + case 1: parser->flags |= F_SKIPBODY; break; @@ -2299,12 +2253,13 @@ http_parse_host_char(enum http_host_state s, const char ch) { static int http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { - //assert(u->field_set & (1 << UF_HOST)); enum http_host_state s; const char *p; size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + assert(u->field_set & (1 << UF_HOST)); + u->field_data[UF_HOST].len = 0; s = found_at ? s_http_userinfo_start : s_http_host_start; diff --git a/ext/http-parser/http_parser.h b/ext/http-parser/http_parser.h index e33c0620..105ae510 100644 --- a/ext/http-parser/http_parser.h +++ b/ext/http-parser/http_parser.h @@ -26,8 +26,8 @@ extern "C" { /* Also update SONAME in the Makefile whenever you change these. */ #define HTTP_PARSER_VERSION_MAJOR 2 -#define HTTP_PARSER_VERSION_MINOR 6 -#define HTTP_PARSER_VERSION_PATCH 1 +#define HTTP_PARSER_VERSION_MINOR 7 +#define HTTP_PARSER_VERSION_PATCH 0 #include <sys/types.h> #if defined(_WIN32) && !defined(__MINGW32__) && \ @@ -77,6 +77,11 @@ typedef struct http_parser_settings http_parser_settings; * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: * chunked' headers that indicate the presence of a body. * + * Returning `2` from on_headers_complete will tell parser that it should not + * expect neither a body nor any futher responses on this connection. This is + * useful for handling responses to a CONNECT request which may not contain + * `Upgrade` or `Connection: upgrade` headers. + * * http_data_cb does not return data chunks. It will be called arbitrarily * many times for each string. E.G. you might get 10 callbacks for "on_url" * each providing just a few characters more data. |