/* HTTP/1.1 Parser
* Copyright 2008 ryah dahl, ry at tiny clouds punkt org
*
* Based on Zed Shaw's parser for Mongrel.
* Copyright (c) 2005 Zed A. Shaw
*
* This software may be distributed under the "MIT" license included in the
* README
*/
#include "ebb_request_parser.h"
#include <stdio.h>
#include <ctype.h>
#include <assert.h>
#define TRUE 1
#define FALSE 0

/* Smaller of two values. Arguments are fully parenthesized so that operands
 * such as `x & y` or `a - b` group as written. NOTE: each argument is
 * evaluated twice, so do not pass expressions with side effects. */
#define MIN(a,b) ((a) < (b) ? (a) : (b))

/* Bytes left in the buffer currently being parsed (uses the locals p, pe). */
#define REMAINING (pe - p)

/* The request currently being parsed (uses the local `parser`). */
#define CURRENT (parser->current_request)
#define CONTENT_LENGTH (parser->current_request->content_length)

/* Length of the token that started at parser-><FROM>_mark and ends at p. */
#define LEN(FROM) (p - parser->FROM##_mark)

/*
 * The statement-like macros below are wrapped in do { } while(0) so that each
 * expands to exactly one statement. Without the wrapper an invocation placed
 * directly under an `if` would capture a following `else`, and in the case of
 * END_REQUEST the trailing assignment would run unconditionally.
 */

/* Report [mark, p) to CURRENT->on_<FOR> if a mark is set and the callback
 * is installed. */
#define CALLBACK(FOR) \
  do { \
    if(parser->FOR##_mark && CURRENT->on_##FOR) { \
      CURRENT->on_##FOR( CURRENT \
                       , parser->FOR##_mark \
                       , p - parser->FOR##_mark \
                       ); \
    } \
  } while(0)

/* Same as CALLBACK, but also passes the index of the header being parsed. */
#define HEADER_CALLBACK(FOR) \
  do { \
    if(parser->FOR##_mark && CURRENT->on_##FOR) { \
      CURRENT->on_##FOR( CURRENT \
                       , parser->FOR##_mark \
                       , p - parser->FOR##_mark \
                       , CURRENT->number_of_headers \
                       ); \
    } \
  } while(0)

/* Report a multipart header fragment together with the index of the
 * multipart header it belongs to. */
#define EMIT_HEADER_CB(FOR, ptr, len) \
  do { \
    if (CURRENT->on_##FOR) { \
      CURRENT->on_##FOR(CURRENT, ptr, len, \
                        CURRENT->number_of_multipart_headers); \
    } \
  } while(0)

/* Report a run of multipart payload bytes. */
#define EMIT_DATA_CB(FOR, ptr, len) \
  do { \
    if (CURRENT->on_##FOR) { \
      CURRENT->on_##FOR(CURRENT, ptr, len); \
    } \
  } while(0)

/* Fire on_complete (if installed) and detach the finished request. */
#define END_REQUEST \
  do { \
    if(CURRENT->on_complete) \
      CURRENT->on_complete(CURRENT); \
    CURRENT = NULL; \
  } while(0)
%%{
machine ebb_request_parser;
# Mark actions: remember where the current token starts in the input buffer
# by storing the current position p in the corresponding parser mark.
action mark_header_field { parser->header_field_mark = p; }
action mark_header_value { parser->header_value_mark = p; }
action mark_fragment { parser->fragment_mark = p; }
action mark_query_string { parser->query_string_mark = p; }
action mark_request_path { parser->path_mark = p; }
action mark_request_uri { parser->uri_mark = p; }
# Method actions: record on the current request which method literal matched.
action method_copy { CURRENT->method = EBB_COPY; }
action method_delete { CURRENT->method = EBB_DELETE; }
action method_get { CURRENT->method = EBB_GET; }
action method_head { CURRENT->method = EBB_HEAD; }
action method_lock { CURRENT->method = EBB_LOCK; }
action method_mkcol { CURRENT->method = EBB_MKCOL; }
action method_move { CURRENT->method = EBB_MOVE; }
action method_options { CURRENT->method = EBB_OPTIONS; }
action method_post { CURRENT->method = EBB_POST; }
action method_propfind { CURRENT->method = EBB_PROPFIND; }
action method_proppatch { CURRENT->method = EBB_PROPPATCH; }
action method_put { CURRENT->method = EBB_PUT; }
action method_trace { CURRENT->method = EBB_TRACE; }
action method_unlock { CURRENT->method = EBB_UNLOCK; }
# Emit actions: hand the bytes between the stored mark and p to the matching
# request callback (if a mark is set and the callback is installed), then
# clear the mark so the token is not reported again.
action write_field {
//printf("write_field!\n");
/* The header callbacks additionally receive CURRENT->number_of_headers. */
HEADER_CALLBACK(header_field);
parser->header_field_mark = NULL;
}
action write_value {
//printf("write_value!\n");
HEADER_CALLBACK(header_value);
parser->header_value_mark = NULL;
}
action request_uri {
//printf("request uri\n");
CALLBACK(uri);
parser->uri_mark = NULL;
}
action fragment {
//printf("fragment\n");
CALLBACK(fragment);
parser->fragment_mark = NULL;
}
action query_string {
//printf("query string\n");
CALLBACK(query_string);
parser->query_string_mark = NULL;
}
action request_path {
//printf("request path\n");
CALLBACK(path);
parser->path_mark = NULL;
}
# Accumulate one decimal digit of the Content-Length value.
action content_length {
//printf("content_length!\n");
/* NOTE(review): no overflow guard; a very long digit string will wrap or
 * overflow content_length depending on its type - confirm and bound. */
CURRENT->content_length *= 10;
CURRENT->content_length += *p - '0';
}
# Simple flag setters driven by recognized header names and values.
action use_identity_encoding { CURRENT->transfer_encoding = EBB_IDENTITY; }
action use_chunked_encoding { CURRENT->transfer_encoding = EBB_CHUNKED; }
action set_keep_alive { CURRENT->keep_alive = TRUE; }
action set_not_keep_alive { CURRENT->keep_alive = FALSE; }
# Append one byte of the multipart boundary parameter to the request.
action multipart_boundary {
/* Refuse boundaries longer than EBB_MAX_MULTIPART_BOUNDARY_LEN and stop
 * parsing. NOTE(review): ebb_request_parser_has_error() compares cs with
 * ebb_request_parser_error; confirm that -1 is reported as an error. */
if(CURRENT->multipart_boundary_len == EBB_MAX_MULTIPART_BOUNDARY_LEN) {
cs = -1;
fbreak;
}
/* The length is incremented first, so the first boundary byte lands at
 * index 2, after the two '-' characters stored by ebb_request_init(). */
CURRENT->multipart_boundary[1 + (++CURRENT->multipart_boundary_len)] = *p;
/* Arm the multipart body scanner for this request. */
parser->multipart_state = s_start;
}
action expect_continue {
CURRENT->expect_continue = TRUE;
}
action trailer {
//printf("trailer\n");
/* not implemented yet. (do requests even have trailing headers?) */
}
# Accumulate one decimal digit of the HTTP version numbers.
action version_major {
CURRENT->version_major *= 10;
CURRENT->version_major += *p - '0';
}
action version_minor {
CURRENT->version_minor *= 10;
CURRENT->version_minor += *p - '0';
}
# Count a finished header line; the count is passed to the header callbacks.
action end_header_line {
CURRENT->number_of_headers++;
}
# Notify the embedder that the header block is complete.
action end_headers {
if(CURRENT->on_headers_complete)
CURRENT->on_headers_complete(CURRENT);
}
# Accumulate one hexadecimal digit of the chunk size.
action add_to_chunk_size {
//printf("add to chunk size\n");
/* NOTE(review): no overflow guard on chunk_size either - confirm. */
parser->chunk_size *= 16;
/* XXX: this can be optimized slightly */
if( 'A' <= *p && *p <= 'F')
parser->chunk_size += *p - 'A' + 10;
else if( 'a' <= *p && *p <= 'f')
parser->chunk_size += *p - 'a' + 10;
else if( '0' <= *p && *p <= '9')
parser->chunk_size += *p - '0';
else
/* The grammar only runs this action on xdigit input. */
assert(0 && "bad hex char");
}
# Consume the payload of one chunk. Runs on the first payload byte, with
# parser->chunk_size holding the size parsed from the chunk header.
action skip_chunk_data {
  /* on_body is optional (ebb_request_init() leaves it NULL), so it is only
   * invoked when installed, as body_logic already does. */
  if(parser->chunk_size > REMAINING) {
    /* The chunk continues past this buffer: deliver what is here and let the
     * next ebb_request_parser_execute() call eat the rest before the state
     * machine resumes. */
    parser->eating = TRUE;
    if(CURRENT->on_body) {
      CURRENT->on_body(CURRENT, p, REMAINING);
    }
    parser->chunk_size -= REMAINING;
    fhold;
    fbreak;
  } else {
    /* The whole chunk is in this buffer: deliver it, step over it and
     * continue with the CRLF that terminates the chunk. */
    if(CURRENT->on_body) {
      CURRENT->on_body(CURRENT, p, parser->chunk_size);
    }
    p += parser->chunk_size;
    parser->chunk_size = 0;
    parser->eating = FALSE;
    fhold;
    fgoto chunk_end;
  }
}
# Finish a chunked request and return from the ChunkedBody sub-machine.
action end_chunked_body {
//printf("end chunked body\n");
/* Fire on_complete (if installed) and clear the current request. */
END_REQUEST;
fret; // goto Request;
}
# Begin a new request.
action start_req {
/* A new request may only start once the previous one has been cleared. */
assert(CURRENT == NULL);
/* The embedder supplies the request object; parser->data is handed to the
 * callback unchanged. NOTE(review): the result is not checked for NULL. */
CURRENT = parser->new_request(parser->data);
}
# Decide how the request body is consumed once the header block has ended.
# Attached to `Request` as a finishing action, so p is on the last header byte.
action body_logic {
if(CURRENT->transfer_encoding == EBB_CHUNKED) {
/* Chunked bodies are parsed by the ChunkedBody sub-machine. */
fcall ChunkedBody;
} else {
/*
* EAT BODY
* this is very ugly. sorry.
*
*/
if( CURRENT->content_length == 0) {
/* No body announced: the request is already complete. */
END_REQUEST;
} else if( CURRENT->content_length < REMAINING ) {
/*
*
* FINISH EATING THE BODY. there is still more
* on the buffer - so we just let it continue
* parsing after we're done
*
*/
/* Step past the last header byte so p points at the first body byte. */
p += 1;
/* A non-empty boundary means a multipart Content-Type was seen; feed the
 * body through the multipart scanner as well as on_body. */
if( CURRENT->multipart_boundary_len > 0 )
multipart_parser_execute(parser, p, CURRENT->content_length);
if( CURRENT->on_body )
CURRENT->on_body(CURRENT, p, CURRENT->content_length);
p += CURRENT->content_length;
CURRENT->body_read = CURRENT->content_length;
assert(0 <= REMAINING);
END_REQUEST;
/* Back up one byte to compensate for the advance the machine performs
 * after this action. */
fhold;
} else {
/*
* The body is larger than the buffer
* EAT REST OF BUFFER
* there is still more to read though. this will
* be handled on the next invocation of ebb_request_parser_execute
* right before we enter the state machine.
*
*/
p += 1;
/* Everything left in this buffer belongs to the body (possibly nothing). */
size_t eat = REMAINING;
if( CURRENT->multipart_boundary_len > 0 && eat > 0 )
multipart_parser_execute(parser, p, eat);
if( CURRENT->on_body && eat > 0)
CURRENT->on_body(CURRENT, p, eat);
p += eat;
CURRENT->body_read += eat;
/* Tell the next ebb_request_parser_execute() call to keep eating. */
CURRENT->eating_body = TRUE;
//printf("eating body!\n");
assert(CURRENT->body_read < CURRENT->content_length);
assert(REMAINING == 0);
fhold; fbreak;
}
}
}
#
##
###
#### HTTP/1.1 STATE MACHINE
###
## RequestHeaders and character types are from
# Zed Shaw's beautiful Mongrel parser.
CRLF = "\r\n";
# character types
CTL = (cntrl | 127);
safe = ("$" | "-" | "_" | ".");
extra = ("!" | "*" | "'" | "(" | ")" | ",");
reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
national = any -- (alpha | digit | reserved | extra | safe | unsafe);
unreserved = (alpha | digit | safe | extra | national);
escape = ("%" xdigit xdigit);
uchar = (unreserved | escape);
pchar = (uchar | ":" | "@" | "&" | "=" | "+");
tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\"" | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
# elements
token = (ascii -- (CTL | tspecials));
quote = "\"";
# qdtext = token -- "\"";
# quoted_pair = "\" ascii;
# quoted_string = "\"" (qdtext | quoted_pair )* "\"";
# headers
# Each method literal sets CURRENT->method as the machine leaves the literal.
Method = ( "COPY" %method_copy
| "DELETE" %method_delete
| "GET" %method_get
| "HEAD" %method_head
| "LOCK" %method_lock
| "MKCOL" %method_mkcol
| "MOVE" %method_move
| "OPTIONS" %method_options
| "POST" %method_post
| "PROPFIND" %method_propfind
| "PROPPATCH" %method_proppatch
| "PUT" %method_put
| "TRACE" %method_trace
| "UNLOCK" %method_unlock
); # Not allowing extension methods
# Every version digit is folded into version_major / version_minor.
HTTP_Version = "HTTP/" digit+ $version_major "." digit+ $version_minor;
scheme = ( alpha | digit | "+" | "-" | "." )* ;
absolute_uri = (scheme ":" (uchar | reserved )*);
path = ( pchar+ ( "/" pchar* )* ) ;
# URI components set their mark on entry (>) and emit their callback on
# leaving (%).
query = ( uchar | reserved )* >mark_query_string %query_string ;
param = ( pchar | "/" )* ;
params = ( param ( ";" param )* ) ;
rel_path = ( path? (";" params)? ) ;
absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" query)?;
Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri;
Fragment = ( uchar | reserved )* >mark_fragment %fragment;
field_name = ( token -- ":" )+;
Field_Name = field_name >mark_header_field %write_field;
field_value = ((any - " ") any*)?;
Field_Value = field_value >mark_header_value %write_value;
hsep = ":" " "*;
# Plain header without callbacks; used for the trailing headers of a
# chunked body.
header = (field_name hsep field_value) :> CRLF;
# Headers with built-in side effects, plus the generic form that reports
# through the header field/value callbacks.
# For Transfer-Encoding, use_chunked_encoding fires as soon as the header
# name ends; use_identity_encoding fires only after the literal "identity".
Header = ( ("Content-Length"i hsep digit+ $content_length)
| ("Connection"i hsep
( "Keep-Alive"i %set_keep_alive
| "close"i %set_not_keep_alive
)
)
| ("Content-Type"i hsep
"multipart/form-data" any*
"boundary=" ( (quote token+ $multipart_boundary quote) | (token+ $multipart_boundary) ) # boundary can be quoted or not quoted
)
| ("Transfer-Encoding"i %use_chunked_encoding hsep "identity" %use_identity_encoding)
| ("Expect"i hsep "100-continue"i %expect_continue)
| ("Trailer"i hsep field_value %trailer)
| (Field_Name hsep Field_Value)
) :> CRLF;
Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ;
# end_header_line counts each completed header; end_headers fires on the
# last byte of the CRLF that terminates the header block.
RequestHeader = Request_Line (Header %end_header_line)* :> CRLF @end_headers;
# chunked message
trailing_headers = header*;
#chunk_ext_val = token | quoted_string;
chunk_ext_val = token*;
chunk_ext_name = token*;
chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
last_chunk = "0"+ chunk_extension CRLF;
# Non-zero hexadecimal size; every digit is folded into parser->chunk_size.
chunk_size = (xdigit* [1-9a-fA-F] xdigit*) $add_to_chunk_size;
chunk_end = CRLF;
# skip_chunk_data runs on the first payload byte and steps over the payload
# itself instead of matching it byte by byte.
chunk_body = any >skip_chunk_data;
chunk_begin = chunk_size chunk_extension CRLF;
chunk = chunk_begin chunk_body chunk_end;
# Entered with fcall from body_logic; end_chunked_body leaves it with fret.
ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body;
# start_req runs on the first byte of a request, body_logic on the last byte
# of its header block.
Request = RequestHeader >start_req @body_logic;
main := Request+; # sequence of requests (for keep-alive)
}%%
/* Emit the data Ragel generates for the machine defined above. The
 * ebb_request_parser_error and ebb_request_parser_first_final constants used
 * further down in this file come from this directive. */
%% write data;
/* Copy the EBB_RAGEL_STACK_SIZE entries of the Ragel call stack from src to
 * dest. Relies on an integer variable named `i` being in scope at the call
 * site. Arguments are parenthesized and the loop is wrapped in
 * do { } while(0) so the macro expands to a single statement and is safe
 * under if/else. */
#define COPYSTACK(dest, src) \
  do { \
    for(i = 0; i < EBB_RAGEL_STACK_SIZE; i++) { (dest)[i] = (src)[i]; } \
  } while(0)
/*
 * States of the hand-written multipart body scanner
 * (multipart_parser_execute). Numbering starts at 1; the values are spelled
 * out so that a state id seen in a debugger can be read off directly.
 */
enum multipart_state {
  s_uninitialized             = 1,  /* set by ebb_request_parser_init() */
  s_start                     = 2,  /* set once a boundary parameter was parsed */
  s_start_boundary            = 3,  /* matching the opening boundary line */
  s_header_field_start        = 4,  /* first byte of a part header name */
  s_header_field              = 5,  /* inside a part header name */
  s_headers_almost_done       = 6,  /* CR seen where a header name would start */
  s_header_value_start        = 7,  /* skipping spaces after ':' */
  s_header_value              = 8,  /* inside a part header value */
  s_header_value_almost_done  = 9,  /* CR seen after a header value */
  s_part_data_start           = 10, /* first byte of the part payload */
  s_part_data                 = 11, /* inside the part payload */
  s_part_data_almost_boundary = 12, /* CR seen inside the payload */
  s_part_data_boundary        = 13, /* CR LF seen, matching boundary bytes */
  s_part_data_almost_end      = 14, /* full boundary matched */
  s_part_data_end             = 15, /* CR seen after a boundary */
  s_part_data_final_hyphen    = 16, /* first '-' seen after a boundary */
  s_end                       = 17  /* closing boundary seen */
};
/*
 * Put a parser into its initial state.
 *
 * Resets the Ragel state (cs, top, call stack), the chunk/body bookkeeping,
 * all token marks and the multipart scanner state, and clears the
 * new_request callback. The caller must install parser->new_request
 * afterwards; ebb_request_parser_execute() asserts that it is set.
 */
void ebb_request_parser_init(ebb_request_parser *parser)
{
  int i;
  int cs = 0;
  int top = 0;

  /* Generated initialisation of cs / top for the machine. */
  %% write init;

  parser->cs = cs;
  parser->top = top;
  /* Zero the saved call stack directly. The previous code copied an
   * uninitialized local array into it, storing indeterminate values. */
  for(i = 0; i < EBB_RAGEL_STACK_SIZE; i++) {
    parser->stack[i] = 0;
  }

  parser->chunk_size = 0;
  parser->eating = 0;
  parser->current_request = NULL;

  parser->header_field_mark = NULL;
  parser->header_value_mark = NULL;
  parser->query_string_mark = NULL;
  parser->path_mark = NULL;
  parser->uri_mark = NULL;
  parser->fragment_mark = NULL;

  parser->multipart_state = s_uninitialized;
  parser->multipart_index = 0;

  parser->new_request = NULL;
}
#define LF 10
#define CR 13

/*
 * Scan `len` bytes of a multipart body and report part headers and part data
 * through the callbacks of the current request.
 *
 * The scanner is resumable: its position is kept in parser->multipart_state,
 * parser->multipart_index and parser->multipart_lookbehind, so a body may be
 * fed in several pieces. A token that is cut by the end of `buf` is reported
 * in fragments (one callback per piece).
 *
 * Returns `len` when the whole buffer was accepted, the offset of the
 * offending byte when the input does not match, and 0 when the scanner is in
 * an unknown or uninitialized state.
 *
 * Fixes relative to the previous version:
 *  - len == 0 returns immediately instead of reading buf[0] and computing
 *    len - 1 on an unsigned zero;
 *  - tolower() receives an unsigned char value, as <ctype.h> requires;
 *  - a CR that ends a header value as the last byte of the buffer no longer
 *    emits the value a second time (missing break);
 *  - a byte that has to be re-scanned as part data is no longer dropped when
 *    it is the last byte of the buffer;
 *  - a '-' that is the last byte of the buffer inside a header name no longer
 *    suppresses the partial-field callback.
 */
size_t multipart_parser_execute(ebb_request_parser* parser, const char *buf, size_t len)
{
  size_t i = 0;
  /* Offset in buf where the token being collected starts. It restarts at 0
   * on every call so a token continued from an earlier buffer is reported
   * from the beginning of this one. */
  size_t mark = 0;
  char c, cl;
  int is_last;

  while (i < len) {
    c = buf[i];
    is_last = (i == (len - 1));

    switch (parser->multipart_state) {
      case s_start:
        CURRENT->number_of_multipart_headers = 0;
        parser->multipart_index = 0;
        parser->multipart_state = s_start_boundary;
        /* fallthrough */
      case s_start_boundary:
        /* multipart_boundary[] holds "--" followed by the boundary, so the
         * opening line is boundary_len + 2 bytes plus CR LF. */
        if (parser->multipart_index == CURRENT->multipart_boundary_len + 2) {
          if (c != CR) {
            return i;
          }
          parser->multipart_index++;
          break;
        } else if (parser->multipart_index == (CURRENT->multipart_boundary_len + 3)) {
          if (c != LF) {
            return i;
          }
          CURRENT->number_of_multipart_headers = 0;
          parser->multipart_index = 0;
          parser->multipart_state = s_header_field_start;
          break;
        }
        if (c != CURRENT->multipart_boundary[parser->multipart_index]) {
          return i;
        }
        parser->multipart_index++;
        break;

      case s_header_field_start:
        mark = i;
        parser->multipart_state = s_header_field;
        /* fallthrough */
      case s_header_field:
        if (c == CR) {
          /* Empty line: end of this part's headers. */
          parser->multipart_state = s_headers_almost_done;
          break;
        }
        if (c == ':') {
          EMIT_HEADER_CB(multipart_header_field, buf + mark, i - mark);
          parser->multipart_state = s_header_value_start;
          break;
        }
        if (c != '-') {
          /* Header names consist of ASCII letters and '-' only. */
          cl = (char)tolower((unsigned char)c);
          if (cl < 'a' || cl > 'z') {
            return i;
          }
        }
        if (is_last) {
          /* Name is cut by the end of the buffer: report what we have. */
          EMIT_HEADER_CB(multipart_header_field, buf + mark, (i - mark) + 1);
        }
        break;

      case s_headers_almost_done:
        if (c != LF) {
          return i;
        }
        parser->multipart_state = s_part_data_start;
        break;

      case s_header_value_start:
        if (c == ' ') {
          break;
        }
        mark = i;
        parser->multipart_state = s_header_value;
        /* fallthrough */
      case s_header_value:
        if (c == CR) {
          EMIT_HEADER_CB(multipart_header_value, buf + mark, i - mark);
          parser->multipart_state = s_header_value_almost_done;
          break;
        }
        if (is_last) {
          EMIT_HEADER_CB(multipart_header_value, buf + mark, (i - mark) + 1);
        }
        break;

      case s_header_value_almost_done:
        if (c != LF) {
          return i;
        }
        CURRENT->number_of_multipart_headers++;
        parser->multipart_state = s_header_field_start;
        break;

      case s_part_data_start:
        if (CURRENT->on_multipart_headers_complete) {
          CURRENT->on_multipart_headers_complete(CURRENT);
        }
        mark = i;
        parser->multipart_state = s_part_data;
        /* fallthrough */
      case s_part_data:
        if (c == CR) {
          /* Possible start of a boundary: flush the data seen so far and
           * keep the CR aside until we know. */
          EMIT_DATA_CB(part_data, buf + mark, i - mark);
          mark = i;
          parser->multipart_state = s_part_data_almost_boundary;
          parser->multipart_lookbehind[0] = CR;
          break;
        }
        if (is_last) {
          EMIT_DATA_CB(part_data, buf + mark, (i - mark) + 1);
        }
        break;

      case s_part_data_almost_boundary:
        if (c == LF) {
          parser->multipart_state = s_part_data_boundary;
          parser->multipart_lookbehind[1] = LF;
          CURRENT->number_of_multipart_headers = 0;
          parser->multipart_index = 0;
          break;
        }
        /* Not a boundary: the held-back CR was payload after all. */
        EMIT_DATA_CB(part_data, parser->multipart_lookbehind, 1);
        parser->multipart_state = s_part_data;
        mark = i;
        /* Re-scan this byte as part data without advancing. */
        continue;

      case s_part_data_boundary:
        if (CURRENT->multipart_boundary[parser->multipart_index] != c) {
          /* Mismatch: CR LF and the boundary bytes matched so far were
           * payload; replay them from the lookbehind buffer. */
          EMIT_DATA_CB(part_data, parser->multipart_lookbehind, 2 + parser->multipart_index);
          parser->multipart_state = s_part_data;
          mark = i;
          /* Re-scan this byte as part data without advancing. */
          continue;
        }
        parser->multipart_lookbehind[2 + parser->multipart_index] = c;
        if ((++ parser->multipart_index) == CURRENT->multipart_boundary_len + 2) {
          if (CURRENT->on_part_data_complete) {
            CURRENT->on_part_data_complete(CURRENT);
          }
          parser->multipart_state = s_part_data_almost_end;
        }
        break;

      case s_part_data_almost_end:
        if (c == '-') {
          parser->multipart_state = s_part_data_final_hyphen;
          break;
        }
        if (c == CR) {
          parser->multipart_state = s_part_data_end;
          break;
        }
        return i;

      case s_part_data_final_hyphen:
        if (c == '-') {
          parser->multipart_state = s_end;
          break;
        }
        return i;

      case s_part_data_end:
        if (c == LF) {
          parser->multipart_state = s_header_field_start;
          break;
        }
        return i;

      case s_end:
        /* Bytes after the closing boundary are skipped. */
        break;

      default:
        return 0;
    }
    ++i;
  }
  return len;
}
/** exec **/
/*
 * Feed `len` bytes at `buffer` to the parser.
 *
 * Body bytes left over from a previous call (a partially read chunk, or a
 * partially read Content-Length body) are consumed first; the remainder is
 * run through the generated state machine. Tokens that are still open when
 * the buffer ends are reported as fragments through their callbacks, and
 * their marks are re-based onto the start of the next buffer on the next
 * call.
 *
 * parser->new_request must be installed before calling. Returns the offset
 * of the parse position from `buffer` when the call finishes.
 *
 * The body callback is optional (ebb_request_init() leaves on_body NULL), so
 * it is now checked before use in the two carry-over paths, matching
 * body_logic. Zero-length carry-overs no longer invoke on_body or the
 * multipart scanner.
 */
size_t ebb_request_parser_execute(ebb_request_parser *parser, const char *buffer, size_t len)
{
  const char *p, *pe;
  int i, cs = parser->cs;
  int top = parser->top;
  int stack[EBB_RAGEL_STACK_SIZE];

  /* Work on local copies of the Ragel state; written back after the run. */
  COPYSTACK(stack, parser->stack);

  assert(parser->new_request && "undefined callback");

  p = buffer;
  pe = buffer+len;

  if(0 < parser->chunk_size && parser->eating) {
    /*
     * eat chunked body: the previous buffer ended in the middle of a chunk
     */
    size_t eat = MIN(len, parser->chunk_size);
    if(eat == parser->chunk_size) {
      parser->eating = FALSE;
    }
    if(eat > 0) {
      if (CURRENT->multipart_boundary_len > 0) {
        multipart_parser_execute(parser, p, eat);
      }
      if (CURRENT->on_body) {
        CURRENT->on_body(CURRENT, p, eat);
      }
    }
    p += eat;
    parser->chunk_size -= eat;
  } else if( parser->current_request && CURRENT->eating_body ) {
    /*
     * eat normal body: the previous buffer ended in the middle of a
     * Content-Length body
     */
    size_t eat = MIN(len, CURRENT->content_length - CURRENT->body_read);
    if(eat > 0) {
      if (CURRENT->multipart_boundary_len > 0) {
        multipart_parser_execute(parser, p, eat);
      }
      if (CURRENT->on_body) {
        CURRENT->on_body(CURRENT, p, eat);
      }
    }
    p += eat;
    CURRENT->body_read += eat;
    if(CURRENT->body_read == CURRENT->content_length) {
      END_REQUEST;
    }
  }

  /* A token left open by the previous call continues at the start of this
   * buffer. */
  if(parser->header_field_mark) parser->header_field_mark = buffer;
  if(parser->header_value_mark) parser->header_value_mark = buffer;
  if(parser->fragment_mark) parser->fragment_mark = buffer;
  if(parser->query_string_mark) parser->query_string_mark = buffer;
  if(parser->path_mark) parser->path_mark = buffer;
  if(parser->uri_mark) parser->uri_mark = buffer;

  %% write exec;

  parser->cs = cs;
  parser->top = top;
  COPYSTACK(parser->stack, stack);

  /* Report the fragment of any token that is still open at the end of the
   * buffer; the marks stay set so the next call continues the token. */
  HEADER_CALLBACK(header_field);
  HEADER_CALLBACK(header_value);
  CALLBACK(fragment);
  CALLBACK(query_string);
  CALLBACK(path);
  CALLBACK(uri);

  assert(p <= pe && "buffer overflow after parsing execute");
  return(p - buffer);
}
/* Non-zero when the saved machine state equals the generated error state. */
int ebb_request_parser_has_error(ebb_request_parser *parser)
{
  const int state = parser->cs;

  return state == ebb_request_parser_error;
}
/* Non-zero when the saved machine state equals the generated first final
 * state. */
int ebb_request_parser_is_finished(ebb_request_parser *parser)
{
  const int state = parser->cs;

  return state == ebb_request_parser_first_final;
}
/*
 * Reset a request object to its defaults: zeroed counters and version,
 * identity transfer encoding, keep_alive "unspecified" (-1), an empty
 * multipart boundary prefixed with "--", and no callbacks installed.
 */
void ebb_request_init(ebb_request *request)
{
  /* Callbacks: all optional, none installed by default. */
  request->on_complete = NULL;
  request->on_headers_complete = NULL;
  request->on_body = NULL;
  request->on_header_field = NULL;
  request->on_header_value = NULL;
  request->on_uri = NULL;
  request->on_fragment = NULL;
  request->on_path = NULL;
  request->on_query_string = NULL;
  request->on_multipart_headers_complete = NULL;
  request->on_multipart_header_field = NULL;
  request->on_multipart_header_value = NULL;
  request->on_part_data_complete = NULL;
  request->on_part_data = NULL;

  /* Request line and header state. */
  request->version_major = 0;
  request->version_minor = 0;
  request->number_of_headers = 0;
  request->expect_continue = FALSE;
  request->keep_alive = -1;
  request->transfer_encoding = EBB_IDENTITY;

  /* Body bookkeeping. */
  request->content_length = 0;
  request->body_read = 0;
  request->eating_body = 0;

  /* Multipart: the stored boundary always starts with two hyphens. */
  request->number_of_multipart_headers = 0;
  request->multipart_boundary_len = 0;
  request->multipart_boundary[1] = '-';
  request->multipart_boundary[0] = '-';
}
/*
 * Decide whether the connection should stay open after this request.
 *
 * An explicit Connection header (keep_alive != -1) wins. Otherwise the
 * answer follows the HTTP version: 1.x keeps alive unless the minor version
 * is 0, 0.x never does, and any other major version does.
 */
int ebb_request_should_keep_alive(ebb_request *request)
{
  if(request->keep_alive != -1) {
    return request->keep_alive;
  }

  switch(request->version_major) {
    case 1:
      return (request->version_minor != 0);
    case 0:
      return FALSE;
    default:
      return TRUE;
  }
}