diff -Naur apache_2.0a6/10xpatchlevel-7 apache_2.0a6/10xpatchlevel --- apache_2.0a6/10xpatchlevel-7 Wed Sep 27 16:59:02 2000 +++ apache_2.0a6/10xpatchlevel Thu Sep 28 20:31:20 2000 @@ -2,4 +2,4 @@ available from http://oss.sgi.com/projects/apache/ -10xpatchlevel=2.0a6-7 +10xpatchlevel=2.0a6-8 diff -Naur apache_2.0a6/src/include/httpd.h-7 apache_2.0a6/src/include/httpd.h --- apache_2.0a6/src/include/httpd.h-7 Wed Sep 20 22:43:21 2000 +++ apache_2.0a6/src/include/httpd.h Thu Sep 28 20:31:29 2000 @@ -807,6 +807,10 @@ * record to improve 64bit alignment the next time we need to break * binary compatibility for some other reason. */ + + char *rqbuf; /* quickly_read_request_line()'s buffer */ + char *rqhdr; /* quickly_get_mime_headers()'s headers */ + apr_size_t rqhdr_len; /* strlen(rqhdr) */ }; diff -Naur apache_2.0a6/src/lib/apr/configure.in-7 apache_2.0a6/src/lib/apr/configure.in --- apache_2.0a6/src/lib/apr/configure.in-7 Fri Aug 18 08:33:08 2000 +++ apache_2.0a6/src/lib/apr/configure.in Thu Sep 28 20:31:40 2000 @@ -334,6 +334,7 @@ AC_CHECK_SIZEOF(short, 2) AC_CHECK_SIZEOF(long double, 12) AC_CHECK_SIZEOF(long long, 8) +AC_CHECK_SIZEOF(char *, 4) if test "$ac_cv_sizeof_short" = "2"; then short_value=short @@ -357,6 +358,13 @@ if test "$ac_cv_sizeof_longlong" = "8"; then long_value="__int64" fi +if test "$ac_cv_sizeof_char_p" = "$ac_cv_sizeof_long"; then + ptr_value="long" +elif test "$ac_cv_sizeof_char_p" = "$ac_cv_sizeof_long_long"; then + ptr_value="long long" +else + ptr_value="int" +fi if test "$ac_cv_type_off_t" = "yes"; then off_t_value="off_t" @@ -408,6 +416,7 @@ AC_SUBST(short_value) AC_SUBST(int_value) AC_SUBST(long_value) +AC_SUBST(ptr_value) AC_SUBST(off_t_value) AC_SUBST(size_t_value) AC_SUBST(ssize_t_value) diff -Naur apache_2.0a6/src/lib/apr/include/apr.h.in-7 apache_2.0a6/src/lib/apr/include/apr.h.in --- apache_2.0a6/src/lib/apr/include/apr.h.in-7 Sat Aug 5 23:07:07 2000 +++ apache_2.0a6/src/lib/apr/include/apr.h.in Thu Sep 28 20:31:49 2000 @@ -108,6 
+108,9 @@
 typedef @long_value@ apr_int64_t;
 typedef unsigned @long_value@ apr_uint64_t;
 
+/* Integer large enough to hold any pointer */
+typedef unsigned @ptr_value@ apr_ptr_t;
+
 typedef @size_t_value@ apr_size_t;
 typedef @ssize_t_value@ apr_ssize_t;
 typedef @off_t_value@ apr_off_t;
@@ -168,5 +171,25 @@
 #define WTERMSIG(status) (int)((status).w_termsig)
 #endif /* !WEXITSTATUS */
 #endif /* HAVE_SYS_WAIT_H */
+
+/*
+ * Aligning the address and length of certain key data structures and
+ * buffers to exactly match cache lines can dramatically increase
+ * performance. In the absence of an architecture-specific
+ * APR_CACHE_ALIGNMENT definition, assume this arbitrary default. Set
+ * to 0 to disable alignment. When overriding the default value use the
+ * larger of your processor's primary and secondary cache line sizes.
+ * The value is in bytes.
+ */
+#ifndef APR_CACHE_ALIGNMENT
+# ifdef IRIX
+# define APR_CACHE_ALIGNMENT 128 /* best value for most SGI servers */
+# else
+# define APR_CACHE_ALIGNMENT 32
+# endif
+#endif
+#if APR_CACHE_ALIGNMENT & (APR_CACHE_ALIGNMENT - 1)
+# error "APR_CACHE_ALIGNMENT must be a power of two"
+#endif
 
 #endif /* APR_H */
diff -Naur apache_2.0a6/src/main/http_protocol.c-7 apache_2.0a6/src/main/http_protocol.c
--- apache_2.0a6/src/main/http_protocol.c-7 Tue Sep 26 15:32:45 2000
+++ apache_2.0a6/src/main/http_protocol.c Thu Sep 28 20:32:02 2000
@@ -989,6 +989,124 @@
     }
 }
 
+#ifndef RQBUF_SIZE
+#define RQBUF_SIZE 4096 /* should be >= buff.c's DEFAULT_BUFSIZE */
+#endif
+
+/*
+ * Fast path for "GET <uri> HTTP/1.[01]" request lines: read once into
+ * r->rqbuf, parse in place, and leave the bytes after the request line
+ * in r->rqhdr/r->rqhdr_len for quickly_get_mime_headers().  Returns 1
+ * if parsed, 0 after handing the bytes back for read_request_line().
+ */
+static int
+quickly_read_request_line(request_rec *r)
+{
+    BUFF *fb;
+    apr_status_t status;
+    apr_ssize_t n = 0; /* stays 0 if ap_bread() fails without storing a count */
+
+    /*
+     * If r->rqbuf already exists, there could be pointers into it from
+     * r->headers_in so instead of reusing it, just replace it.
+     */
+#if APR_CACHE_ALIGNMENT > 0
+    /* align r->rqbuf on a cache line boundary */
+    r->rqbuf = apr_palloc(r->pool, RQBUF_SIZE + APR_CACHE_ALIGNMENT - 1);
+    r->rqbuf = (char *) (((apr_ptr_t) r->rqbuf + APR_CACHE_ALIGNMENT - 1) & ~(APR_CACHE_ALIGNMENT - 1));
+#else
+    r->rqbuf = apr_palloc(r->pool, RQBUF_SIZE);
+#endif
+
+    /*
+     * would read directly into fb->inptr but r->the_request needs an
+     * intact copy, sigh
+     */
+    fb = r->connection->client;
+    fb->flags |= B_SAFEREAD;
+    status = ap_bread(fb, r->rqbuf, RQBUF_SIZE, &n);
+    fb->flags &= ~B_SAFEREAD;
+
+    /* 16 == strlen("GET / HTTP/1.0\n\n"): minimal valid request */
+    if (status == APR_SUCCESS && n >= 16 &&
+        /*
+         * Compare the method byte-wise on every platform.  The former
+         * big-endian shortcut read rqbuf through an apr_uint32_t pointer
+         * and compared it with the multi-character constant 'GET ',
+         * whose value is implementation-defined; memcmp() over four
+         * bytes is portable and needs no WORDS_BIGENDIAN special case.
+         * n >= 16 above guarantees the four bytes are present.
+         */
+        memcmp(r->rqbuf, "GET ", 4) == 0 /* 4 == strlen("GET ") */
+        ) {
+        char *cp, *ep;
+
+        /* find the end of the uri */
+        cp = &r->rqbuf[4]; /* 4 == strlen("GET ") */
+        ep = &r->rqbuf[n - 11]; /* 11 == strlen(" HTTP/1.0\n\n") */
+        while (cp <= ep && !apr_isspace(*cp))
+            cp++;
+
+        if (cp <= ep &&
+            cp[0] == ' ' &&
+            cp[1] == 'H' &&
+            cp[2] == 'T' &&
+            cp[3] == 'T' &&
+            cp[4] == 'P' &&
+            cp[5] == '/' &&
+            cp[6] == '1' &&
+            cp[7] == '.' &&
+            (cp[8] == '0' || cp[8] == '1') &&
+            ((cp[9] == '\r' && cp[10] == '\n') || cp[9] == '\n')) {
+            long id;
+
+            r->rqhdr = cp + 10 + (cp[9] == '\r');
+            r->rqhdr_len = &r->rqbuf[n] - r->rqhdr;
+            cp[9] = 0; /* null-terminate r->the_request */
+
+            r->request_time = apr_now();
+            r->the_request = r->rqbuf;
+            r->the_request_len = cp + 9 - r->rqbuf;
+            r->method = "GET";
+            r->method_number = M_GET;
+
+            ap_assert(!r->assbackwards);
+            if (cp[8] == '1') {
+                r->protocol = "HTTP/1.1";
+                r->proto_num = HTTP_VERSION(1,1);
+            } else {
+                r->protocol = "HTTP/1.0";
+                r->proto_num = HTTP_VERSION(1,0);
+            }
+
+            id = r->connection->id;
+            ap_update_connection_status(id, "Method", r->method);
+            ap_update_connection_status(id, "Protocol", r->protocol);
+
+            *cp = 0; /* avoid copying uri; briefly mangles r->the_request */
+            ap_parse_uri(r, &r->rqbuf[4]);
+            *cp = ' ';
+
+            return 1;
+        }
+    }
+
+    /*
+     * Shortcut parsing failed. No harm done, just copy the
+     * already-read data into fb and fall back to regular processing.
+     * If copying bothers you, extend the buffering code to allow an
+     * alternate read-ahead buffer and then just manipulate pointers.
+     */
+    ap_assert(r->rqhdr_len == 0);
+    if (n > 0) {
+        ap_assert(&fb->inptr[n] <= &fb->inbase[fb->bufsiz]);
+        memcpy(fb->inptr, r->rqbuf, n);
+        fb->incnt += n;
+    }
+
+    return 0;
+}
+
 static int read_request_line(request_rec *r)
 {
     char l[DEFAULT_LIMIT_REQUEST_LINE + 2]; /* getline's two extra for \n\0 */
@@ -1081,9 +1199,9 @@
 
     /* avoid sscanf in the common case */
     if (len == 8 &&
-        pro[0] == 'H' && pro[1] == 'T' && pro[2] == 'T' && pro[3] == 'P' &&
-        pro[4] == '/' && apr_isdigit(pro[5]) && pro[6] == '.' &&
-        apr_isdigit(pro[7])) {
+        pro[0] == 'H' && pro[1] == 'T' && pro[2] == 'T' && pro[3] == 'P' &&
+        pro[4] == '/' && apr_isdigit(pro[5]) && pro[6] == '.'
&&
+        apr_isdigit(pro[7])) {
         r->proto_num = HTTP_VERSION(pro[5] - '0', pro[7] - '0');
     } else if (2 == sscanf(r->protocol, "HTTP/%u.%u", &major, &minor) &&
                minor < HTTP_VERSION(1,0)) /* don't allow HTTP/0.1000 */
@@ -1094,6 +1212,148 @@
     return 1;
 }
 
+/*
+ * Parse, in place, the header bytes quickly_read_request_line() left in
+ * r->rqhdr.  Returns 1 once the blank line ending the header section has
+ * been consumed; otherwise returns 0 after merging what it did parse and
+ * copying the unparsed rest into the BUFF for get_mime_headers().
+ */
+static int
+quickly_get_mime_headers(request_rec *r)
+{
+    int ok, nh, mh;
+    apr_table_t *headers;
+    char *hp, *ep;
+    apr_ssize_t remain;
+#define NQHEADERS 16 /* see big comment below */
+    struct qheader {
+        const char *name;
+        const char *value;
+    } qheaders[NQHEADERS];
+    struct qheader *nqp, *qp;
+
+    ok = 0;
+
+    nh = apr_table_elts(r->headers_in)->nelts;
+    mh = r->server->limit_req_fields;
+    if (mh <= 0)
+        mh = 65536; /* essentially infinite */
+    headers = NULL;
+
+    hp = r->rqhdr;
+    ep = hp + r->rqhdr_len;
+    r->rqhdr_len = 0; /* never re-parse */
+
+    nqp = qheaders;
+
+    while (hp < ep && nh < mh) {
+        char *name, *value, *colon;
+
+        name = hp;
+
+        /* find a : (end of name) */
+        value = name;
+        while (value < ep && *value != ':' && !apr_isspace(*value))
+            value++;
+
+        /* complete name? */
+        if (value > name && value < ep && *value == ':') {
+            colon = value; /* name may itself hold NULs; remember the ':' */
+            *colon = 0; /* null-terminate name */
+            /* skip white space */
+            do
+                value++;
+            while (value < ep && (*value == ' ' || *value == '\t'));
+
+            /* find end of value */
+            hp = value;
+            while (hp < ep && *hp != '\n')
+                hp++;
+
+            /*
+             * complete value? beware leading-space continuation lines,
+             * or the threat of one if we're right at the end of rqbuf.
+             * (there must be at least one more byte left and it must
+             * not be a leading space)
+             */
+            if (hp + 1 < ep && hp[1] != ' ' && hp[1] != '\t') {
+                hp[-(hp[-1] == '\r')] = 0; /* null-terminate value */
+                hp++;
+
+                if (nqp < &qheaders[NQHEADERS]) {
+                    /* still room in quick header array */
+                    nqp->name = name;
+                    nqp->value = value;
+                    nqp++;
+                } else if (headers) {
+                    /* insert into established table */
+                    apr_table_addn(headers, name, value);
+                } else {
+                    /*
+                     * More headers than fit in the quick header array.
+                     * Make a table for them.
+                     */
+                    headers = apr_make_table(r->pool, NQHEADERS * 2);
+                    for (qp = qheaders; qp < nqp; qp++)
+                        apr_table_addn(headers, qp->name, qp->value);
+                    apr_table_addn(headers, name, value);
+                }
+
+                nh++;
+            } else {
+                /*
+                 * incomplete line: restore the ':' itself for copy below
+                 */
+                hp = name;
+                *colon = ':';
+                break;
+            }
+        } else {
+            if (hp < ep && *hp == '\r')
+                hp++;
+            if (hp < ep && *hp == '\n') {
+                hp++;
+                ok = 1; /* end of header section */
+            }
+            break;
+        }
+    }
+
+    /*
+     * My experiments on a variety of systems show that when all the
+     * header names are unique (no duplicates), calling
+     * apr_table_mergen() to insert each header is faster than calling
+     * apr_overlap_tables() once, up to around 16 headers. When there
+     * are more than about 16 headers, apr_overlap_tables() is faster.
+     * When there are some duplicate headers, using apr_table_mergen()
+     * almost always is faster no matter how many headers are inserted.
+     * All the benchmarks and browsers I know issue unique request
+     * headers so I set NQHEADERS to 16 to optimize: When there are at
+     * most NQHEADERS headers this function calls apr_table_mergen()
+     * once for each header, and when there are more it calls
+     * apr_overlap_tables() once. (But why, you might ask, use
+     * apr_table_mergen() instead of apr_table_addn() if all the headers
+     * are unique? Because we don't know a priori that they are unique,
+     * just that they usually are.)
+     */
+    if (headers)
+        apr_overlap_tables(r->headers_in, headers, APR_OVERLAP_TABLES_MERGE);
+    else
+        for (qp = qheaders; qp < nqp; qp++)
+            apr_table_mergen(r->headers_in, qp->name, qp->value);
+
+    remain = ep - hp;
+    if (remain > 0) {
+        BUFF *fb = r->connection->client;
+
+        ap_assert(&fb->inptr[remain] <= &fb->inbase[fb->bufsiz]);
+        memcpy(fb->inptr, hp, remain);
+        fb->incnt += remain;
+    }
+
+    return ok;
+}
+
 static void get_mime_headers(request_rec *r)
 {
     char field[DEFAULT_LIMIT_REQUEST_FIELDSIZE + 2]; /* getline's two extra */
@@ -1101,7 +1361,7 @@
     char *value;
     char *copy;
     int len;
-    int fields_read = 0;
+    int fields_read = apr_table_elts(r->headers_in)->nelts;
     apr_table_t *tmp_headers;
 
     /* We'll use apr_overlap_tables later to merge these into r->headers_in. */
@@ -1195,7 +1455,7 @@
                      : &r->server->timeout);
 
     /* Get the request... */
-    if (!read_request_line(r)) {
+    if (!quickly_read_request_line(r) && !read_request_line(r)) {
         if (r->status == HTTP_REQUEST_URI_TOO_LARGE) {
             ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, 0, r,
                           "request failed: URI too long");
@@ -1210,7 +1470,8 @@
                      &r->server->timeout);
     }
     if (!r->assbackwards) {
-        get_mime_headers(r);
+        if (!quickly_get_mime_headers(r))
+            get_mime_headers(r);
         if (r->status != HTTP_REQUEST_TIME_OUT) {
             ap_log_rerror(APLOG_MARK, APLOG_NOERRNO|APLOG_ERR, 0, r,
                           "request failed: error reading the headers");