This patch fixes a number of problems with negotiation and etags in Apache 1.3.4. *) Entity tag comparisons for If-Match and If-None-Match were not being performed correctly -- weak tags might cause false positives. Also, strong comparison wasn't properly enforced in all cases. [Roy Fielding, Ken Coar, Dean Gaudet] PR#2065, 3657 *) Work around a bug in Lynx, which sends "Negotiate: trans" even though it doesn't understand TCN. [Koen Holtman, Roy Fielding] *) Added ap_size_list_item(), ap_get_list_item(), and ap_find_list_item() to util.c for parsing an HTTP header field value to extract the next list item, taking into account the possible presence of nested comments, quoted-pairs, and quoted-strings. ap_get_list_item() also removes insignificant whitespace and lowercases non-quoted tokens. [Roy Fielding] PR#2065 *) Fix ordering of language variants for the case where the traditional negotiation algorithm is being used with multiple language variants and no Accept-Language header field is sent. [James Treacy] PR#3299, 3688 *) Do not round the TCN quality calculation to 5 decimal places as RFC 2296 specifies, because the calculation might need 12 decimal places to get the right result. [Roy Fielding] *) Remove unused code to disable transparent negotiation when negotiating on encoding only, as we now handle encoding too (though this is nonstandard for TCN), remove the charset=ISO-8859-1 fiddle from the fiddle-averse RVSA comparison, and fix bugs in some debugging statements within mod_negotiation. [Koen Holtman] *) Fixed a rare memory corruption possibility in mod_dir if the index file is negotiable and no acceptable variant can be found. 
[Dean Gaudet, Roy Fielding, Martin Kraemer] Index: include/httpd.h =================================================================== RCS file: /home/cvs/apache-1.3/src/include/httpd.h,v retrieving revision 1.263 retrieving revision 1.269 diff -u -r1.263 -r1.269 --- httpd.h 1999/01/10 06:45:35 1.263 +++ httpd.h 1999/02/09 16:57:22 1.269 @@ -933,6 +931,10 @@ API_EXPORT(char *) ap_getword_nulls_nc(pool *p, char **line, char stop); API_EXPORT(char *) ap_getword_conf(pool *p, const char **line); API_EXPORT(char *) ap_getword_conf_nc(pool *p, char **line); + +API_EXPORT(const char *) ap_size_list_item(const char **field, int *len); +API_EXPORT(char *) ap_get_list_item(pool *p, const char **field); +API_EXPORT(int) ap_find_list_item(pool *p, const char *line, const char *tok); API_EXPORT(char *) ap_get_token(pool *p, const char **accept_line, int accept_white); API_EXPORT(int) ap_find_token(pool *p, const char *line, const char *tok); Index: main/http_protocol.c =================================================================== RCS file: /home/cvs/apache-1.3/src/main/http_protocol.c,v retrieving revision 1.253 retrieving revision 1.257 diff -u -r1.253 -r1.257 --- http_protocol.c 1999/01/08 17:54:41 1.253 +++ http_protocol.c 1999/02/09 16:57:24 1.257 @@ -144,16 +144,18 @@ return 0; } - /* Check the If-Range header for Etag or Date */ - + /* Check the If-Range header for Etag or Date. + * Note that this check will return false (as required) if either + * of the two etags are weak. + */ if ((if_range = ap_table_get(r->headers_in, "If-Range"))) { if (if_range[0] == '"') { if (!(match = ap_table_get(r->headers_out, "Etag")) || - (strcasecmp(if_range, match) != 0)) + (strcmp(if_range, match) != 0)) return 0; } else if (!(match = ap_table_get(r->headers_out, "Last-Modified")) || - (strcasecmp(if_range, match) != 0)) + (strcmp(if_range, match) != 0)) return 0; } @@ -398,13 +400,14 @@ mtime = (r->mtime != 0) ? 
r->mtime : time(NULL); /* If an If-Match request-header field was given - * AND if our ETag does not match any of the entity tags in that field - * AND the field value is not "*" (meaning match anything), then + * AND the field value is not "*" (meaning match anything) + * AND if our strong ETag does not match any entity tag in that field, * respond with a status of 412 (Precondition Failed). */ if ((if_match = ap_table_get(r->headers_in, "If-Match")) != NULL) { - if ((etag == NULL) || - ((if_match[0] != '*') && !ap_find_token(r->pool, if_match, etag))) { + if (if_match[0] != '*' && + (etag == NULL || etag[0] == 'W' || + !ap_find_list_item(r->pool, if_match, etag))) { return HTTP_PRECONDITION_FAILED; } } @@ -425,22 +428,38 @@ } /* If an If-None-Match request-header field was given - * AND if our ETag matches any of the entity tags in that field - * OR if the field value is "*" (meaning match anything), then - * if the request method was GET or HEAD, the server SHOULD - * respond with a 304 (Not Modified) response. - * For all other request methods, the server MUST - * respond with a status of 412 (Precondition Failed). + * AND the field value is "*" (meaning match anything) + * OR our ETag matches any of the entity tags in that field, fail. + * + * If the request method was GET or HEAD, failure means the server + * SHOULD respond with a 304 (Not Modified) response. + * For all other request methods, failure means the server MUST + * respond with a status of 412 (Precondition Failed). + * + * GET or HEAD allow weak etag comparison, all other methods require + * strong comparison. We can only use weak if it's not a range request. */ if_nonematch = ap_table_get(r->headers_in, "If-None-Match"); if (if_nonematch != NULL) { - int rstatus; - - if ((if_nonematch[0] == '*') - || ((etag != NULL) && ap_find_token(r->pool, if_nonematch, etag))) { - rstatus = (r->method_number == M_GET) ? 
HTTP_NOT_MODIFIED - : HTTP_PRECONDITION_FAILED; - return rstatus; + if (r->method_number == M_GET) { + if (if_nonematch[0] == '*') + return HTTP_NOT_MODIFIED; + if (etag != NULL) { + if (ap_table_get(r->headers_in, "Range")) { + if (etag[0] != 'W' && + ap_find_list_item(r->pool, if_nonematch, etag)) { + return HTTP_NOT_MODIFIED; + } + } + else if (strstr(if_nonematch, etag)) { + return HTTP_NOT_MODIFIED; + } + } + } + else if (if_nonematch[0] == '*' || + (etag != NULL && + ap_find_list_item(r->pool, if_nonematch, etag))) { + return HTTP_PRECONDITION_FAILED; } } /* Else if a valid If-Modified-Since request-header field was given Index: main/util.c =================================================================== RCS file: /home/cvs/apache-1.3/src/main/util.c,v retrieving revision 1.145 retrieving revision 1.150 diff -u -r1.145 -r1.150 --- util.c 1999/01/08 20:08:23 1.145 +++ util.c 1999/02/09 16:57:24 1.150 @@ -978,6 +978,185 @@ } } +/* Size an HTTP header field list item, as separated by a comma. + * The return value is a pointer to the beginning of the non-empty list item + * within the original string (or NULL if there is none) and the address + * of field is shifted to the next non-comma, non-whitespace character. + * len is the length of the item excluding any beginning whitespace. 
+ */ +API_EXPORT(const char *) ap_size_list_item(const char **field, int *len) +{ + const unsigned char *ptr = (const unsigned char *)*field; + const unsigned char *token; + int in_qpair, in_qstr, in_com; + + /* Find first non-comma, non-whitespace byte */ + + while (*ptr == ',' || ap_isspace(*ptr)) + ++ptr; + + token = ptr; + + /* Find the end of this item, skipping over dead bits */ + + for (in_qpair = in_qstr = in_com = 0; + *ptr && (in_qpair || in_qstr || in_com || *ptr != ','); + ++ptr) { + + if (in_qpair) { + in_qpair = 0; + } + else { + switch (*ptr) { + case '\\': in_qpair = 1; /* quoted-pair */ + break; + case '"' : if (!in_com) /* quoted string delim */ + in_qstr = !in_qstr; + break; + case '(' : if (!in_qstr) /* comment (may nest) */ + ++in_com; + break; + case ')' : if (in_com) /* end comment */ + --in_com; + break; + default : break; + } + } + } + + if ((*len = (ptr - token)) == 0) { + *field = ptr; + return NULL; + } + + /* Advance field pointer to the next non-comma, non-white byte */ + + while (*ptr == ',' || ap_isspace(*ptr)) + ++ptr; + + *field = ptr; + return (const char *)token; +} + +/* Retrieve an HTTP header field list item, as separated by a comma, + * while stripping insignificant whitespace/comments and lowercasing anything + * not in a quoted string. The return value is a new string containing + * the converted list item (empty if it was all comments or NULL if none) + * and the address of field is shifted to the next non-comma, non-whitespace. + */ +API_EXPORT(char *) ap_get_list_item(pool *p, const char **field) +{ + const char *tok_start; + const unsigned char *ptr; + unsigned char *pos; + char *token; + int addspace = 0, in_qpair = 0, in_qstr = 0, in_com = 0, tok_len = 0; + + /* Find the beginning and maximum length of the list item so that + * we can allocate a buffer for the new string and reset the field. 
+ */ + if ((tok_start = ap_size_list_item(field, &tok_len)) == NULL) { + return NULL; + } + token = ap_palloc(p, tok_len + 1); + + /* Scan the token again, but this time copy only the good bytes. + * We skip extra whitespace and any whitespace around a '=' or ';', + * strip comments, and lowercase normal characters not within a + * quoted-string or quoted-pair. The result may be an empty string. + */ + for (ptr = (const unsigned char *)tok_start, pos = (unsigned char *)token; + *ptr && (in_qpair || in_qstr || in_com || *ptr != ','); + ++ptr) { + + if (in_qpair) { + in_qpair = 0; + if (!in_com) + *pos++ = *ptr; + } + else { + switch (*ptr) { + case '\\': in_qpair = 1; + if (in_com) + break; + if (addspace == 1) + *pos++ = ' '; + *pos++ = *ptr; + addspace = 0; + break; + case '"' : if (in_com) + break; + in_qstr = !in_qstr; + if (addspace == 1) + *pos++ = ' '; + *pos++ = *ptr; + addspace = 0; + break; + case '(' : if (in_qstr) + *pos++ = *ptr; + else + ++in_com; + break; + case ')' : if (in_com) + --in_com; + else + *pos++ = *ptr; + break; + case ' ' : + case '\t': if (in_com || addspace) + break; + if (in_qstr) + *pos++ = *ptr; + else + addspace = 1; + break; + case '=' : + case ';' : if (in_com) + break; + if (!in_qstr) + addspace = -1; + *pos++ = *ptr; + break; + default : if (in_com) + break; + if (addspace == 1) + *pos++ = ' '; + *pos++ = in_qstr ? *ptr : ap_tolower(*ptr); + addspace = 0; + break; + } + } + } + *pos = '\0'; + + return token; +} + +/* Find an item in canonical form (lowercase, no extra spaces) within + * an HTTP field value list. Returns 1 if found, 0 if not found. + * This would be much more efficient if we stored header fields as + * an array of list items as they are received instead of a plain string. + * We could make it more efficient by duplicating the loop/switch above + * within this function, replacing the assignments with compares. 
+ */ +API_EXPORT(int) ap_find_list_item(pool *p, const char *line, const char *tok) +{ + const char *nxt; + char *item; + + if (!line || !tok) + return 0; + + nxt = line; + + while ((item = ap_get_list_item(p, &nxt)) != NULL) { + if (strcmp(item, tok) == 0) + return 1; + } + return 0; +} + + /* Retrieve a token, spacing over it and returning a pointer to * the first non-white byte afterwards. Note that these tokens * are delimited by semis and commas; and can also be delimited @@ -1877,6 +2056,7 @@ */ if ((*inchr == '\\') && (inchr[1] != '\0')) { inchr++; + newlen++; } inchr++; } Index: modules/standard/mod_dir.c =================================================================== RCS file: /home/cvs/apache-1.3/src/modules/standard/mod_dir.c,v retrieving revision 1.53 retrieving revision 1.54 diff -u -r1.53 -r1.54 --- mod_dir.c 1999/01/01 19:05:08 1.53 +++ mod_dir.c 1999/02/06 08:51:26 1.54 @@ -179,13 +179,13 @@ if (ap_is_HTTP_REDIRECT(rr->status) || (rr->status == HTTP_NOT_ACCEPTABLE && num_names == 1)) { + ap_pool_join(r->pool, rr->pool); error_notfound = rr->status; r->notes = ap_overlay_tables(r->pool, r->notes, rr->notes); r->headers_out = ap_overlay_tables(r->pool, r->headers_out, rr->headers_out); r->err_headers_out = ap_overlay_tables(r->pool, r->err_headers_out, rr->err_headers_out); - ap_destroy_sub_req(rr); return error_notfound; } Index: modules/standard/mod_negotiation.c =================================================================== RCS file: /home/cvs/apache-1.3/src/modules/standard/mod_negotiation.c,v retrieving revision 1.92 retrieving revision 1.97 diff -u -r1.92 -r1.97 --- mod_negotiation.c 1999/01/03 12:04:38 1.92 +++ mod_negotiation.c 1999/02/08 15:12:20 1.97 @@ -511,81 +511,81 @@ static void parse_negotiate_header(request_rec *r, negotiation_state *neg) { const char *negotiate = ap_table_get(r->headers_in, "Negotiate"); + char *tok; - if (negotiate) { - /* Negotiate: header tells us UA does transparent negotiation */ + /* First, default to 
no TCN, no Alternates, and the original Apache + * negotiation algorithm with fiddles for broken browser configs. + * + * To save network bandwidth, we do not configure to send an + * Alternates header to the user agent by default. User + * agents that want an Alternates header for agent-driven + * negotiation will have to request it by sending an + * appropriate Negotiate header. + */ + neg->ua_supports_trans = 0; + neg->send_alternates = 0; + neg->may_choose = 1; + neg->use_rvsa = 0; + neg->dont_fiddle_headers = 0; - /* sending Alternates on non-transparent resources is allowed, - * and may even be useful, but we don't for now, also - * because it could clash with an Alternates header set by - * a sub- or super- request on a transparent resource. - */ + if (!negotiate) + return; - while (*negotiate) { - char *tok = ap_get_token(neg->pool, &negotiate, 1); - char *cp; + if (strcmp(negotiate, "trans") == 0) { + /* Lynx 2.7 and 2.8 send 'negotiate: trans' even though they + * do not support transparent content negotiation, so for Lynx we + * ignore the negotiate header when its contents are exactly "trans". + * If future versions of Lynx ever need to say 'negotiate: trans', + * they can send the equivalent 'negotiate: trans, trans' instead + * to avoid triggering the workaround below. + */ + const char *ua = ap_table_get(r->headers_in, "User-Agent"); - for (cp = tok; (*cp && !ap_isspace(*cp) && *cp != '='); ++cp) { - *cp = ap_tolower(*cp); - } - *cp = 0; - - if (strcmp(tok, "trans") == 0 || - strcmp(tok, "vlist") == 0 || - strcmp(tok, "guess-small") == 0 || - ap_isdigit(tok[0]) || - strcmp(tok, "*") == 0) { - - /* The user agent supports transparent negotiation */ - neg->ua_supports_trans = 1; - - /* Send-alternates could be configurable, but note - * that it must be 1 if we have 'vlist' in the - * negotiate header. 
- */ - neg->send_alternates = 1; + if (ua && (strncmp(ua, "Lynx", 4) == 0)) + return; + } - if (strcmp(tok, "1.0") == 0) { - /* we may use the RVSA/1.0 algorithm, configure for it */ - neg->may_choose = 1; - neg->use_rvsa = 1; - neg->dont_fiddle_headers = 1; - } - else if (strcmp(tok, "*") == 0) { - /* we may use any variant selection algorithm, configure - * to use the Apache algorithm - */ - neg->may_choose = 1; - - /* We disable header fiddles on the assumption that a - * client sending Negotiate knows how to send correct - * headers which don't need fiddling. - */ - neg->dont_fiddle_headers = 1; - } - } + neg->may_choose = 0; /* An empty Negotiate would require 300 response */ - if (*negotiate) - negotiate++; /* skip over , */ - } - } + while ((tok = ap_get_list_item(neg->pool, &negotiate)) != NULL) { - if (!neg->ua_supports_trans) { - /* User agent does not support transparent negotiation, - * configure to do server-driven negotiation with the Apache - * algorithm. - */ - neg->may_choose = 1; + if (strcmp(tok, "trans") == 0 || + strcmp(tok, "vlist") == 0 || + strcmp(tok, "guess-small") == 0 || + ap_isdigit(tok[0]) || + strcmp(tok, "*") == 0) { + + /* The user agent supports transparent negotiation */ + neg->ua_supports_trans = 1; + + /* Send-alternates could be configurable, but note + * that it must be 1 if we have 'vlist' in the + * negotiate header. + */ + neg->send_alternates = 1; - /* To save network bandwidth, we do not configure to send an - * Alternates header to the user agent in this case. User - * agents which want an Alternates header for agent-driven - * negotiation will have to request it by sending an - * appropriate Negotiate header. 
- */ + if (strcmp(tok, "1.0") == 0) { + /* we may use the RVSA/1.0 algorithm, configure for it */ + neg->may_choose = 1; + neg->use_rvsa = 1; + neg->dont_fiddle_headers = 1; + } + else if (tok[0] == '*') { + /* we may use any variant selection algorithm, configure + * to use the Apache algorithm + */ + neg->may_choose = 1; + + /* We disable header fiddles on the assumption that a + * client sending Negotiate knows how to send correct + * headers which don't need fiddling. + */ + neg->dont_fiddle_headers = 1; + } + } } -#if NEG_DEBUG +#ifdef NEG_DEBUG fprintf(stderr, "dont_fiddle_headers=%d use_rvsa=%d ua_supports_trans=%d " "send_alternates=%d, may_choose=%d\n", neg->dont_fiddle_headers, neg->use_rvsa, @@ -1556,7 +1556,7 @@ } /* For a given variant, find the 'q' value of the charset given - * on the Accept-Charset line. If not charsets are listed, + * on the Accept-Charset line. If no charsets are listed, * assume value of '1'. */ static void set_charset_quality(negotiation_state *neg, var_rec *variant) @@ -1763,20 +1763,21 @@ variant->charset_quality * variant->lang_quality; - /* Make sure that variants with a very low nonzero q value - * do not get rounded down to 0 + /* RFC 2296 calls for the result to be rounded to 5 decimal places, + * but we don't do that because it serves no useful purpose other + * than to ensure that a remote algorithm operates on the same + * precision as ours. That is silly, since what we obviously want + * is for the algorithm to operate on the best available precision + * regardless of who runs it. Since the above calculation may + * result in significant variance at 1e-12, rounding would be bogus. */ - if (q <= 0.0f) - q = 0.0f; - else if (q < 0.00001f) - q = 0.00001f; #ifdef NEG_DEBUG fprintf(stderr, "Variant: file=%s type=%s lang=%s sourceq=%1.3f " "mimeq=%1.3f langq=%1.3f charq=%1.3f encq=%1.3f " "q=%1.5f definite=%d\n", (variant->file_name ? variant->file_name : ""), - (variant->mime_name ? 
variant->mime_name : ""), + (variant->mime_type ? variant->mime_type : ""), (variant->content_languages ? ap_array_pstrcat(neg->pool, variant->content_languages, ',') : ""), @@ -1784,11 +1785,12 @@ variant->mime_type_quality, variant->lang_quality, variant->charset_quality, - variant->encoding_qual q, + variant->encoding_quality, + q, variant->definite); #endif - if (q == 0.0f) { + if (q <= 0.0f) { return 0; } if (q > bestq) { @@ -1803,19 +1805,6 @@ *p_bestq = q; return 1; } - /* If the best variant's charset is ISO-8859-1 and this variant has - * the same charset quality, then we prefer this variant - */ - if (variant->charset_quality == best->charset_quality && - (variant->content_charset != NULL && - *variant->content_charset != '\0' && - strcmp(variant->content_charset, "iso-8859-1") != 0) && - (best->content_charset == NULL || - *best->content_charset == '\0' || - strcmp(best->content_charset, "iso-8859-1") == 0)) { - *p_bestq = q; - return 1; - } } return 0; } @@ -1833,7 +1822,7 @@ /* For non-transparent negotiation, server can choose how * to handle the negotiation. We'll use the following in * order: content-type, language, content-type level, charset, - * content length. + * content encoding, content length. * * For each check, we have three possible outcomes: * This variant is worse than current best: return 0 @@ -1853,6 +1842,22 @@ * acceptable by type, charset, encoding or language. */ +#ifdef NEG_DEBUG + fprintf(stderr, "Variant: file=%s type=%s lang=%s sourceq=%1.3f " + "mimeq=%1.3f langq=%1.3f langidx=%d charq=%1.3f encq=%1.3f \n", + (variant->file_name ? variant->file_name : ""), + (variant->mime_type ? variant->mime_type : ""), + (variant->content_languages + ? 
ap_array_pstrcat(neg->pool, variant->content_languages, ',') + : ""), + variant->source_quality, + variant->mime_type_quality, + variant->lang_quality, + variant->lang_index, + variant->charset_quality, + variant->encoding_quality); +#endif + if (variant->encoding_quality == 0.0f || variant->lang_quality == 0.0f || variant->source_quality == 0.0f || @@ -1879,17 +1884,13 @@ return 1; } - /* if language qualities were equal, try the LanguagePriority - * stuff - */ - /* XXX: TODO: there is a slight discrepancy between how this - * behaves and how it described in the documentation - */ - if (best->lang_index != -1 && variant->lang_index > best->lang_index) { + /* if language qualities were equal, try the LanguagePriority stuff */ + if (best->lang_index != -1 && + (variant->lang_index == -1 || variant->lang_index > best->lang_index)) { return 0; } if (variant->lang_index != -1 && - (variant->lang_index < best->lang_index || best->lang_index == -1)) { + (best->lang_index == -1 || variant->lang_index < best->lang_index)) { *p_bestq = q; return 1; } @@ -2437,7 +2438,6 @@ int alg_result; /* result of variant selection algorithm */ int res; int j; - int unencoded_variants = 0; /* Decide if resource is transparently negotiable */ @@ -2465,20 +2465,7 @@ */ if (strchr(variant->file_name, '/')) neg->is_transparent = 0; - - if (!variant->content_encoding) - unencoded_variants++; } - - /* If there are less than 2 unencoded variants, we always - * switch to server-driven negotiation, regardless of whether - * we are contacted by a client capable of transparent - * negotiation. We do this because our current TCN - * implementation does not deal well with the case of having 0 - * or 1 unencoded variants. - */ - if (unencoded_variants < 2) - neg->is_transparent = 0; } if (neg->is_transparent) {