/* http.c
 * HTTP protocol client implementation
 * (c) 2002 Mikulas Patocka
 * This file is a part of the Links program, released under GPL.
 */

#include "links.h"

/*
 * Per-request HTTP state. It is allocated with mem_calloc() in
 * http_send_header() and stored in connection->info.
 */
struct http_connection_info {
	int bl_flags;		/* value of get_blacklist_flags() for the host (BL_* bits) */
	int http10;		/* nonzero: the request line is sent as HTTP/1.0 */
	int https_forward;	/* nonzero: this request is a CONNECT sent to a proxy for an https:// URL */
	int close;		/* set from the response headers: socket must not be kept alive */
	int send_close;		/* set while building the request (https_forward || http10): no keep-alive */
	off_t length;		/* body bytes still expected; -1 = unknown, -2 = chunked transfer */
	int version;		/* version reported by get_http_code() (major * 10 + minor) */
	int chunk_remaining;	/* bytes left in the current chunk; -1 = expecting a chunk-size line,
				   -2 = consuming the lines that follow the last chunk */
};

/* prototypes */
static void http_send_header(struct connection *c);
static void http_get_header(struct connection *c);
static void test_restart(struct connection *c);
static void add_user_agent(unsigned char **hdr, int *l, unsigned char *url);
static void add_referer(unsigned char **hdr, int *l, unsigned char *url, unsigned char *prev_url);
static void add_accept(unsigned char **hdr, int *l, struct connection *c);
static void add_accept_language(unsigned char **hdr, int *l, struct http_connection_info *info);
static void add_accept_encoding(unsigned char **hdr, int *l, unsigned char *url, struct connection *c);
static void add_accept_charset(unsigned char **hdr, int *l, struct http_connection_info *info);
static void add_connection(unsigned char **hdr, int *l, int http10, int proxy, int post);
static void add_upgrade(unsigned char **hdr, int *l);
static void add_dnt(unsigned char **hdr, int *l);
static void add_if_modified(unsigned char **hdr, int *l, struct connection *c);
static void add_range(unsigned char **hdr, int *l, unsigned char *url, struct connection *c);
static void add_pragma_no_cache(unsigned char **hdr, int *l, int no_cache);
static void add_proxy_auth_string(unsigned char **hdr, int *l, unsigned char *url);
static void add_auth_string(unsigned char **hdr, int *l, unsigned char *url);
static void add_post_header(unsigned char **hdr, int *l, unsigned char **post);
static void add_extra_options(unsigned char **hdr, int *l);

/* Returns a string pointer with value of the item.
 * The string must be destroyed after usage with mem_free.
*/ unsigned char *parse_http_header(unsigned char *head, unsigned char *item, unsigned char **ptr) { unsigned char *i, *f, *g, *h; if (!head) return NULL; for (f = head; *f; f++) { if (*f != 10) continue; f++; for (i = item; *i && *f; i++, f++) if (upcase(*i) != upcase(*f)) goto cont; if (!*f) break; if (f[0] == ':') { while (f[1] == ' ') f++; for (g = ++f; *g >= ' '; g++) ; while (g > f && g[-1] == ' ') g--; h = memacpy(f, g - f); if (ptr) { *ptr = f; } return h; } cont: f--; } return NULL; } unsigned char *parse_header_param(unsigned char *x, unsigned char *e, int all) { unsigned char u; size_t le = strlen(cast_const_char e); int lp; unsigned char *y = x; if (!all) { a: if (!(y = cast_uchar strchr(cast_const_char y, ';'))) return NULL; } while (*y && (*y == ';' || *y <= ' ')) y++; if (strlen(cast_const_char y) < le) return NULL; if (casecmp(y, e, le)) goto a; y += le; while (*y && (*y <= ' ' || *y == '=')) y++; u = ';'; if (*y == '\'' || *y == '"') u = *y++; lp = 0; while (y[lp] >= ' ' && y[lp] != u) { lp++; if (lp == MAXINT) overalloc(); } return memacpy(y, lp); } int get_http_code(unsigned char *head, int *code, int *version) { if (!head) return -1; while (head[0] == ' ') head++; if (upcase(head[0]) != 'H' || upcase(head[1]) != 'T' || upcase(head[2]) != 'T' || upcase(head[3]) != 'P') return -1; if (head[4] == '/' && head[5] >= '0' && head[5] <= '9' && head[6] == '.' 
&& head[7] >= '0' && head[7] <= '9' && head[8] <= ' ') { if (version) *version = (head[5] - '0') * 10 + head[7] - '0'; } else if (version) *version = 0; for (head += 4; *head > ' '; head++) ; if (*head++ != ' ') return -1; if (head[0] < '1' || head [0] > '9' || head[1] < '0' || head[1] > '9' || head[2] < '0' || head [2] > '9') { if (code) *code = 200; return 0; } if (code) *code = (head[0]-'0')*100 + (head[1]-'0')*10 + head[2]-'0'; return 0; } static const unsigned char *months[12] = { cast_uchar "Jan", cast_uchar "Feb", cast_uchar "Mar", cast_uchar "Apr", cast_uchar "May", cast_uchar "Jun", cast_uchar "Jul", cast_uchar "Aug", cast_uchar "Sep", cast_uchar "Oct", cast_uchar "Nov", cast_uchar "Dec" }; #ifdef HAVE_GMTIME static const unsigned char *days[7] = { cast_uchar "Sun", cast_uchar "Mon", cast_uchar "Tue", cast_uchar "Wed", cast_uchar "Thu", cast_uchar "Fri", cast_uchar "Sat", }; #endif time_t parse_http_date(unsigned char *date) /* this functions is bad !!! */ { #ifdef HAVE_MKTIME time_t t = 0; time_t offset; /* Mon, 03 Jan 2000 21:29:33 GMT */ int y; struct tm tm; memset(&tm, 0, sizeof(struct tm)); date = cast_uchar strchr(cast_const_char date, ' '); if (!date) return (time_t)-1; date++; if (*date >= '0' && *date <= '9') { /* Sun, 06 Nov 1994 08:49:37 GMT */ /* Sunday, 06-Nov-94 08:49:37 GMT */ y = 0; if (date[0] < '0' || date[0] > '9') return (time_t)-1; if (date[1] < '0' || date[1] > '9') return (time_t)-1; tm.tm_mday = (date[0] - '0') * 10 + date[1] - '0'; date += 2; if (*date != ' ' && *date != '-') return (time_t)-1; date += 1; for (tm.tm_mon = 0; tm.tm_mon < 12; tm.tm_mon++) if (!casecmp(date, months[tm.tm_mon], 3)) goto f1; return (time_t)-1; f1: date += 3; if (*date != '-' && *date != ' ') return (time_t)-1; date++; if (date[0] < '0' || date[0] > '9') return (time_t)-1; if (date[1] < '0' || date[1] > '9') return (time_t)-1; if (date[2] == ' ') { /* Sunday, 06-Nov-94 08:49:37 GMT */ if (date[0] < '0' || date[0] > '9') return (time_t)-1; if (date[1] < 
'0' || date[1] > '9') return (time_t)-1; tm.tm_year = (date[0] >= '7' ? 1900 : 2000) + (date[0] - '0') * 10 + date[1] - '0' - 1900; date += 2; } else if (date[2] >= '0' && date[2] <= '9' && date[3] >= '0' && date[3] <= '9') { /* Sun, 06 Nov 1994 08:49:37 GMT */ if (date[0] < '0' || date[0] > '9') return (time_t)-1; if (date[1] < '0' || date[1] > '9') return (time_t)-1; if (date[2] < '0' || date[2] > '9') return (time_t)-1; if (date[3] < '0' || date[3] > '9') return (time_t)-1; tm.tm_year = (date[0] - '0') * 1000 + (date[1] - '0') * 100 + (date[2] - '0') * 10 + date[3] - '0' - 1900; date += 4; } else return (time_t)-1; if (*date != ' ') return (time_t)-1; date++; } else { /* Sun Nov 6 08:49:37 1994 */ y = 1; for (tm.tm_mon = 0; tm.tm_mon < 12; tm.tm_mon++) if (!casecmp(date, months[tm.tm_mon], 3)) goto f2; return (time_t)-1; f2: date += 3; while (*date == ' ') date++; if (date[0] < '0' || date[0] > '9') return (time_t)-1; tm.tm_mday = date[0] - '0'; date++; if (*date != ' ') { if (date[0] < '0' || date[0] > '9') return (time_t)-1; tm.tm_mday = tm.tm_mday * 10 + date[0] - '0'; date++; } if (*date != ' ') return (time_t)-1; date++; } if (date[0] < '0' || date[0] > '9') return (time_t)-1; if (date[1] < '0' || date[1] > '9') return (time_t)-1; tm.tm_hour = (date[0] - '0') * 10 + date[1] - '0'; date += 2; if (*date != ':') return (time_t)-1; date++; if (date[0] < '0' || date[0] > '9') return (time_t)-1; if (date[1] < '0' || date[1] > '9') return (time_t)-1; tm.tm_min = (date[0] - '0') * 10 + date[1] - '0'; date += 2; if (*date != ':') return (time_t)-1; date++; if (date[0] < '0' || date[0] > '9') return (time_t)-1; if (date[1] < '0' || date[1] > '9') return (time_t)-1; tm.tm_sec = (date[0] - '0') * 10 + date[1] - '0'; date += 2; if (y) { if (*date != ' ') return (time_t)-1; date++; if (date[0] < '0' || date[0] > '9') return (time_t)-1; if (date[1] < '0' || date[1] > '9') return (time_t)-1; if (date[2] < '0' || date[2] > '9') return (time_t)-1; if (date[3] < '0' || 
date[3] > '9') return (time_t)-1; tm.tm_year = (date[0] - '0') * 1000 + (date[1] - '0') * 100 + (date[2] - '0') * 10 + date[3] - '0' - 1900; date += 4; } if (*date != ' ' && *date) return (time_t)-1; /*debug("%02d:%02d:%02d", tm.tm_hour, tm.tm_min, tm.tm_sec);*/ #if defined(HAVE_TIMEGM) && !defined(HAIKU) t = timegm(&tm); if (t == (time_t)-1) return (time_t)-1; offset = 0; #else t = mktime(&tm); if (t == (time_t)-1) return (time_t)-1; memset(&tm, 0, sizeof(struct tm)); tm.tm_year = 80; tm.tm_mday = 1; offset = 315532800 - mktime(&tm); #endif return (time_t)((uttime)t + (uttime)offset); #else return (time_t)-1; #endif } unsigned char *print_http_date(time_t date) { static unsigned char str[64]; #ifdef HAVE_GMTIME struct tm *tm = gmtime(&date); snprintf(cast_char str, sizeof str, "%s %s %2d %02d:%02d:%02d %04d", days[tm->tm_wday], months[tm->tm_mon], tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec, tm->tm_year + 1900); #else snprintf(cast_char str, sizeof str, "%.0f", (double)date); #endif return str; } static struct { char *name; int bugs; } buggy_servers[] = { { "mod_czech/3.1.0", BL_HTTP10 }, { "Purveyor", BL_HTTP10 }, { "Netscape-Enterprise", BL_HTTP10 | BL_NO_ACCEPT_LANGUAGE }, { "Apache Coyote", BL_HTTP10 }, { "lighttpd", BL_HTTP10 }, { "FORPSI", BL_NO_RANGE }, { "Sausalito", BL_HTTP10 }, { NULL, 0 } }; static int check_http_server_bugs(unsigned char *url, struct http_connection_info *info, unsigned char *head) { unsigned char *server; int i, bugs; if (!http_options.allow_blacklist || info->http10) return 0; if (!(server = parse_http_header(head, cast_uchar "Server", NULL))) return 0; bugs = 0; for (i = 0; buggy_servers[i].name; i++) if (strstr(cast_const_char server, cast_const_char buggy_servers[i].name)) bugs |= buggy_servers[i].bugs; mem_free(server); if (bugs && (server = get_host_name(url))) { add_blacklist_entry(server, bugs); mem_free(server); return bugs & ~BL_NO_RANGE; } return 0; } static void http_end_request(struct connection *c, int notrunc, int 
nokeepalive, int state)
{
	/*
	 * Finish the request with connection state `state`.
	 * On S__OK the cache entry (if any) is marked complete and, unless
	 * `notrunc` is set, truncated at c->from.
	 * The socket is handed to the keep-alive pool only when the per-request
	 * info exists, neither side asked for close, and `nokeepalive` is zero;
	 * otherwise the connection is aborted.
	 */
	struct http_connection_info *info = c->info;
	if (state == S__OK) {
		if (c->cache) {
			if (!notrunc) truncate_entry(c->cache, c->from, 1);
			c->cache->incomplete = 0;
		}
	}
	setcstate(c, state);
	if (c->info && !info->close && !info->send_close && !nokeepalive) {
		add_keepalive_socket(c, HTTP_KEEPALIVE_TIMEOUT, 0);
	} else {
		abort_connection(c);
	}
}

/*
 * http_func: start an HTTP request on connection `c`.
 * When get_keepalive_socket() returns nonzero (presumably: no reusable
 * socket was found - confirm against its definition) a new connection is
 * made to the URL's port and http_send_header() runs once it is established;
 * otherwise the request is sent right away.
 */
void http_func(struct connection *c)
{
	/*setcstate(c, S_CONN);*/
	/*set_connection_timeout(c);*/
	if (get_keepalive_socket(c, NULL)) {
		int p;
		if ((p = get_port(c->url)) == -1) {
			setcstate(c, S_BAD_URL);
			abort_connection(c);
			return;
		}
		make_connection(c, p, &c->sock1, http_send_header);
	} else {
		http_send_header(c);
	}
}

/* proxy_func: requests through a proxy use the same path as http_func(). */
void proxy_func(struct connection *c)
{
	http_func(c);
}

/*
 * add_url_to_str: append `url` to the string *str (length *l), stopping at
 * POST_CHAR. Bytes <= ' ' or >= 127 are written as %XX.
 */
static void add_url_to_str(unsigned char **str, int *l, unsigned char *url)
{
	unsigned char *sp;
	for (sp = url; *sp && *sp != POST_CHAR; sp++) {
		if (*sp <= ' ' || *sp >= 127) {
			unsigned char esc[4];	/* '%' + two hex digits + NUL */
			sprintf(cast_char esc, "%%%02X", (int)*sp);
			add_to_str(str, l, esc);
		} else {
			add_chr_to_str(str, l, *sp);
		}
	}
}

/*
 * http_send_header: build the request for c->url and write it to c->sock1.
 *
 * Allocates the per-request http_connection_info, emits the request line
 * (GET, POST, or CONNECT when tunnelling https through a proxy), the Host
 * header and the remaining headers via the add_* helpers, then appends the
 * hex-decoded POST body if the URL carries one after POST_CHAR.
 * http_get_header is passed to write_to_socket() as the continuation.
 */
static void http_send_header(struct connection *c)
{
	struct http_connection_info *info;
	int http10 = http_options.http10;
	int proxy;
	unsigned char *hdr;
	unsigned char *h, *u;
	unsigned char *u2;
	int l = 0;
	unsigned char *post = NULL;
	unsigned char *host;

	if (!c->cache) {
		if (!find_in_cache(c->url, &c->cache))
			c->cache->refcount--;
	}

	proxy = is_proxy_url(c->url);
	host = remove_proxy_prefix(c->url);
	set_connection_timeout_keepal(c);
	info = mem_calloc(sizeof(struct http_connection_info));
	c->info = info;
#ifdef HAVE_SSL
	/* plain socket to a proxy but an https:// target: tunnel with CONNECT */
	info->https_forward = !c->ssl && proxy && host && !casecmp(host, cast_uchar "https://", 8);
	if (c->ssl) proxy = 0;
#endif
	hdr = init_str();
	if (!host) {
		/* also the common error exit for the gotos further down */
		http_bad_url:
		mem_free(hdr);
		http_end_request(c, 0, 1, S_BAD_URL);
		return;
	}
	if (!info->https_forward && (h = get_host_name(host))) {
		info->bl_flags = get_blacklist_flags(h);
		mem_free(h);
	}
	if (info->bl_flags & BL_HTTP10) http10 = 1;
	info->http10 = http10;
	if (!info->https_forward) {
		/* POST data, if any, follows POST_CHAR inside the URL */
		post = cast_uchar strchr(cast_const_char host, POST_CHAR);
		if (post) post++;
	}
	info->send_close = info->https_forward || http10;
#ifdef HAVE_SSL
	if (info->https_forward) {
		add_to_str(&hdr, &l, cast_uchar "CONNECT ");
		h = get_host_name(host);
		if (!h) goto http_bad_url;
		add_to_str(&hdr, &l, h);
		mem_free(h);
		h = get_port_str(host);
		if (!h) h = stracpy(cast_uchar "443");
		add_chr_to_str(&hdr, &l, ':');
		add_to_str(&hdr, &l, h);
		mem_free(h);
		goto added_connect;
	} else
#endif
	if (!post) {
		add_to_str(&hdr, &l, cast_uchar "GET ");
	} else {
		add_to_str(&hdr, &l, cast_uchar "POST ");
		c->unrestartable = 2;
	}
	if (!proxy) {
		/* direct request: only the path part goes on the request line */
		add_chr_to_str(&hdr, &l, '/');
		u = get_url_data(host);
	} else {
		/* through a proxy the whole URL is sent */
		u = host;
	}
	if (post && post < u) {
		goto http_bad_url;
	}
	u2 = u;
	if (proxy && !*c->socks_proxy && *proxies.dns_append) {
		/* splice proxies.dns_append right after the host name of the URL */
		unsigned char *u_host;
		int u_host_len;
		int u2_len = 0;
		if (parse_url(u, NULL, NULL, NULL, NULL, NULL, &u_host, &u_host_len, NULL, NULL, NULL, NULL, NULL)) goto http_bad_url;
		u2 = init_str();
		add_bytes_to_str(&u2, &u2_len, u, u_host + u_host_len - u);
		add_to_str(&u2, &u2_len, proxies.dns_append);
		add_to_str(&u2, &u2_len, u_host + u_host_len);
	}
	add_url_to_str(&hdr, &l, u2);
	if (u2 != u) mem_free(u2);
#ifdef HAVE_SSL
	added_connect:
#endif
	if (!http10) add_to_str(&hdr, &l, cast_uchar " HTTP/1.1\r\n");
	else add_to_str(&hdr, &l, cast_uchar " HTTP/1.0\r\n");
	if (!info->https_forward && (h = get_host_name(host))) {
		add_to_str(&hdr, &l, cast_uchar "Host: ");
		/* drop a trailing dot from the host name */
		if (*h && h[strlen(cast_const_char h) - 1] == '.') {
			h[strlen(cast_const_char h) - 1] = 0;
		}
		/* bracketed literal: cut everything from '%' on and re-close the bracket */
		if (h[0] == '[' && h[strlen(cast_const_char h) - 1] == ']') {
			unsigned char *pc = cast_uchar strchr(cast_const_char h, '%');
			if (pc) {
				pc[0] = ']';
				pc[1] = 0;
			}
		}
		add_to_str(&hdr, &l, h);
		mem_free(h);
		if ((h = get_port_str(host))) {
			/* the port is only sent when it differs from the scheme default */
			unsigned char *default_port = cast_uchar "80";
#ifdef HAVE_SSL
			if (c->ssl) default_port = cast_uchar "443";
#endif
			if (strcmp(cast_const_char h, cast_const_char default_port)) {
				add_chr_to_str(&hdr, &l, ':');
				add_to_str(&hdr, &l, h);
			}
			mem_free(h);
		}
		add_to_str(&hdr, &l, cast_uchar "\r\n");
	}
	add_user_agent(&hdr, &l, info->https_forward ? NULL : host);
	if (proxy) add_proxy_auth_string(&hdr, &l, c->url);
	if (!info->https_forward) {
		test_restart(c);
		/* requests with c->doh set get a reduced header set */
		if (!c->doh) {
			add_referer(&hdr, &l, host, c->prev_url);
		}
		add_accept(&hdr, &l, c);
		if (!c->doh) {
			add_accept_language(&hdr, &l, info);
			add_accept_encoding(&hdr, &l, host, c);
			add_accept_charset(&hdr, &l, info);
		}
		add_dnt(&hdr, &l);
		add_connection(&hdr, &l, http10, proxy, !info->send_close);
		if (!c->doh) {
			add_upgrade(&hdr, &l);
			add_if_modified(&hdr, &l, c);
			add_range(&hdr, &l, host, c);
			add_pragma_no_cache(&hdr, &l, c->no_cache);
			add_auth_string(&hdr, &l, host);
		}
		/* may advance `post` past the content-type line */
		add_post_header(&hdr, &l, &post);
		if (!c->doh) {
			add_cookies(&hdr, &l, host);
		}
		add_extra_options(&hdr, &l);
	}
	add_to_str(&hdr, &l, cast_uchar "\r\n");
	if (post) {
		/* the body is stored as pairs of hex digits; invalid digits decode as 0 */
		while (post[0] && post[1]) {
			int h1, h2;
			h1 = post[0] <= '9' ? (unsigned)post[0] - '0' : post[0] >= 'A' ? upcase(post[0]) - 'A' + 10 : 0;
			if (h1 < 0 || h1 >= 16) h1 = 0;
			h2 = post[1] <= '9' ? (unsigned)post[1] - '0' : post[1] >= 'A' ? upcase(post[1]) - 'A' + 10 : 0;
			if (h2 < 0 || h2 >= 16) h2 = 0;
			add_chr_to_str(&hdr, &l, h1 * 16 + h2);
			post += 2;
		}
	}
	write_to_socket(c, c->sock1, hdr, l, http_get_header);
	mem_free(hdr);
	setcstate(c, S_SENT);
}

/*
 * test_restart: give up resuming (c->from = 0) and disable compression when
 * the cached headers carry a Content-Encoding. From the second try on the
 * host is additionally blacklisted with BL_NO_COMPRESSION.
 */
static void test_restart(struct connection *c)
{
	/* If the cached entity is compressed, request the whole file and turn off compression */
	if (c->cache && c->from) {
		unsigned char *d;
		if ((d = parse_http_header(c->cache->head, cast_uchar "Content-Encoding", NULL))) {
			mem_free(d);
			c->from = 0;
			c->no_compress = 1;
#ifdef HAVE_ANY_COMPRESSION
			if (c->tries >= 1) {
				unsigned char *h;
				if ((h = get_host_name(c->url))) {
					add_blacklist_entry(h, BL_NO_COMPRESSION);
					mem_free(h);
				}
			}
#endif
		}
	}
}

/*
 * add_user_agent: append the "User-Agent" header to *hdr.
 * With SCRUB_HEADERS a fixed Firefox string is sent; otherwise either the
 * configured fake user agent or the generated Links identification.
 * `url` is only referenced by the disabled (#if 0) code below.
 */
static void add_user_agent(unsigned char **hdr, int *l, unsigned char *url)
{
	add_to_str(hdr, l, cast_uchar "User-Agent: ");
	if (SCRUB_HEADERS) {
		add_to_str(hdr, l, cast_uchar "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0\r\n");
	} else if (!(*http_options.header.fake_useragent)) {
		/*
		 * Google started to return css-styled page for searches.
		 * It returns non-css page if the user agent begins with Lynx.
*/
#if 0
		if (url &&
		    (casestrstr(url, cast_uchar "/www.google.") || casestrstr(url, cast_uchar "/google.")) &&
		    strstr(cast_const_char url, "/search?") &&
		    (strstr(cast_const_char url, "?q=") || strstr(cast_const_char url, "&q=")) &&
		    !strstr(cast_const_char url, "?tbm=isch") &&
		    !strstr(cast_const_char url, "&tbm=isch")
		   ) add_to_str(hdr, l, cast_uchar("Lynx/"));
#endif
		/* "Links (<version>; <system>; <compiler>; <mode>)" */
		add_to_str(hdr, l, cast_uchar("Links (" VERSION_STRING "; "));
		add_to_str(hdr, l, system_name);
		add_to_str(hdr, l, cast_uchar "; ");
		add_to_str(hdr, l, compiler_name);
		add_to_str(hdr, l, cast_uchar "; ");
		if (!F && !list_empty(terminals)) {
			/* "braille" if any terminal has the braille flag set, else "text" */
			struct terminal *term;
			struct list_head *lterm;
			unsigned char *t = cast_uchar "text";
			foreach(struct terminal, term, lterm, terminals)
				if (term->spec->braille)
					t = cast_uchar "braille";
			add_to_str(hdr, l, t);
		}
#ifdef G
		else if (F && drv) {
			add_to_str(hdr, l, drv->name);
		}
#endif
		else {
			add_to_str(hdr, l, cast_uchar "dump");
		}
		add_to_str(hdr, l, cast_uchar ")\r\n");
	} else {
		add_to_str(hdr, l, http_options.header.fake_useragent);
		add_to_str(hdr, l, cast_uchar "\r\n");
	}
}

/*
 * add_referer: append a "Referer" header according to
 * http_options.header.referer.
 *
 * REFERER_FAKE             - the configured fake referer
 * REFERER_SAME_URL         - the requested URL itself
 * REFERER_REAL_SAME_SERVER - prev_url, but only if it is on the same host as
 *                            `url` (plus two hard-coded cross-host
 *                            exceptions); falls through to REFERER_REAL
 * REFERER_REAL             - prev_url with the user/password part removed
 * Any other setting adds nothing.
 */
static void add_referer(unsigned char **hdr, int *l, unsigned char *url, unsigned char *prev_url)
{
	switch (http_options.header.referer) {
		case REFERER_FAKE: {
			add_to_str(hdr, l, cast_uchar "Referer: ");
			add_to_str(hdr, l, http_options.header.fake_referer);
			add_to_str(hdr, l, cast_uchar "\r\n");
			break;
		}

		case REFERER_SAME_URL: {
			add_to_str(hdr, l, cast_uchar "Referer: ");
			add_url_to_str(hdr, l, url);
			add_to_str(hdr, l, cast_uchar "\r\n");
			break;
		}

		case REFERER_REAL_SAME_SERVER: {
			unsigned char *h, *j;
			int brk = 1;	/* stays 1 unless the referer may be sent */
			/* NOTE(review): prev_url is passed to get_host_name() before the
			   NULL check in REFERER_REAL - confirm that get_host_name() accepts NULL */
			if ((h = get_host_name(url))) {
				if ((j = get_host_name(prev_url))) {
					if (!casestrcmp(h, j)) brk = 0;
					else if (!casestrcmp(h, cast_uchar "imageproxy.jxs.cz")) {
						/* this `l` shadows the parameter; it is the length of j */
						int l = (int)strlen(cast_const_char j);
						int q = (int)strlen(".blog.cz");
						if (l > q && !casestrcmp((j + l - q), cast_uchar ".blog.cz")) brk = 0;
						else if (!casestrcmp(j, cast_uchar "blog.cz")) brk = 0;
					} else if (!casestrcmp(h, cast_uchar "www.google.com")) {
						unsigned char *c = get_url_data(url);
						if (c && !strncmp(cast_const_char c, "recaptcha/api/", 14)) brk = 0;
					}
					mem_free(j);
				}
				mem_free(h);
			}
			if (brk) break;
		}
		/*-fallthrough*/
		case REFERER_REAL: {
			unsigned char *ref;
			unsigned char *user, *ins;
			int ulen;
			if (!prev_url) break;   /* no referrer */

			ref = stracpy(prev_url);
			/* cut the user part out of the copy by moving the rest of the URL over it */
			if (!parse_url(ref, NULL, &user, &ulen, NULL, NULL, &ins, NULL, NULL, NULL, NULL, NULL, NULL) && ulen && ins) {
				memmove(user, ins, strlen(cast_const_char ins) + 1);
			}
			add_to_str(hdr, l, cast_uchar "Referer: ");
			add_url_to_str(hdr, l, ref);
			add_to_str(hdr, l, cast_uchar "\r\n");
			mem_free(ref);
			break;
		}
	}
}

/*
 * add_accept: append the "Accept" header: the DNS message type for requests
 * with c->doh set, a browser-like list with SCRUB_HEADERS, otherwise any type.
 */
static void add_accept(unsigned char **hdr, int *l, struct connection *c)
{
	if (c->doh) {
		add_to_str(hdr, l, cast_uchar "Accept: application/dns-message\r\n");
	} else if (SCRUB_HEADERS) {
		add_to_str(hdr, l, cast_uchar "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8\r\n");
	} else {
		add_to_str(hdr, l, cast_uchar "Accept: */*\r\n");
	}
}

/*
 * add_accept_language: append "Accept-Language" unless the host is
 * blacklisted with BL_NO_ACCEPT_LANGUAGE. Without SCRUB_HEADERS the
 * T__ACCEPT_LANGUAGE text is sent, followed by "en;q=0.2" if that text does
 * not already contain "en," or "en;", and by "*;q=0.1".
 */
static void add_accept_language(unsigned char **hdr, int *l, struct http_connection_info *info)
{
	if (!(info->bl_flags & BL_NO_ACCEPT_LANGUAGE)) {
		add_to_str(hdr, l, cast_uchar "Accept-Language: ");
		if (SCRUB_HEADERS) {
			add_to_str(hdr, l, cast_uchar "en-US,en;q=0.5\r\n");
		} else {
			int la;
			la = *l;	/* offset in *hdr where the language list starts */
			add_to_str(hdr, l, get_text_translation(TEXT_(T__ACCEPT_LANGUAGE), NULL));
			add_chr_to_str(hdr, l, ',');
			if (!strstr(cast_const_char(*hdr + la), "en,") &&
			    !strstr(cast_const_char(*hdr + la), "en;"))
				add_to_str(hdr, l, cast_uchar "en;q=0.2,");
			add_to_str(hdr, l, cast_uchar "*;q=0.1\r\n");
		}
	}
}

#ifdef HAVE_ANY_COMPRESSION
/*
 * advertise_compression: return 1 if an Accept-Encoding header should be
 * sent for `url`, 0 otherwise. It is suppressed when compression is turned
 * off (per connection, globally, or by blacklist), for bugzilla attachment
 * URLs, and when get_compress_by_extension() returns nonzero for the text
 * after the last '.' in the URL.
 */
static int advertise_compression(unsigned char *url, struct connection *c)
{
	struct http_connection_info *info = c->info;
	unsigned char *extd;
	if (c->no_compress || http_options.no_compression || info->bl_flags & BL_NO_COMPRESSION)
		return 0;

	/* Fix for bugzilla. The attachment may be compressed and if the server
	   compresses it again, we can't decompress the inner compression */
	if (strstr(cast_const_char url, "/attachment.cgi?"))
		return 0;

	extd = cast_uchar strrchr(cast_const_char url, '.');
	if (extd && get_compress_by_extension(extd + 1, cast_uchar strchr(cast_const_char(extd + 1), 0)))
		return 0;
	return 1;
}
#endif

/*
 * add_accept_encoding: append "Accept-Encoding" listing the compiled-in
 * decompressors. Everything except gzip/deflate is withheld for hosts
 * blacklisted with BL_NO_BZIP2 and (brotli over SSL excepted) when
 * SCRUB_HEADERS is set. If no encoding ends up in the list, the header name
 * is dropped again by restoring *l.
 */
static void add_accept_encoding(unsigned char **hdr, int *l, unsigned char *url, struct connection *c)
{
#if defined(HAVE_ANY_COMPRESSION)
#define info ((struct http_connection_info *)c->info)
	if (advertise_compression(url, c)) {
		int orig_l = *l;	/* length before the header name */
		int l1;			/* length right after the header name */
		add_to_str(hdr, l, cast_uchar "Accept-Encoding: ");
		l1 = *l;
#if defined(HAVE_ZLIB)
		if (*l != l1) add_to_str(hdr, l, cast_uchar ", ");
		add_to_str(hdr, l, cast_uchar "gzip, deflate");
#endif
#if defined(HAVE_BROTLI)
		if ((!SCRUB_HEADERS
#ifdef HAVE_SSL
			|| c->ssl
#endif
			) && !(info->bl_flags & BL_NO_BZIP2)) {
			if (*l != l1) add_to_str(hdr, l, cast_uchar ", ");
			add_to_str(hdr, l, cast_uchar "br");
		}
#endif
#if defined(HAVE_ZSTD)
		if (!SCRUB_HEADERS && !(info->bl_flags & BL_NO_BZIP2)) {
			if (*l != l1) add_to_str(hdr, l, cast_uchar ", ");
			add_to_str(hdr, l, cast_uchar "zstd");
		}
#endif
#if defined(HAVE_BZIP2)
		if (!SCRUB_HEADERS && !(info->bl_flags & BL_NO_BZIP2)) {
			if (*l != l1) add_to_str(hdr, l, cast_uchar ", ");
			add_to_str(hdr, l, cast_uchar "bzip2");
		}
#endif
/* LZMA on DOS often fails with out of memory, don't announce it */
#if defined(HAVE_LZMA) && !defined(DOS)
		if (!SCRUB_HEADERS && !(info->bl_flags & BL_NO_BZIP2)) {
			if (*l != l1) add_to_str(hdr, l, cast_uchar ", ");
			add_to_str(hdr, l, cast_uchar "lzma, lzma2");
		}
#endif
#if defined(HAVE_LZIP)
		if (!SCRUB_HEADERS && !(info->bl_flags & BL_NO_BZIP2)) {
			if (*l != l1) add_to_str(hdr, l, cast_uchar ", ");
			add_to_str(hdr, l, cast_uchar "lzip");
		}
#endif
		if (*l != l1) add_to_str(hdr, l, cast_uchar "\r\n");
		else *l = orig_l;
	}
#undef info
#endif
}

static void add_accept_charset(unsigned char **hdr, int *l, struct
http_connection_info *info) { static unsigned char *accept_charset = NULL; if (SCRUB_HEADERS || info->bl_flags & BL_NO_CHARSET || http_options.no_accept_charset) return; if (!accept_charset) { int i; unsigned char *cs, *ac; int aclen = 0; ac = init_str(); for (i = 0; (cs = get_cp_mime_name(i)); i++) { if (aclen) add_chr_to_str(&ac, &aclen, ','); else add_to_str(&ac, &aclen, cast_uchar "Accept-Charset: "); add_to_str(&ac, &aclen, cs); } if (aclen) add_to_str(&ac, &aclen, cast_uchar "\r\n"); if (!(accept_charset = cast_uchar strdup(cast_const_char ac))) { add_to_str(hdr, l, ac); mem_free(ac); return; } mem_free(ac); } add_to_str(hdr, l, accept_charset); } static void add_dnt(unsigned char **hdr, int *l) { if (http_options.header.do_not_track) add_to_str(hdr, l, cast_uchar "DNT: 1\r\n"); } static void add_connection(unsigned char **hdr, int *l, int http10, int proxy, int alive) { if (!http10) { if (!proxy) add_to_str(hdr, l, cast_uchar "Connection: "); else add_to_str(hdr, l, cast_uchar "Proxy-Connection: "); if (alive) add_to_str(hdr, l, cast_uchar "keep-alive\r\n"); else add_to_str(hdr, l, cast_uchar "close\r\n"); } } static void add_upgrade(unsigned char **hdr, int *l) { if (proxies.only_proxies) { add_to_str(hdr, l, cast_uchar "Upgrade-Insecure-Requests: 1\r\n"); } } static void add_if_modified(unsigned char **hdr, int *l, struct connection *c) { struct cache_entry *e; if ((e = c->cache)) { int code = 0; /* against warning */ if (get_http_code(e->head, &code, NULL) || code >= 400) goto skip_ifmod; if (!e->incomplete && e->head && c->no_cache <= NC_IF_MOD) { unsigned char *m; if (e->last_modified) m = stracpy(e->last_modified); else if ((m = parse_http_header(e->head, cast_uchar "Date", NULL))) ; else if ((m = parse_http_header(e->head, cast_uchar "Expires", NULL))) ; else goto skip_ifmod; add_to_str(hdr, l, cast_uchar "If-Modified-Since: "); add_to_str(hdr, l, m); add_to_str(hdr, l, cast_uchar "\r\n"); mem_free(m); } skip_ifmod:; } } static void add_range(unsigned 
char **hdr, int *l, unsigned char *url, struct connection *c)
{
	/*
	 * Appends "Range: bytes=<from>-" to resume a partial download.
	 * Nothing is added when the cached status code is >= 300, when
	 * c->from is zero, when c->no_cache is NC_IF_MOD or above, or when the
	 * host is blacklisted with BL_NO_RANGE. `url` is not used.
	 */
	struct cache_entry *e;
	struct http_connection_info *info = c->info;
	if ((e = c->cache)) {
		int code = 0; /* against warning */
		if (!get_http_code(e->head, &code, NULL) && code >= 300)
			return;
	}
	if (c->from /*&& (c->est_length == -1 || c->from < c->est_length)*/ && c->no_cache < NC_IF_MOD && !(info->bl_flags & BL_NO_RANGE)) {
		add_to_str(hdr, l, cast_uchar "Range: bytes=");
		add_num_to_str(hdr, l, c->from);
		add_to_str(hdr, l, cast_uchar "-\r\n");
	}
}

/* add_pragma_no_cache: ask intermediaries not to serve from cache when no_cache >= NC_PR_NO_CACHE. */
static void add_pragma_no_cache(unsigned char **hdr, int *l, int no_cache)
{
	if (no_cache >= NC_PR_NO_CACHE) add_to_str(hdr, l, cast_uchar "Pragma: no-cache\r\nCache-Control: no-cache\r\n");
}

/* add_proxy_auth_string: append the string from get_auth_string(url, 1), if any. */
static void add_proxy_auth_string(unsigned char **hdr, int *l, unsigned char *url)
{
	unsigned char *h;
	if ((h = get_auth_string(url, 1))) {
		add_to_str(hdr, l, h);
		mem_free(h);
	}
}

/* add_auth_string: append the string from get_auth_string(url, 0), if any. */
static void add_auth_string(unsigned char **hdr, int *l, unsigned char *url)
{
	unsigned char *h;
	if ((h = get_auth_string(url, 0))) {
		add_to_str(hdr, l, h);
		mem_free(h);
	}
}

/*
 * add_post_header: append "Content-Type" and "Content-Length" for POST data.
 *
 * *post points at "<content-type>\n<hex-encoded body>". If the line feed is
 * present, the text before it becomes the Content-Type and *post is advanced
 * to the body. The body is stored as two hex digits per byte, hence the
 * length is strlen / 2. Does nothing when *post is NULL.
 */
static void add_post_header(unsigned char **hdr, int *l, unsigned char **post)
{
	if (*post) {
		unsigned char *pd = cast_uchar strchr(cast_const_char *post, '\n');
		if (pd) {
			add_to_str(hdr, l, cast_uchar "Content-Type: ");
			add_bytes_to_str(hdr, l, *post, pd - *post);
			add_to_str(hdr, l, cast_uchar "\r\n");
			*post = pd + 1;
		}
		add_to_str(hdr, l, cast_uchar "Content-Length: ");
		add_num_to_str(hdr, l, strlen(cast_const_char *post) / 2);
		add_to_str(hdr, l, cast_uchar "\r\n");
	}
}

/*
 * add_extra_options: append the user-configured extra headers.
 *
 * http_options.header.extra_header holds header lines separated by '\\'.
 * A line whose name (text before ':') is already present in *hdr replaces
 * the value of that existing header; the exception is "Cookie:", which is
 * always appended. Lines without ':' are appended as they are.
 */
static void add_extra_options(unsigned char **hdr, int *l)
{
	unsigned char *p = http_options.header.extra_header;
	while (1) {
		unsigned char *q = p + strcspn(cast_const_char p, "\\");
		if (p != q) {
			unsigned char *c;
			unsigned char *s = memacpy(p, q - p);	/* one extra header line */
			c = cast_uchar strchr(cast_const_char s, ':');
			if (c && casecmp(s, cast_uchar "Cookie:", 7)) {
				unsigned char *v = NULL; /* against warning */
				unsigned char *cc = memacpy(s, c - s);	/* header name only */
				unsigned char *x = parse_http_header(*hdr, cc, &v);
				mem_free(cc);
				if (x) {
					/*
					 * Rebuild the header block: everything up to the old
					 * value, the new value, then the rest starting at the
					 * end of the old value's line.
					 */
					unsigned char *new_hdr;
					int new_l;
					mem_free(x);
					new_hdr = init_str();
					new_l = 0;
					add_bytes_to_str(&new_hdr, &new_l, *hdr, v - *hdr);
					while (*++c == ' ')
						;
					add_to_str(&new_hdr, &new_l, c);
					add_to_str(&new_hdr, &new_l, v + strcspn(cast_const_char v, "\r\n"));
					mem_free(*hdr);
					*hdr = new_hdr;
					*l = new_l;
					goto already_added;
				}
			}
			add_to_str(hdr, l, s);
			add_to_str(hdr, l, cast_uchar "\r\n");
			already_added:
			mem_free(s);
		}
		if (!*q) break;
		p = q + 1;
	}
}

/*
 * is_line_in_buffer: look for a complete line at the start of the buffer.
 *
 * Returns the length of the line including its LF or CR LF terminator,
 * 0 if no complete line has arrived yet (also when the buffer ends in a lone
 * CR), or -1 if another byte below ' ' occurs before the terminator.
 */
static int is_line_in_buffer(struct read_buffer *rb)
{
	int l;
	for (l = 0; l < rb->len; l++) {
		if (rb->data[l] == 10) return l + 1;
		if (l < rb->len - 1 && rb->data[l] == 13 && rb->data[l + 1] == 10) return l + 2;
		if (l == rb->len - 1 && rb->data[l] == 13) return 0;
		if (rb->data[l] < ' ') return -1;
	}
	return 0;
}

/*
 * read_http_data: socket callback that moves response body data from `rb`
 * into the cache entry.
 *
 * info->length != -2: plain body. Up to info->length bytes (all buffered
 * bytes if the length is unknown) are stored; the request ends when the
 * length reaches zero or the peer closes (rb->close == 2).
 *
 * info->length == -2: chunked body, driven by info->chunk_remaining:
 *	-1	a chunk-size line (hexadecimal) is expected; size 0 switches to -2
 *	-2	lines after the last chunk are consumed until an empty one
 *	>= 0	that many bytes of chunk data remain, followed by LF or CR LF
 *
 * Unless the request ended or failed, another read is scheduled.
 */
static void read_http_data(struct connection *c, struct read_buffer *rb)
{
	struct http_connection_info *info = c->info;
	int a;
	set_connection_timeout(c);
	if (rb->close == 2) {
		http_end_request(c, 0, 0, S__OK);
		return;
	}
	if (info->length != -2) {
		int l = rb->len;
		if (info->length >= 0 && info->length < l) l = (int)info->length;
		/* refuse offsets that would overflow off_t */
		if ((off_t)(0UL + c->from + l) < 0) {
			setcstate(c, S_LARGE_FILE);
			abort_connection(c);
			return;
		}
		c->received += l;
		a = add_fragment(c->cache, c->from, rb->data, l);
		if (a < 0) {
			/* negative results of add_fragment() are used as the connection state */
			setcstate(c, a);
			abort_connection(c);
			return;
		}
		if (a == 1) c->tries = 0;
		if (info->length >= 0) info->length -= l;
		c->from += l;
		kill_buffer_data(rb, l);
		if (!info->length) {
			http_end_request(c, 0, 0, S__OK);
			return;
		}
	} else {
		next_chunk:
		if (info->chunk_remaining == -2) {
			int l;
			if ((l = is_line_in_buffer(rb))) {
				if (l == -1) {
					setcstate(c, S_HTTP_ERROR);
					abort_connection(c);
					return;
				}
				kill_buffer_data(rb, l);
				/* a line of at most two bytes can only be the line terminator: end of body */
				if (l <= 2) {
					http_end_request(c, 0, 0, S__OK);
					return;
				}
				goto next_chunk;
			}
		} else if (info->chunk_remaining == -1) {
			int l;
			if ((l = is_line_in_buffer(rb))) {
				char *end;
				long n = 0;	/* warning, go away */
				if (l != -1) n = strtol(cast_const_char rb->data, &end, 16);
				if (l == -1 || n < 0 || n >= MAXINT || cast_uchar end == rb->data) {
					setcstate(c, S_HTTP_ERROR);
					abort_connection(c);
					return;
				}
				kill_buffer_data(rb, l);
				if (!(info->chunk_remaining = (int)n)) info->chunk_remaining = -2;
				goto next_chunk;
			}
		} else {
			int l = info->chunk_remaining;
			if (l > rb->len) l = rb->len;
			if ((off_t)(0UL + c->from + l) < 0) {
				setcstate(c, S_LARGE_FILE);
				abort_connection(c);
				return;
			}
			c->received += l;
			a = add_fragment(c->cache, c->from, rb->data, l);
			if (a < 0) {
				setcstate(c, a);
				abort_connection(c);
				return;
			}
			if (a == 1) c->tries = 0;
			info->chunk_remaining -= l;
			c->from += l;
			kill_buffer_data(rb, l);
			if (!info->chunk_remaining && rb->len >= 1) {
				/* the chunk is complete: consume the LF or CR LF that follows it */
				if (rb->data[0] == 10) kill_buffer_data(rb, 1);
				else {
					if (rb->data[0] != 13 || (rb->len >= 2 && ((unsigned char *)rb->data)[1] != 10)) {
						setcstate(c, S_HTTP_ERROR);
						abort_connection(c);
						return;
					}
					/* only the CR has arrived so far; wait for the LF */
					if (rb->len < 2) goto read_more;
					kill_buffer_data(rb, 2);
				}
				info->chunk_remaining = -1;
				goto next_chunk;
			}
		}

	}
	read_more:
	read_from_socket(c, c->sock1, rb, read_http_data);
	setcstate(c, S_TRANS);
}

/*
 * get_header: check whether a complete response header block is buffered.
 *
 * Returns
 *	> 0	length of the header block including the terminating empty
 *		line (LF LF or CR LF CR LF)
 *	0	more data is needed
 *	-1	malformed data: a NUL byte, or a CR that is not followed by LF
 *	-2	the data does not begin with "HTTP"; the caller treats this as
 *		a response without headers
 */
static int get_header(struct read_buffer *rb)
{
	int i;
	if (rb->len <= 0) return 0;
	if (rb->data[0] != 'H') return -2;
	if (rb->len <= 1) return 0;
	if (((unsigned char *)rb->data)[1] != 'T') return -2;
	if (rb->len <= 2) return 0;
	if (((unsigned char *)rb->data)[2] != 'T') return -2;
	if (rb->len <= 3) return 0;
	if (((unsigned char *)rb->data)[3] != 'P') return -2;
	for (i = 0; i < rb->len; i++) {
		unsigned char a = rb->data[i];
		if (/*a < ' ' && a != 10 && a != 13*/ !a) return -1;
		if (i < rb->len - 1 && a == 10 && rb->data[i + 1] == 10) return i + 2;
		if (i < rb->len - 3 && a == 13) {
			if (rb->data[i + 1] != 10) return -1;
			if (rb->data[i + 2] == 13) {
				if (rb->data[i + 3] != 10) return -1;
				return i + 4;
			}
		}
	}
	return 0;
}

static void http_got_header(struct connection *c, struct read_buffer *rb)
{
	off_t cf;
	int state = c->state != S_PROC ?
S_GETH : S_PROC; unsigned char *head; int a, h = 0, version = 0; /* against warning */ unsigned char *d; struct cache_entry *e; int previous_http_code; struct http_connection_info *info; unsigned char *host = remove_proxy_prefix(c->url); set_connection_timeout(c); info = c->info; if (rb->close == 2) { unsigned char *hs; if (!c->tries && (hs = get_host_name(host))) { if (info->bl_flags & BL_NO_CHARSET) { del_blacklist_entry(hs, BL_NO_CHARSET); } else { add_blacklist_entry(hs, BL_NO_CHARSET); c->tries = -1; } mem_free(hs); } setcstate(c, S_CANT_READ); retry_connection(c); return; } rb->close = 0; again: if ((a = get_header(rb)) == -1) { setcstate(c, S_HTTP_ERROR); abort_connection(c); return; } if (!a) { read_from_socket(c, c->sock1, rb, http_got_header); setcstate(c, state); return; } if (a != -2) { head = memacpy(rb->data, a); kill_buffer_data(rb, a); } else { head = stracpy(cast_uchar "HTTP/0.9 200 OK\r\nContent-Type: text/html\r\n\r\n"); } if (get_http_code(head, &h, &version) || h == 101) { mem_free(head); setcstate(c, S_HTTP_ERROR); abort_connection(c); return; } if (check_http_server_bugs(host, c->info, head) && is_connection_restartable(c)) { mem_free(head); setcstate(c, S_RESTART); retry_connection(c); return; } if (h == 100) { mem_free(head); state = S_PROC; goto again; } if (h < 200) { mem_free(head); setcstate(c, S_HTTP_ERROR); abort_connection(c); return; } #ifdef HAVE_SSL if (info->https_forward && h >= 200 && h < 300) { mem_free(head); mem_free(c->info); c->info = 0; c->ssl = DUMMY; continue_connection(c, &c->sock1, http_send_header); return; } if (info->https_forward && h != 407) { mem_free(head); setcstate(c, S_HTTPS_FWD_ERROR); abort_connection(c); return; } #endif if (h != 401 && h != 407 && !c->doh) { unsigned char *cookie; unsigned char *ch = head; while ((cookie = parse_http_header(ch, cast_uchar "Set-Cookie", &ch))) { set_cookie(host, cookie); mem_free(cookie); } } if (h == 204) { mem_free(head); http_end_request(c, 0, 0, S_HTTP_204); return; } 
if (h == 304) { mem_free(head); http_end_request(c, 1, 0, S__OK); return; } if (h == 416 && c->from) { mem_free(head); http_end_request(c, 0, 1, S__OK); return; } if (h == 431) { unsigned char *hs; if (!(info->bl_flags & BL_NO_CHARSET) && (hs = get_host_name(host))) { mem_free(head); add_blacklist_entry(hs, BL_NO_CHARSET); mem_free(hs); c->tries = -1; setcstate(c, S_RESTART); retry_connection(c); return; } } if ((h == 500 || h == 502 || h == 503 || h == 504) && http_options.retry_internal_errors && is_connection_restartable(c)) { /* !!! FIXME: wait some time ... */ if (is_last_try(c)) { unsigned char *h; if ((h = get_host_name(host))) { add_blacklist_entry(h, BL_NO_BZIP2); mem_free(h); } } mem_free(head); setcstate(c, S_RESTART); retry_connection(c); return; } if (!c->cache) { if (get_connection_cache_entry(c)) { mem_free(head); setcstate(c, S_OUT_OF_MEM); abort_connection(c); return; } c->cache->refcount--; } e = c->cache; previous_http_code = e->http_code; e->http_code = h; if (e->head) mem_free(e->head); e->head = head; if (c->doh) e->expire_time = 1; if ((d = parse_http_header(head, cast_uchar "Expires", NULL))) { time_t t = parse_http_date(d); if (t != (time_t)-1 && e->expire_time != 1) e->expire_time = t; mem_free(d); } if ((d = parse_http_header(head, cast_uchar "Pragma", NULL))) { if (!casecmp(d, cast_uchar "no-cache", 8)) e->expire_time = 1; mem_free(d); } if ((d = parse_http_header(head, cast_uchar "Cache-Control", NULL))) { unsigned char *f = d; while (1) { while (*f && (*f == ' ' || *f == ',')) f++; if (!*f) break; if (!casecmp(f, cast_uchar "no-cache", 8) || !casecmp(f, cast_uchar "must-revalidate", 15)) { e->expire_time = 1; } if (!casecmp(f, cast_uchar "max-age=", 8)) { if (e->expire_time != 1) { e->expire_time = get_absolute_seconds(); e->expire_time = (time_t)((uttime)e->expire_time + (uttime)atoi(cast_const_char(f + 8))); } } while (*f && *f != ',') f++; } mem_free(d); } #ifdef HAVE_SSL if (c->ssl) { if (e->ssl_info) mem_free(e->ssl_info); 
e->ssl_info = get_cipher_string(c->ssl); if (e->ssl_authority) mem_free(e->ssl_authority); e->ssl_authority = stracpy(c->ssl->ca); } #endif if (e->redirect) mem_free(e->redirect), e->redirect = NULL; if ((h == 302 || h == 303 || h == 307 || h == 511) && !e->expire_time) e->expire_time = 1; if (h == 301 || h == 302 || h == 303 || h == 307 || h == 308) { if ((d = parse_http_header(e->head, cast_uchar "Location", NULL))) { unsigned char *user, *ins; unsigned char *newuser, *newpassword; if (!parse_url(d, NULL, &user, NULL, NULL, NULL, &ins, NULL, NULL, NULL, NULL, NULL, NULL) && !user && ins && (newuser = get_user_name(host))) { if (*newuser) { int ins_off = (int)(ins - d); newpassword = get_pass(host); if (!newpassword) newpassword = stracpy(cast_uchar ""); add_to_strn(&newuser, cast_uchar ":"); add_to_strn(&newuser, newpassword); add_to_strn(&newuser, cast_uchar "@"); extend_str(&d, (int)strlen(cast_const_char newuser)); ins = d + ins_off; memmove(ins + strlen(cast_const_char newuser), ins, strlen(cast_const_char ins) + 1); memcpy(ins, newuser, strlen(cast_const_char newuser)); mem_free(newpassword); } mem_free(newuser); } if (e->redirect) mem_free(e->redirect); e->redirect = d; if (h == 307 || h == 308) { unsigned char *p; if ((p = cast_uchar strchr(cast_const_char host, POST_CHAR))) add_to_strn(&e->redirect, p); } } } if (!e->expire_time && strchr(cast_const_char c->url, POST_CHAR)) e->expire_time = 1; info->close = 0; info->length = -1; info->version = version; if ((d = parse_http_header(e->head, cast_uchar "Connection", NULL)) || (d = parse_http_header(e->head, cast_uchar "Proxy-Connection", NULL))) { if (!casestrcmp(d, cast_uchar "close")) info->close = 1; mem_free(d); } else if (version < 11) info->close = 1; cf = c->from; c->from = 0; if ((d = parse_http_header(e->head, cast_uchar "Content-Range", NULL))) { if (strlen(cast_const_char d) > 6) { d[5] = 0; if (!(casestrcmp(d, cast_uchar "bytes")) && d[6] >= '0' && d[6] <= '9') { my_strtoll_t f = my_strtoll(d + 
6, NULL); if (f >= 0 && (off_t)f >= 0 && (off_t)f == f) c->from = f; } } mem_free(d); } else if (h == 206) { /* Hmm ... some servers send 206 partial but don't send Content-Range */ c->from = cf; } if (cf && !c->from && !c->unrestartable) c->unrestartable = 1; if (c->from > cf || c->from < 0) { setcstate(c, S_HTTP_ERROR); abort_connection(c); return; } if ((d = parse_http_header(e->head, cast_uchar "Content-Length", NULL))) { unsigned char *ep; my_strtoll_t l = my_strtoll(d, &ep); if (!*ep && l >= 0 && (off_t)l >= 0 && (off_t)l == l) { if (!info->close || version >= 11 || h / 100 == 3) info->length = l; if (c->from + l >= 0) c->est_length = c->from + l; } mem_free(d); } if ((d = parse_http_header(e->head, cast_uchar "Accept-Ranges", NULL))) { if (!casestrcmp(d, cast_uchar "none") && !c->unrestartable) c->unrestartable = 1; mem_free(d); } else { if (!c->unrestartable && !c->from) c->unrestartable = 1; } if (info->bl_flags & BL_NO_RANGE && !c->unrestartable) c->unrestartable = 1; if ((d = parse_http_header(e->head, cast_uchar "Transfer-Encoding", NULL))) { if (!casestrcmp(d, cast_uchar "chunked")) { info->length = -2; info->chunk_remaining = -1; } mem_free(d); } if (!info->close && info->length == -1) info->close = 1; if ((d = parse_http_header(e->head, cast_uchar "Last-Modified", NULL))) { if (e->last_modified && casestrcmp(e->last_modified, d)) { delete_entry_content(e); if (c->from) { c->from = 0; mem_free(d); setcstate(c, S_MODIFIED); retry_connection(c); return; } } if (!e->last_modified) e->last_modified = d; else mem_free(d); } if (!e->last_modified && (d = parse_http_header(e->head, cast_uchar "Date", NULL))) e->last_modified = d; if (info->length == -1 || (version < 11 && info->close)) rb->close = 1; /* * Truncate entry if: * - we are using DNS-over-HTTPS * - it is compressed (the mix of an old and new document * would likely produce decompression error). * - it was http authentication (the user doesn't need to see the * authentication message). 
*/ if (c->doh) { truncate_entry(e, c->from, 0); } else if ((d = parse_http_header(e->head, cast_uchar "Content-Encoding", NULL))) { mem_free(d); truncate_entry(e, c->from, 0); } else if (previous_http_code == 401 || previous_http_code == 407) { truncate_entry(e, c->from, 0); } if (info->https_forward && h == 407) { http_end_request(c, 0, 1, S__OK); return; } read_http_data(c, rb); } static void http_get_header(struct connection *c) { struct read_buffer *rb; set_connection_timeout_keepal(c); if (!(rb = alloc_read_buffer(c))) return; rb->close = 1; read_from_socket(c, c->sock1, rb, http_got_header); }