/* url.c
 * (c) 2002 Mikulas Patocka
 * This file is a part of the Links program, released under GPL.
 *
 * URL parsing, joining and normalisation, plus the punycode encoder used
 * for internationalised host names.
 */

#include "links.h"

/*
 * Table of known URL schemes. The list is terminated by an entry whose
 * prot is NULL.
 *
 *  prot                  - scheme name (compared case-insensitively)
 *  port                  - default port, returned by get_port() when the
 *                          URL carries no explicit port
 *  func                  - handler taking a struct connection, or NULL
 *  nc_func               - handler taking a session and a string, or NULL
 *  free_syntax           - parse_url() does not split the URL; everything
 *                          after the scheme prefix is returned as data
 *  need_slashes          - parse_url() fails unless "//" follows the colon
 *  need_slash_after_host - parse_url() fails if the string ends right
 *                          after the host (or port)
 *  allow_post            - if zero, URLs containing POST_CHAR get no handler
 *  bypasses_socks        - value reported by url_bypasses_socks()
 */
static_const struct {
	char *prot;
	int port;
	void (*func)(struct connection *);
	void (*nc_func)(struct session *, unsigned char *);
	int free_syntax;
	int need_slashes;
	int need_slash_after_host;
	int allow_post;
	int bypasses_socks;
} protocols[]= {
	{"data", 0, data_func, NULL, 1, 0, 0, 0, 0},
	{"file", 0, file_func, NULL, 1, 1, 0, 0, 1},
	{"https", 443, https_func, NULL, 0, 1, 1, 1, 0},
	{"http", 80, http_func, NULL, 0, 1, 1, 1, 0},
	{"proxy", 3128, proxy_func, NULL, 0, 1, 1, 1, 0},
	{"ftp", 21, ftp_func, NULL, 0, 1, 1, 0, 0},
	{"finger", 79, finger_func, NULL, 0, 1, 1, 0, 0},
#ifndef DISABLE_SMB
	{"smb", 139, smb_func, NULL, 0, 1, 1, 0, 1},
#endif
	{"mailto", 0, NULL, mailto_func, 0, 0, 0, 0, 0},
	{"telnet", 0, NULL, telnet_func, 0, 0, 0, 0, 1},
	{"tn3270", 0, NULL, tn3270_func, 0, 0, 0, 0, 1},
	{"mms", 0, NULL, mms_func, 1, 0, 1, 0, 1},
	{"magnet", 0, NULL, magnet_func, 1, 0, 0, 0, 1},
	{"gopher", 0, NULL, gopher_func, 1, 0, 0, 0, 1},
#ifdef JS
	{"javascript", 0, NULL, javascript_func,1, 0, 0, 0, 0},
#endif
	{NULL, 0, NULL, NULL, 0, 0, 0, 0, 0}
};

/*
 * Look up the scheme given by the first l bytes of p.
 * Returns the index into protocols[] or -1 if the scheme is unknown.
 * The explicit strlen test rejects table names that merely start with
 * the given l bytes (e.g. "http" must not match "https").
 */
static int check_protocol(unsigned char *p, int l)
{
	int i;
	for (i = 0; protocols[i].prot; i++)
		if (!casecmp(cast_uchar protocols[i].prot, p, l) && strlen(cast_const_char protocols[i].prot) == (size_t)l) {
			return i;
		}
	return -1;
}

/*
 * Fetch the table attributes of the scheme named by the NUL-terminated
 * string prot. Every output pointer may be NULL, in which case that
 * attribute is skipped. Returns 0 when the scheme is known, -1 otherwise
 * (outputs are left untouched in that case).
 */
static int get_prot_info(unsigned char *prot, int *port, void (**func)(struct connection *), void (**nc_func)(struct session *ses, unsigned char *), int *allow_post, int *bypasses_socks)
{
	int i;
	for (i = 0; protocols[i].prot; i++)
		if (!casestrcmp(cast_uchar protocols[i].prot, prot)) {
			if (port) *port = protocols[i].port;
			if (func) *func = protocols[i].func;
			if (nc_func) *nc_func = protocols[i].nc_func;
			if (allow_post) *allow_post = protocols[i].allow_post;
			if (bypasses_socks) *bypasses_socks = protocols[i].bypasses_socks;
			return 0;
		}
	return -1;
}

/*
 * Split url into its components.
 *
 * Every output argument may be NULL. Non-NULL outputs are first reset
 * (pointers to NULL, lengths to 0) and then filled with pointers INTO url
 * (nothing is copied) and the length of each component:
 *
 *  prlen        - length of the scheme name (text before the first ':');
 *                 set as soon as a ':' is found, even if the scheme is
 *                 then rejected
 *  user/uslen   - user name before '@'
 *  pass/palen   - password between ':' and '@'
 *  host/holen   - host; a leading '[' ... ']' group is kept whole
 *  port/polen   - digits after "host:"
 *  data/dalen   - the rest of the URL up to POST_CHAR
 *  post         - text after POST_CHAR, or NULL if there is none
 *
 * Returns 0 on success, -1 if url is NULL, has no ':', names an unknown
 * scheme, or violates the scheme's syntax flags from protocols[].
 */
int parse_url(unsigned char *url, int *prlen, unsigned char **user, int *uslen, unsigned char **pass, int *palen, unsigned char **host, int *holen, unsigned char **port, int *polen, unsigned char **data, int *dalen, unsigned char **post)
{
	unsigned char *p, *q;
	unsigned char p_c[2];
	int a;
	if (prlen) *prlen = 0;
	if (user) *user = NULL;
	if (uslen) *uslen = 0;
	if (pass) *pass = NULL;
	if (palen) *palen = 0;
	if (host) *host = NULL;
	if (holen) *holen = 0;
	if (port) *port = NULL;
	if (polen) *polen = 0;
	if (data) *data = NULL;
	if (dalen) *dalen = 0;
	if (post) *post = NULL;
	if (!url || !(p = cast_uchar strchr(cast_const_char url, ':'))) return -1;
	if (prlen) *prlen = (int)(p - url);
	if ((a = check_protocol(url, (int)(p - url))) == -1) return -1;
	if (p[1] != '/' || p[2] != '/') {
		if (protocols[a].need_slashes) return -1;
		/* no "//": compensate so that p + 3 below is the char after ':' */
		p -= 2;
	}
	if (protocols[a].free_syntax) {
		if (data) *data = p + 3;
		if (dalen) *dalen = (int)strlen(cast_const_char(p + 3));
		return 0;
	}
	p += 3;
	q = p + strcspn(cast_const_char p, "@/?");
	if (!*q && protocols[a].need_slash_after_host) return -1;
	if (*q == '@') {
		unsigned char *pp;
		/* advance q to the last '@' that precedes the first '/' or '?' */
		while (strcspn(cast_const_char(q + 1), "@") < strcspn(cast_const_char(q + 1), "/?"))
			q = q + 1 + strcspn(cast_const_char(q + 1), "@");
		pp = cast_uchar strchr(cast_const_char p, ':');
		if (!pp || pp > q) {
			/* no ':' inside the userinfo: user name only */
			if (user) *user = p;
			if (uslen) *uslen = (int)(q - p);
		} else {
			if (user) *user = p;
			if (uslen) *uslen = (int)(pp - p);
			if (pass) *pass = pp + 1;
			if (palen) *palen = (int)(q - pp - 1);
		}
		p = q + 1;
	}
	if (p[0] == '[') {
		/* bracketed host: take everything up to and including ']' */
		q = cast_uchar strchr(cast_const_char p, ']');
		if (q) {
			q++;
			goto have_host;
		}
	}
	q = p + strcspn(cast_const_char p, ":/?");
	have_host:
	if (!*q && protocols[a].need_slash_after_host) return -1;
	if (host) *host = p;
	if (holen) *holen = (int)(q - p);
	if (*q == ':') {
		unsigned char *pp = q + strcspn(cast_const_char q, "/");
		int cc;
		if (*pp != '/' && protocols[a].need_slash_after_host) return -1;
		if (port) *port = q + 1;
		if (polen) *polen = (int)(pp - q - 1);
		/* the port may consist of decimal digits only */
		for (cc = 0; cc < pp - q - 1; cc++)
			if (q[cc+1] < '0' || q[cc+1] > '9') return -1;
		q = pp;
	}
	/* skip the '/' that follows the host; a '?' stays part of data */
	if (*q && *q != '?') q++;
	p = q;
	p_c[0] = POST_CHAR;
	p_c[1] = 0;
	q = p + strcspn(cast_const_char p, cast_const_char p_c);
	if (data) *data = p;
	if (dalen) *dalen = (int)(q - p);
	if (post) *post = *q ? q + 1 : NULL;
	return 0;
}

/*
 * Return a newly allocated copy of the scheme name of url, or NULL if
 * parse_url() rejects the URL. The caller frees the result (the callers
 * in this file use mem_free).
 */
unsigned char *get_protocol_name(unsigned char *url)
{
	int l;
	if (parse_url(url, &l, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) return NULL;
	return memacpy(url, l);
}

/*
 * Return a newly allocated copy of url truncated after the port (or after
 * the host when there is no port). For a proxy URL whose data part starts
 * with "https://", the host/port of that embedded URL are used instead, so
 * the copy extends to the end of the embedded host/port.
 * Returns NULL if parsing fails and an empty string if there is neither
 * host nor port.
 */
unsigned char *get_keepalive_id(unsigned char *url)
{
	unsigned char *h, *p, *k, *d;
	int hl, pl;
	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &h, &hl, &p, &pl, &d, NULL, NULL)) return NULL;
	if (is_proxy_url(url) && !casecmp(d, cast_uchar "https://", 8)) {
		if (parse_url(d, NULL, NULL, NULL, NULL, NULL, &h, &hl, &p, &pl, NULL, NULL, NULL)) return NULL;
	}
	k = p ? p + pl : h ? h + hl : NULL;
	if (!k) return stracpy(cast_uchar "");
	return memacpy(url, k - url);
}

/*
 * Return a newly allocated copy of the host part of url.
 * Unlike the other accessors, a parse failure yields an empty string
 * rather than NULL.
 */
unsigned char *get_host_name(unsigned char *url)
{
	unsigned char *h;
	int hl;
	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &h, &hl, NULL, NULL, NULL, NULL, NULL)) return stracpy(cast_uchar "");
	return memacpy(h, hl);
}

/*
 * Return a newly allocated copy of the user name in url, or NULL if the
 * URL does not parse.
 */
unsigned char *get_user_name(unsigned char *url)
{
	unsigned char *h;
	int hl;
	if (parse_url(url, NULL, &h, &hl, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) return NULL;
	return memacpy(h, hl);
}

/*
 * Return a newly allocated copy of the password in url, or NULL if the
 * URL does not parse.
 */
unsigned char *get_pass(unsigned char *url)
{
	unsigned char *h;
	int hl;
	if (parse_url(url, NULL,NULL, NULL, &h, &hl, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) return NULL;
	return memacpy(h, hl);
}

/*
 * Return a newly allocated copy of the explicit port string of url.
 * Returns NULL if the URL does not parse or the port is absent or empty.
 */
unsigned char *get_port_str(unsigned char *url)
{
	unsigned char *h;
	int hl;
	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &h, &hl, NULL, NULL, NULL)) return NULL;
	return hl ? memacpy(h, hl) : NULL;
}

/*
 * Return the port number of url.
 * An explicit port is accepted only in the range 1..65535; anything else
 * (including an empty port after ':') gives -1. Without an explicit port
 * the scheme's default from protocols[] is returned. Returns -1 if the
 * URL does not parse.
 */
int get_port(unsigned char *url)
{
	unsigned char *h;
	int hl;
	long n = -1;
	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &h, &hl, NULL, NULL, NULL)) return -1;
	if (h) {
		n = strtol(cast_const_char h, NULL, 10);
		if (n > 0 && n < 65536) return (int)n;
		return -1;
	}
	if ((h = get_protocol_name(url))) {
		int nn = -1; /* against warning */
		get_prot_info(h, &nn, NULL, NULL, NULL, NULL);
		mem_free(h);
		n = nn;
	}
	return (int)n;
}

/*
 * Return the connection handler (protocols[].func) for the scheme of url.
 * Returns NULL if the URL does not parse, the scheme has no such handler,
 * or the URL contains POST_CHAR while the scheme does not allow POST.
 */
void (*get_protocol_handle(unsigned char *url))(struct connection *)
{
	unsigned char *p;
	void (*f)(struct connection *) = NULL;
	int post = 0;
	if (!(p = get_protocol_name(url))) return NULL;
	get_prot_info(p, NULL, &f, NULL, &post, NULL);
	mem_free(p);
	if (!post && strchr(cast_const_char url, POST_CHAR)) return NULL;
	return f;
}

/*
 * Return the session-level handler (protocols[].nc_func) for the scheme
 * of url. Returns NULL under the same conditions as get_protocol_handle().
 */
void (*get_external_protocol_function(unsigned char *url))(struct session *, unsigned char *)
{
	unsigned char *p;
	void (*f)(struct session *, unsigned char *) = NULL;
	int post = 0;
	if (!(p = get_protocol_name(url))) return NULL;
	get_prot_info(p, NULL, NULL, &f, &post, NULL);
	mem_free(p);
	if (!post && strchr(cast_const_char url, POST_CHAR)) return NULL;
	return f;
}

/*
 * Return the bypasses_socks flag of the scheme of url.
 * A URL that does not parse is reported as bypassing (returns 1).
 */
int url_bypasses_socks(unsigned char *url)
{
	int ret = 0; /* against warning */
	unsigned char *p;
	if (!(p = get_protocol_name(url))) return 1;
	get_prot_info(p, NULL, NULL, NULL, NULL, &ret);
	mem_free(p);
	return ret;
}

/*
 * Return a pointer into url at the start of its data part, or NULL if the
 * URL does not parse. Nothing is allocated.
 */
unsigned char *get_url_data(unsigned char *url)
{
	unsigned char *d;
	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &d, NULL, NULL)) return NULL;
	return d;
}

/*
 * Directory-separator test. Relies on a local variable named lo being in
 * scope at the point of use: when lo is non-zero (file:// URLs)
 * dir_sep() decides, otherwise only '/' counts.
 */
#define dsep(x) (lo ? dir_sep(x) : (x) == '/')

/*
 * Collapse "/./" and "/../" path segments of url in place.
 *
 * javascript:, magnet: and gopher: URLs are left alone, as are URLs that
 * do not parse or whose data part starts at the very beginning of url.
 * If neither the first data character nor the one before it is a
 * separator, a '/' is inserted, which grows the string by one byte; the
 * caller must have reserved that byte (rewrite_url() does so through
 * extend_str(&n, 1)).
 * Processing stops at the first character for which end_of_dir() is true
 * (presumably the start of a query/fragment/POST part - defined
 * elsewhere); the remainder is copied verbatim.
 */
static void translate_directories(unsigned char *url)
{
	unsigned char *dd = get_url_data(url);
	unsigned char *s, *d;
	int lo = !casecmp(url, cast_uchar "file://", 7);
	if (!casecmp(url, cast_uchar "javascript:", 11)) return;
	if (!casecmp(url, cast_uchar "magnet:", 7)) return;
	if (!casecmp(url, cast_uchar "gopher:", 7)) return;
	if (!dd || dd == url /*|| *--dd != '/'*/) return;
	if (!dsep(*dd)) {
		dd--;
		if (!dsep(*dd)) {
			dd++;
			memmove(dd + 1, dd, strlen(cast_const_char dd) + 1);
			*dd = '/';
		}
	}
	/* s reads, d writes; d never moves back past dd */
	s = dd;
	d = dd;
	r:
	if (end_of_dir(url, s[0])) {
		memmove(d, s, strlen(cast_const_char s) + 1);
		return;
	}
	/* "/." followed by a separator or the end of the path: drop it */
	if (dsep(s[0]) && s[1] == '.' && (dsep(s[2]) || !s[2] || end_of_dir(url, s[2]))) {
		if (!dsep(s[2])) *d++ = *s;
		s += 2;
		goto r;
	}
	/* "/..": back d up to the previous separator, removing one segment */
	if (dsep(s[0]) && s[1] == '.' && s[2] == '.' && (dsep(s[3]) || !s[3] || end_of_dir(url, s[3]))) {
		while (d > dd) {
			d--;
			if (dsep(*d)) goto b;
		}
		b:
		if (!dsep(s[3])) *d++ = *s;
		s += 3;
		goto r;
	}
	/* ordinary character: copy it; the copied NUL ends the loop */
	if ((*d++ = *s++)) goto r;
}

/*
 * Rewrite a "#!" (or "#%21") fragment into an "_escaped_fragment_="
 * query parameter.
 *
 * up is an allocated string. If no rewrite applies, up itself is
 * returned unchanged. Otherwise up is freed and a newly allocated URL is
 * returned: the original URL without the fragment, then '?' or '&' as
 * needed, "_escaped_fragment_=", the percent-escaped fragment text, and
 * finally the POST part (from POST_CHAR on) if there was one.
 */
static unsigned char *translate_hashbang(unsigned char *up)
{
	unsigned char *u, *p, *dp, *data, *post_seq;
	int q;
	unsigned char *r;
	int rl;
	if (!strstr(cast_const_char up, "#!") && !strstr(cast_const_char up, "#%21")) return up;
	/* work on a copy: extract_position() cuts the fragment out of it */
	u = stracpy(up);
	p = extract_position(u);
	if (!p) {
		free_u_ret_up:
		mem_free(u);
		return up;
	}
	if (p[0] == '!') dp = p + 1;
	else if (!casecmp(p, cast_uchar "%21", 3)) dp = p + 3;
	else {
		mem_free(p);
		goto free_u_ret_up;
	}
	if (!(post_seq = cast_uchar strchr(cast_const_char u, POST_CHAR))) post_seq = cast_uchar strchr(cast_const_char u, 0);
	data = get_url_data(u);
	if (!data) data = u;
	r = init_str();
	rl = 0;
	add_bytes_to_str(&r, &rl, u, post_seq - u);
	q = (int)strlen(cast_const_char data);
	if (q && (data[q - 1] == '&' || data[q - 1] == '?'))
		; /* already ends with a separator */
	else if (strchr(cast_const_char data, '?')) add_chr_to_str(&r, &rl, '&');
	else add_chr_to_str(&r, &rl, '?');
	add_to_str(&r, &rl, cast_uchar "_escaped_fragment_=");
	for (; *dp; dp++) {
		unsigned char c = *dp;
		/* escape controls, space, '#', '%', '&', '+' and bytes >= 0x7f */
		if (c <= 0x20 || c == 0x23 || c == 0x25 || c == 0x26 || c == 0x2b || c >= 0x7f) {
			unsigned char h[4];
			/* %X takes an unsigned int; same cast as the other escapers here */
			sprintf(cast_char h, "%%%02X", (unsigned)c & 0xff);
			add_to_str(&r, &rl, h);
		} else {
			add_chr_to_str(&r, &rl, c);
		}
	}
	add_to_str(&r, &rl, post_seq);
	mem_free(u);
	mem_free(p);
	mem_free(up);
	return r;
}

/*
 * Rewrite Google Docs / Drive viewer URLs into direct export/download
 * URLs.
 *
 * n is an allocated string. If it does not start with one of the known
 * prefixes, has no '/' after the prefix, or its last path component
 * already starts with "/export", n is returned unchanged. Otherwise n is
 * freed and a new string result1 + id + result2 is returned.
 */
static unsigned char *rewrite_url_google_docs(unsigned char *n)
{
	int i;
	unsigned char *id, *id_end, *url_end;
	unsigned char *res;
	int l;
	struct {
		const char *beginning;
		const char *result1;
		const char *result2;
	} const patterns[] = {
		{ "https://docs.google.com/document/d/", "https://docs.google.com/document/d/", "/export?format=pdf" },
		{ "https://docs.google.com/document/u/", "https://docs.google.com/document/u/", "/export?format=pdf" },
		{ "https://docs.google.com/spreadsheets/d/", "https://docs.google.com/spreadsheets/d/", "/export?format=pdf" },
		{ "https://docs.google.com/spreadsheets/u/", "https://docs.google.com/spreadsheets/u/", "/export?format=pdf" },
		{ "https://docs.google.com/presentation/d/", "https://docs.google.com/presentation/d/", "/export/pdf" },
		{ "https://docs.google.com/presentation/u/", "https://docs.google.com/presentation/u/", "/export/pdf" },
		{ "https://drive.google.com/file/d/", "https://drive.google.com/uc?export=download&id=", "" },
		{ "https://drive.google.com/file/u/", "https://drive.google.com/uc?export=download&id=", "" }
	};
	/* cmpbeg() presumably returns 0 when n begins with the prefix */
	for (i = 0; i < (int)array_elements(patterns); i++) {
		if (!cmpbeg(n, cast_uchar patterns[i].beginning)) goto match;
	}
	return n;
	match:
	id = n + strlen(cast_const_char patterns[i].beginning);
	/* only look at the part before any fragment or POST data */
	url_end = id + strcspn(cast_const_char id, "#" POST_CHAR_STRING);
	id_end = memrchr(id, '/', url_end - id);
	if (!id_end) return n;
	if (!cmpbeg(id_end, cast_uchar "/export")) return n;
	if (!patterns[i].result2[0]) {
		/* Drive patterns: keep only the path component just before id_end */
		id = id_end;
		while (id[-1] != '/') id--;
	}
	res = init_str();
	l = 0;
	add_to_str(&res, &l, cast_uchar patterns[i].result1);
	add_bytes_to_str(&res, &l, id, id_end - id);
	add_to_str(&res, &l, cast_uchar patterns[i].result2);
	mem_free(n);
	return res;
}

/*
 * When built without HAVE_SVG, replace the first "/media/math/render/svg/"
 * in the data part of n by "/media/math/render/png/" (same length, done
 * in place). Always returns n.
 */
static unsigned char *rewrite_url_mediawiki_svg(unsigned char *n)
{
#ifndef HAVE_SVG
	const unsigned char u1[] = "/media/math/render/svg/";
	const unsigned char u2[] = "/media/math/render/png/";
	unsigned char *d, *s;
	d = get_url_data(n);
	if (!d) return n;
	s = cast_uchar strstr(cast_const_char d, cast_const_char u1);
	if (!s) return n;
	memcpy(s, u2, strlen(cast_const_char u2));
#endif
	return n;
}

/*
 * Apply all URL rewrites to the allocated string n and return the
 * (possibly reallocated or replaced) result. The string is first grown
 * by one byte because translate_directories() may insert a '/'.
 */
static unsigned char *rewrite_url(unsigned char *n)
{
	extend_str(&n, 1);
	translate_directories(n);
	n = translate_hashbang(n);
	n = rewrite_url_google_docs(n);
	n = rewrite_url_mediawiki_svg(n);
	return n;
}

/*
 * Return 1 if host equals hostname, or equals hostname cut at its first
 * '.', compared case-insensitively; 0 otherwise.
 * NOTE: hostname is modified in place (the first '.' is overwritten by
 * NUL) when the full comparison fails.
 */
static int test_qualified_name(unsigned char *host, unsigned char *hostname)
{
	unsigned char *c;
	if (!casestrcmp(host, hostname)) return 1;
	c = cast_uchar strchr(cast_const_char hostname, '.');
	if (c) {
		*c = 0;
		if (!casestrcmp(host, hostname)) return 1;
	}
	return 0;
}

/*
 * Return 1 if host denotes this machine: the empty string, "localhost",
 * or the name reported by gethostname()/uname() (with or without its
 * domain part). Returns 0 otherwise.
 */
static int is_local_host(unsigned char *host)
{
	if (!*host) return 1;
	if (!casestrcmp(host, cast_uchar "localhost")) return 1;
#if defined(HAVE_GETHOSTNAME)
	{
		int rs;
		unsigned char n[4096];
		n[0] = 0;
		EINTRLOOP(rs, gethostname(cast_char n, sizeof n));
		n[sizeof n - 1] = 0;
		/* a name filling the whole buffer may be truncated: ignore it */
		if (!rs && strlen(cast_const_char n) < sizeof n - 1) {
			if (test_qualified_name(host, n)) return 1;
		}
	}
#elif defined(HAVE_SYS_UTSNAME_H) && defined(HAVE_UNAME)
	{
		int rs;
		struct utsname name;
		memset(&name, 0, sizeof name);
		EINTRLOOP(rs, uname(&name));
		if (rs >= 0) {
			if (test_qualified_name(host, cast_uchar name.nodename)) return 1;
		}
	}
#endif
	return 0;
}

/*
 * Make a "file://" URL absolute with respect to the directory cwd.
 *
 * *up is an allocated URL. Nothing happens unless it starts with
 * "file://" and cwd is a non-empty string. The text between "file://"
 * and the first directory separator is treated as a host name: if it is
 * local (see is_local_host(), which includes the empty string) it is
 * removed in place and the function returns. Otherwise the path is
 * taken to be relative: *up is freed and replaced by
 * "file://" + percent-escaped cwd + '/' (if missing) + original path.
 */
static void insert_wd(unsigned char **up, unsigned char *cwd)
{
	unsigned char *u = *up;
	unsigned char *cw;
	unsigned char *url;
	unsigned char *host;
	int url_l;
	int i;
	if (!u || !cwd || !*cwd) return;
	if (casecmp(u, cast_uchar "file://", 7)) return;
	for (i = 7; u[i] && !dir_sep(u[i]); i++)
		;
	host = memacpy(u + 7, i - 7);
	if (is_local_host(host)) {
		mem_free(host);
		memmove(u + 7, u + i, strlen(cast_const_char (u + i)) + 1);
		return;
	}
	mem_free(host);
#ifdef DOS_FS
	/* "X:\..." style drive path is already absolute */
	if (upcase(u[7]) >= 'A' && upcase(u[7]) <= 'Z' && u[8] == ':' && dir_sep(u[9])) return;
#endif
#ifdef SPAD
	if (_is_absolute(cast_const_char(u + 7)) != _ABS_NO) return;
#endif
	url = init_str();
	url_l = 0;
	add_bytes_to_str(&url, &url_l, u, 7);
	for (cw = cwd; *cw; cw++) {
		unsigned char c = *cw;
		/* escape control characters, '%' and bytes >= 127 */
		if (c < ' ' || c == '%' || c >= 127) {
			unsigned char h[4];
			sprintf(cast_char h, "%%%02X", (unsigned)c & 0xff);
			add_to_str(&url, &url_l, h);
		} else {
			add_chr_to_str(&url, &url_l, c);
		}
	}
	if (!dir_sep(cwd[strlen(cast_const_char cwd) - 1])) add_chr_to_str(&url, &url_l, '/');
	add_to_str(&url, &url_l, u + 7);
	mem_free(u);
	*up = url;
}

/*
 * Return 1 if url contains any byte >= 128, 0 otherwise.
 */
int url_non_ascii(unsigned char *url)
{
	unsigned char *ch;
	for (ch = url; *ch; ch++)
		if (*ch >= 128)
			return 1;
	return 0;
}

/*
 * Convert a non-ASCII host name in the allocated URL nu to its IDN form.
 *
 * A pure-ASCII URL is returned unchanged. On success nu is freed and the
 * encoded URL is returned. If encoding fails: with canfail == 0 the
 * original nu is returned, otherwise nu is freed and NULL is returned.
 */
static unsigned char *translate_idn(unsigned char *nu, int canfail)
{
	if (url_non_ascii(nu)) {
		unsigned char *id = idn_encode_url(nu, 0);
		if (!id) {
			if (!canfail) return nu;
			mem_free(nu);
			return NULL;
		}
		mem_free(nu);
		return id;
	}
	return nu;
}

/*
 * For join_urls, the first url must be absolute (one that passes
 * parse_url without error --- if it is not absolute, it fails with an
 * internal error) and the second url is a path relative to it or also an
 * absolute url. If the second url is absolute, it is returned; if it is
 * relative, the first and the second url are joined.
 *
 * The result is newly allocated and has passed through translate_idn()
 * and rewrite_url(). NULL is returned only on the "bad base url" internal
 * error.
 */
unsigned char *join_urls(unsigned char *base, unsigned char *rel)
{
	unsigned char *p, *n, *pp, *ch;
	int l;
	int lo = !casecmp(base, cast_uchar "file://", 7);	/* used by dsep() */
	int data = !casecmp(base, cast_uchar "data:", 5);
	if (rel[0] == '#' || !rel[0]) {
		/* fragment only (or empty): base without fragment/POST + rel */
		n = stracpy(base);
		for (p = n; *p && *p != POST_CHAR && *p != '#'; p++)
			;
		*p = 0;
		add_to_strn(&n, rel);
		goto return_n;
	}
	if (rel[0] == '?' || rel[0] == '&') {
		/* query only: cut base at the same character (or at POST_CHAR) */
		unsigned char rj[3];
		unsigned char *d = get_url_data(base);
		if (!d) goto bad_base;
		rj[0] = rel[0];
		rj[1] = POST_CHAR;
		rj[2] = 0;
		d += strcspn(cast_const_char d, cast_const_char rj);
		n = memacpy(base, d - base);
		add_to_strn(&n, rel);
		goto return_n;
	}
	if (rel[0] == '/' && rel[1] == '/' && !data) {
		/* scheme-relative "//host/...": reuse the scheme of base */
		unsigned char *s;
		if (!(s = cast_uchar strstr(cast_const_char base, "//"))) {
			if (!(s = cast_uchar strchr(cast_const_char base, ':'))) {
				bad_base:
				internal_error("bad base url: %s", base);
				return NULL;
			}
			s++;
		}
		n = memacpy(base, s - base);
		add_to_strn(&n, rel);
		if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_n;
		/* retry with a trailing '/' for schemes that need one after the host */
		add_to_strn(&n, cast_uchar "/");
		if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_n;
		mem_free(n);
	}
	if (is_proxy_url(rel)) goto prx;
	if (!parse_url(rel, &l, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
		/* rel is already an absolute URL */
		n = stracpy(rel);
		goto return_n;
	}
	/*
	 * rel might be absolute but lack the '/' after the host: strip
	 * trailing bytes <= ' ', insert '/' before a trailing fragment (or at
	 * the end) and try to parse again.
	 */
	n = stracpy(rel);
	while (n[0] && n[strlen(cast_const_char n) - 1] <= ' ') n[strlen(cast_const_char n) - 1] = 0;
	extend_str(&n, 1);
	ch = cast_uchar strrchr(cast_const_char n, '#');
	if (!ch || strchr(cast_const_char ch, '/')) ch = n + strlen(cast_const_char n);
	memmove(ch + 1, ch, strlen(cast_const_char ch) + 1);
	*ch = '/';
	if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_n;
	mem_free(n);
	prx:
	/* genuinely relative: find the point in base where rel is attached */
	if (parse_url(base, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &p, NULL, NULL) || !p) {
		goto bad_base;
	}
	if (!dsep(*p)) p--;
	if (!data) {
		if (end_of_dir(base, rel[0])) for (; *p; p++) {
			if (end_of_dir(base, *p)) break;
		} else if (!dsep(rel[0])) for (pp = p; *pp; pp++) {
			/* keep everything up to and including the last separator */
			if (end_of_dir(base, *pp)) break;
			if (dsep(*pp)) p = pp + 1;
		}
	}
	n = memacpy(base, p - base);
	add_to_strn(&n, rel);
	goto return_n;
	return_n:
	n = translate_idn(n, 0);
	n = rewrite_url(n);
	return n;
}

/*
 * Turn user-typed text into an absolute URL.
 *
 * Leading and trailing spaces are ignored. Text that already parses as a
 * URL is used as is. Otherwise a scheme is guessed: "http://" when the
 * text starts with '[' and contains ']' or its last dot-separated host
 * label satisfies is_tld(), "ftp://" when it contains '@' or ':' before
 * any '.' or '/' or starts with "ftp.", and "file://" otherwise; text of
 * the form "scheme:rest" is retried as "scheme://rest".
 * cwd is used to make relative file:// URLs absolute (see insert_wd()).
 *
 * Returns a newly allocated URL, or NULL if the text is a proxy URL,
 * contains POST_CHAR without being a valid URL, or no valid URL could be
 * constructed.
 */
unsigned char *translate_url(unsigned char *url, unsigned char *cwd)
{
	unsigned char *ch;
	unsigned char *nu, *da;
	unsigned char *prefix;
	int sl;
	while (*url == ' ') url++;
	if (*url && url[strlen(cast_const_char url) - 1] == ' ') {
		/* strip trailing spaces on a copy and start over */
		nu = stracpy(url);
		while (*nu && nu[strlen(cast_const_char nu) - 1] == ' ') nu[strlen(cast_const_char nu) - 1] = 0;
		ch = translate_url(nu, cwd);
		mem_free(nu);
		return ch;
	}
	if (is_proxy_url(url)) return NULL;
	if (!parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &da, NULL, NULL)) {
		nu = stracpy(url);
		goto return_nu;
	}
	if (strchr(cast_const_char url, POST_CHAR)) return NULL;
	if (strstr(cast_const_char url, "://")) {
		/* maybe only the '/' after the host is missing: insert it */
		nu = stracpy(url);
		extend_str(&nu, 1);
		ch = cast_uchar strrchr(cast_const_char nu, '#');
		if (!ch || strchr(cast_const_char ch, '/')) ch = nu + strlen(cast_const_char nu);
		memmove(ch + 1, ch, strlen(cast_const_char ch) + 1);
		*ch = '/';
		if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_nu;
		mem_free(nu);
	}
	prefix = cast_uchar "file://";
	if (url[0] == '[' && strchr(cast_const_char url, ']')) {
		/* bracketed address: jumps to the label that also sets sl */
		ch = url;
		goto http;
	}
	ch = url + strcspn(cast_const_char url, ".:/@");
	sl = 0;
#ifdef SPAD
	if (strchr(cast_const_char url, ':') && _is_local(cast_const_char url)) goto set_prefix;
#endif
	if (*ch != ':' || *(url + strcspn(cast_const_char url, "/@")) == '@') {
		if (*url != '.' && *ch == '.') {
			unsigned char *e, *f, *g;
			int tl;
			/* e..f becomes the last dot-separated label of the host */
			for (e = ch + 1; *(f = e + strcspn(cast_const_char e, ".:/")) == '.'; e = f + 1)
				;
			g = memacpy(e, f - e);
			tl = is_tld(g);
			mem_free(g);
			if (tl)
				http: prefix = cast_uchar "http://", sl = 1;
		}
		if (*ch == '@' || *ch == ':' || !cmpbeg(url, cast_uchar "ftp.")) prefix = cast_uchar "ftp://", sl = 1;
		goto set_prefix;
		set_prefix:
		nu = stracpy(prefix);
		add_to_strn(&nu, url);
		/* sl: the guessed scheme needs a '/' after the host */
		if (sl && !strchr(cast_const_char url, '/')) add_to_strn(&nu, cast_uchar "/");
		if (parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
			mem_free(nu);
			return NULL;
		}
		goto return_nu;
	}
#ifdef DOS_FS
	/* single letter before ':' is a drive letter, not a scheme */
	if (ch == url + 1) goto set_prefix;
#endif
	/* "scheme:rest" -> "scheme://rest", then with a trailing '/' */
	nu = memacpy(url, ch - url + 1);
	add_to_strn(&nu, cast_uchar "//");
	add_to_strn(&nu, ch + 1);
	if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_nu;
	add_to_strn(&nu, cast_uchar "/");
	if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_nu;
	mem_free(nu);
	return NULL;
	return_nu:
	nu = translate_idn(nu, 1);
	if (!nu) return NULL;
	insert_wd(&nu, cwd);
	nu = rewrite_url(nu);
	return nu;
}

/*
 * Cut the fragment ("#...") out of url and return it.
 *
 * The search covers the data part of url (the whole string if it does
 * not parse) up to POST_CHAR. If a '#' is found, the text after it is
 * returned as a newly allocated string and url is modified in place so
 * that the '#' and the fragment are removed (any POST part is moved up).
 * Returns NULL, leaving url untouched, if there is no fragment.
 */
unsigned char *extract_position(unsigned char *url)
{
	unsigned char *u, *uu, *r;
	if ((u = get_url_data(url))) url = u;
	if (!(u = cast_uchar strchr(cast_const_char url, POST_CHAR))) u = cast_uchar strchr(cast_const_char url, 0);
	if (!(uu = memchr(url, '#', u - url))) return NULL;
	r = memacpy(uu + 1, u - uu - 1);
	memmove(uu, u, strlen(cast_const_char u) + 1);
	return r;
}

/*
 * Return non-zero if url should not be saved: it cannot be translated,
 * the translated URL does not parse, or it contains a non-empty password.
 */
int url_not_saveable(unsigned char *url)
{
	int p, palen;
	unsigned char *u = translate_url(url, cast_uchar "/");
	if (!u) return 1;
	p = parse_url(u, NULL, NULL, NULL, NULL, &palen, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
	mem_free(u);
	return p || palen;
}

/* characters that add_conv_str() may copy to HTML output unchanged */
#define accept_char(x)	((x) != 10 && (x) != 13 && (x) != '"' && (x) != '\'' && (x) != '&' && (x) != '<' && (x) != '>')
/* characters that add_conv_str() percent-encodes in mode 1 */
#define special_char(x)	((x) < ' ' || (x) == '%' || (x) == '#' || (x) >= 127)

/*
 * Append ll bytes of b to the string *s (current length *l), converting
 * according to encode_special:
 *
 * -2 percent to raw
 * -1 percent to html
 *  0 raw to html
 *  1 raw to percent
 *
 * NUL bytes (literal or produced by decoding "%00") are dropped. In the
 * two HTML modes a space becomes "&nbsp;", CR and LF are dropped and the
 * characters rejected by accept_char() become numeric character
 * references.
 */
void add_conv_str(unsigned char **s, int *l, unsigned char *b, int ll, int encode_special)
{
	for (; ll > 0; ll--, b++) {
		unsigned char chr = *b;
		if (!chr) continue;
		if (special_char(chr) && encode_special == 1) {
			unsigned char h[4];
			sprintf(cast_char h, "%%%02X", (unsigned)chr & 0xff);
			add_to_str(s, l, h);
			continue;
		}
		/* "%XX" with two hex digits still inside the remaining length */
		if (chr == '%' && encode_special <= -1 && ll > 2 && ((b[1] >= '0' && b[1] <= '9') || (b[1] >= 'A' && b[1] <= 'F') || (b[1] >= 'a' && b[1] <= 'f')) && ((b[2] >= '0' && b[2] <= '9') || (b[2] >= 'A' && b[2] <= 'F') || (b[2] >= 'a' && b[2] <= 'f'))) {
			int i;
			chr = 0;
			for (i = 1; i < 3; i++) {
				if (b[i] >= '0' && b[i] <= '9') chr = chr * 16 + b[i] - '0';
				if (b[i] >= 'A' && b[i] <= 'F') chr = chr * 16 + b[i] - 'A' + 10;
				if (b[i] >= 'a' && b[i] <= 'f') chr = chr * 16 + b[i] - 'a' + 10;
			}
			ll -= 2;
			b += 2;
			if (!chr) continue;
		}
		if (chr == ' ' && (!encode_special || encode_special == -1)) {
			/* HTML output modes: emit the non-breaking space entity */
			add_to_str(s, l, cast_uchar "&nbsp;");
		} else if (accept_char(chr) || encode_special == -2) {
			add_chr_to_str(s, l, chr);
		} else if (chr == 10 || chr == 13) {
		} else {
			add_to_str(s, l, cast_uchar "&#");
			add_num_to_str(s, l, (int)chr);
			add_chr_to_str(s, l, ';');
		}
	}
}

/*
 * Cygwin only (a no-op elsewhere): re-encode the tail of *s starting at
 * offset start_l. Bytes >= 128 are converted to Unicode - via the Windows
 * charset, or by UTF-8 decoding when that charset is UTF-8 - and appended
 * as numeric character references. A 0x18 byte introduces a UTF-8
 * sequence that is decoded the same way. All other bytes are copied.
 */
void convert_file_charset(unsigned char **s, int *l, int start_l)
{
#ifdef __CYGWIN__
	int win_charset = windows_charset();
	unsigned char *cpy = stracpy(*s + start_l);
	unsigned char *ptr, *end;
	/* truncate the output at start_l; the tail is rebuilt from cpy */
	(*s)[*l = start_l] = 0;
	end = cast_uchar strchr(cast_const_char cpy, 0);
	for (ptr = cpy; ptr < end; ptr++) {
		unsigned char chr = *ptr;
		unsigned u;
		unsigned char *p;
		if (chr == 0x18) {
			p = ptr + 1;
			goto try_get_utf;
		}
		if (chr >= 128) {
			if (win_charset != utf8_table) {
				u = (unsigned)cp2u(chr, win_charset);
				if (u != -1U) goto put_u;
			} else {
				p = ptr;
				try_get_utf:
				GET_UTF_8(p, u);
				if (u) {
					ptr = p - 1;
					put_u:
					add_to_str(s, l, cast_uchar "&#");
					add_num_to_str(s, l, (int)u);
					add_chr_to_str(s, l, ';');
					continue;
				}
			}
		}
		add_chr_to_str(s, l, chr);
	}
	mem_free(cpy);
#endif
}

/* prefix that marks a punycode-encoded host label, and its length */
static_const unsigned char xn[] = "xn--";
static_const unsigned xn_l = sizeof(xn) - 1;

/* punycode parameters; the values match those given in RFC 3492 */
#define puny_max_length	63
#define puny_base	36
#define puny_tmin	1
#define puny_tmax	26
#define puny_skew	38
#define puny_damp	700
#define puny_init_bias	72

/*
 * Return non-zero if c is an ASCII letter, digit or '-'.
 */
static int ascii_allowed(unsigned c)
{
	return c == '-' || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

/*
 * Map a digit value to its character: 0..25 -> 'a'..'z', 26..35 ->
 * '0'..'9'.
 */
static unsigned char puny_chrenc(unsigned n)
{
	return n + (n < 26 ? 'a' : '0' - 26);
}

/*
 * Inverse of puny_chrenc(); upper-case letters are accepted as well.
 * The argument is not validated here.
 */
static unsigned puny_chrdec(unsigned char c)
{
	if (c <= '9') return c - '0' + 26;
	if (c <= 'Z') return c - 'A';
	return c - 'a';
}

/*
 * State of the bias adaptation shared by encoder and decoder.
 *
 *  ascii_numpoints - number of basic (ASCII) code points in the label
 *  numpoints       - number of code points handled so far
 *  bias            - current bias
 *  k               - position of the next digit (advanced in steps of
 *                    puny_base by puny_threshold())
 */
struct puny_state {
	unsigned ascii_numpoints;
	unsigned numpoints;
	unsigned bias;
	unsigned k;
};

/*
 * Initialise st for a label containing numpoints basic code points.
 */
static void puny_init(struct puny_state *st, unsigned numpoints)
{
	st->ascii_numpoints = numpoints;
	st->numpoints = numpoints;
	st->bias = puny_init_bias;
	st->k = puny_base;
}

/*
 * Return the threshold for the current digit position, clamped to
 * puny_tmin..puny_tmax, and advance to the next digit position.
 */
static unsigned puny_threshold(struct puny_state *st)
{
	unsigned k = st->k;
	st->k += puny_base;
	if (k <= st->bias) return puny_tmin;
	if (k >= st->bias + puny_tmax) return puny_tmax;
	return k - st->bias;
}

/*
 * Recompute the bias after a delta of val has been coded and reset the
 * digit position. The first delta (no non-basic code point handled yet)
 * is damped by puny_damp, later ones are halved.
 */
static void puny_adapt(struct puny_state *st, unsigned val)
{
	unsigned k;
	val = st->ascii_numpoints == st->numpoints ? val / puny_damp : val / 2;
	st->numpoints++;
	val += val / st->numpoints;
	k = 0;
	while (val > ((puny_base - puny_tmin) * puny_tmax) / 2) {
		val /= puny_base - puny_tmin;
		k += puny_base;
	}
	st->bias = k + (((puny_base - puny_tmin + 1) * val) / (val + puny_skew));
	st->k = puny_base;
}

/*
 * Punycode-encode one host label.
 *
 * s/len is the UTF-8 label. Code points are lower-cased first. Returns a
 * newly allocated string: "xn--" + encoded label, or just the lower-cased
 * label when it contains ASCII only. Returns NULL if the label is too
 * long (more than 7 * puny_max_length input bytes, more than
 * puny_max_length code points, or more than puny_max_length output
 * bytes), contains an ASCII character other than letters, digits and
 * '-', or contains a code point above 0x10FFFF.
 */
static unsigned char *puny_encode(unsigned char *s, int len)
{
	unsigned char *p;
	unsigned *uni;
	unsigned uni_l;
	unsigned char *res;
	int res_l;
	unsigned i;
	unsigned ni, cchar, skip;
	struct puny_state st;
	if (len > 7 * puny_max_length) goto err;
	/* len bytes can never yield more than len code points */
	uni = mem_alloc(len * sizeof(unsigned));
	uni_l = 0;
	for (p = s; p < s + len; ) {
		unsigned c;
		GET_UTF_8(p, c);
		c = uni_locase(c);
		if (c < 128 && !ascii_allowed(c)) goto err_free_uni;
		if (c > 0x10FFFF) goto err_free_uni;
		uni[uni_l++] = c;
	}
	if (uni_l > puny_max_length) goto err_free_uni;
	res = init_str();
	res_l = 0;
	add_to_str(&res, &res_l, cast_uchar xn);
	/* basic code points are copied literally; ni counts them */
	ni = 0;
	for (i = 0; i < uni_l; i++) {
		if (uni[i] < 128) {
			add_chr_to_str(&res, &res_l, uni[i]);
			ni++;
		}
	}
	if (ni == uni_l) {
		/* ASCII only: drop the "xn--" prefix again */
		memmove(res, res + xn_l, res_l - xn_l + 1);
		res_l -= 4;
		goto ret_free_uni;
	}
	/* the delimiter is written only if there was at least one basic char */
	if (res_l != (int)xn_l) add_chr_to_str(&res, &res_l, '-');
	puny_init(&st, ni);
	cchar = 128;
	skip = 0;
	while (1) {
		unsigned dlen = 0;
		unsigned lchar = -1U;
		/*
		 * dlen = code points already handled (below cchar),
		 * lchar = smallest code point not handled yet
		 */
		for (i = 0; i < uni_l; i++) {
			unsigned c = uni[i];
			if (c < cchar) dlen++;
			else if (c < lchar) lchar = c;
		}
		if (lchar == -1U) break;
		skip += (lchar - cchar) * (dlen + 1);
		for (i = 0; i < uni_l; i++) {
			unsigned c = uni[i];
			if (c < lchar) skip++;
			if (c == lchar) {
				unsigned n;
				/*fprintf(stderr, "%d\n", skip);*/
				/* write skip as a variable-length integer */
				n = skip;
				while (1) {
					unsigned t = puny_threshold(&st);
					if (n < t) {
						add_chr_to_str(&res, &res_l, puny_chrenc(n));
						break;
					} else {
						unsigned d = (n - t) % (puny_base - t);
						n = (n - t) / (puny_base - t);
						add_chr_to_str(&res, &res_l, puny_chrenc(d + t));
					}
				}
				puny_adapt(&st, skip);
				skip = 0;
			}
		}
		skip++;
		cchar = lchar + 1;
	}
	ret_free_uni:
	mem_free(uni);
	if (res_l > puny_max_length) {
		/* res is not handed to the caller on this path: release it */
		mem_free(res);
		goto err;
	}
	return res;
	err_free_uni:
	mem_free(uni);
	err:
	return NULL;
}
/*
 * Decode one punycode host label.
 *
 * s/len must start with "xn--" (compared with casecmp, presumably
 * case-insensitively); otherwise NULL is returned. The remainder may
 * contain only ASCII letters, digits and '-' and may be at most
 * puny_max_length bytes long. Returns the decoded label as a newly
 * allocated UTF-8 string, or NULL on malformed input (forbidden
 * character, truncated number, arithmetic overflow, code point above
 * 0x10FFFF).
 */
static unsigned char *puny_decode(unsigned char *s, int len)
{
	unsigned char *p, *last_dash;
	unsigned *uni;
	unsigned uni_l;
	unsigned char *res;
	int res_l;
	unsigned i;
	unsigned cchar, pos;
	struct puny_state st;
	if (!(len >= 4 && !casecmp(s, xn, xn_l))) return NULL;
	s += xn_l;
	len -= xn_l;
	last_dash = NULL;
	for (p = s; p < s + len; p++) {
		unsigned char c = *p;
		if (!ascii_allowed(c)) goto err;
		if (c == '-') last_dash = p;
	}
	if (len > puny_max_length) goto err;
	/* every output code point consumes at least one input byte */
	uni = mem_alloc(len * sizeof(unsigned));
	uni_l = 0;
	if (last_dash) {
		/* everything before the last '-' is literal basic code points */
		for (p = s; p < last_dash; p++) uni[uni_l++] = *p;
		p = last_dash + 1;
	} else {
		p = s;
	}
	puny_init(&st, uni_l);
	cchar = 128;
	pos = 0;
	while (p < s + len) {
		unsigned w = 1;
		unsigned val = 0;
		/* read one variable-length integer into val, checking overflow */
		while (1) {
			unsigned n, t, nv, nw;
			if (p >= s + len) goto err_free_uni;
			n = puny_chrdec(*p++);
			nw = n * w;
			if (nw / w != n) goto err_free_uni;
			nv = val + nw;
			if (nv < val) goto err_free_uni;
			val = nv;
			t = puny_threshold(&st);
			if (n < t) break;
			nw = w * (puny_base - t);
			if (nw / w != puny_base - t) goto err_free_uni;
			w = nw;
		}
		puny_adapt(&st, val);
		if (val > uni_l - pos) {
			/*
			 * the delta runs past the end of the current string: wrap
			 * around, advancing the code point once per wrap
			 */
			unsigned cp;
			val -= uni_l - pos + 1;
			pos = 0;
			cp = val / (uni_l + 1) + 1;
			val %= uni_l + 1;
			if (cchar + cp < cchar) goto err_free_uni;
			cchar += cp;
			if (cchar > 0x10FFFF) goto err_free_uni;
		}
		pos += val;
		/* insert cchar at pos, shifting the tail up by one element */
		memmove(uni + pos + 1, uni + pos, (uni_l - pos) * sizeof(unsigned));
		uni[pos++] = cchar;
		uni_l++;
	}
	res = init_str();
	res_l = 0;
	for (i = 0; i < uni_l; i++) {
		unsigned char *us = encode_utf_8(uni[i]);
		add_to_str(&res, &res_l, us);
	}
	mem_free(uni);
	return res;
	err_free_uni:
	mem_free(uni);
	err:
	return NULL;
}

/*
 * Convert a host name label by label.
 *
 * host/len is split at any of the characters listed in separator; the
 * separator characters themselves are copied to the output unchanged.
 * With decode == 0 every label containing a byte >= 0x80 is punycode
 * encoded; if encoding fails the whole call fails and NULL is returned.
 * With decode != 0 every label that puny_decode() accepts is replaced by
 * its decoded form. Labels that are not converted are copied verbatim.
 * Returns a newly allocated string.
 */
unsigned char *idn_encode_host(unsigned char *host, int len, unsigned char *separator, int decode)
{
	unsigned char *p, *s;
	int pl, l, i;
	p = init_str();
	pl = 0;
	next_host_elem:
	/* l = length of the current label (up to the nearest separator) */
	l = len;
	for (s = separator; *s; s++) {
		unsigned char *d = memchr(host, *s, l);
		if (d) l = (int)(d - host);
	}
	if (!decode) {
		for (i = 0; i < l; i++) if (host[i] >= 0x80) {
			unsigned char *enc = puny_encode(host, l);
			if (!enc) goto err;
			add_to_str(&p, &pl, enc);
			mem_free(enc);
			goto advance_host;
		}
	} else {
		unsigned char *dec = puny_decode(host, l);
		if (dec) {
			add_to_str(&p, &pl, dec);
			mem_free(dec);
			goto advance_host;
		}
	}
	add_bytes_to_str(&p, &pl, host, l);
	advance_host:
	if (l != len) {
		/* copy the separator and continue with the next label */
		add_chr_to_str(&p, &pl, host[l]);
		host += l + 1;
		len -= l + 1;
		goto next_host_elem;
	}
	return p;
	err:
	mem_free(p);
	return NULL;
}

/*
 * Return a newly allocated copy of url whose host part has been passed
 * through idn_encode_host() with "." as the separator (encoding when
 * decode == 0, decoding otherwise). If url does not parse or has no
 * host, an empty host at the start of url is assumed, so the result is a
 * plain copy. Returns NULL if the host cannot be encoded.
 */
unsigned char *idn_encode_url(unsigned char *url, int decode)
{
	unsigned char *host, *p, *h;
	int holen, pl;
	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &host, &holen, NULL, NULL, NULL, NULL, NULL) || !host) {
		host = url;
		holen = 0;
	}
	h = idn_encode_host(host, holen, cast_uchar ".", decode);
	if (!h) return NULL;
	p = init_str();
	pl = 0;
	add_bytes_to_str(&p, &pl, url, host - url);
	add_to_str(&p, &pl, h);
	add_to_str(&p, &pl, host + holen);
	mem_free(h);
	return p;
}

/*
 * Produce a displayable form of a URL (just_host == 0) or of a host or
 * host list (just_host != 0) for the terminal term.
 *
 * A NULL url gives an empty string. For a URL, anything from POST_CHAR
 * on is cut off. Text that is pure ASCII and does not contain "xn--" is
 * returned as a plain copy. Otherwise the IDN parts are decoded and
 * converted to the terminal charset; the converted text is then
 * converted back and re-encoded to check that it round-trips to the
 * original. If it does, the decoded text is returned - prefixed with
 * "(IDN) " when warn_idn is set and decoding actually changed something
 * (or re-encoding failed). If it does not round-trip, the original
 * (still encoded) text converted to the terminal charset is returned.
 * The result is always newly allocated.
 */
static unsigned char *display_url_or_host(struct terminal *term, unsigned char *url, int warn_idn, int just_host, unsigned char *separator)
{
	unsigned char *uu, *url_dec, *url_conv, *url_conv2, *url_enc, *ret;
	int is_idn;
	if (!url) return stracpy(cast_uchar "");
	/* from here on url is a private copy that this function owns */
	url = stracpy(url);
	if (!just_host) {
		if ((uu = cast_uchar strchr(cast_const_char url, POST_CHAR))) *uu = 0;
	}
	if (!url_non_ascii(url) && !strstr(cast_const_char url, cast_const_char xn)) return url;
	if (!just_host)
		url_dec = idn_encode_url(url, 1);
	else
		url_dec = idn_encode_host(url, (int)strlen(cast_const_char url), separator, 1);
	is_idn = strcmp(cast_const_char url_dec, cast_const_char url);
	url_conv = convert(utf8_table, term_charset(term), url_dec, NULL);
	mem_free(url_dec);
	/* round trip: terminal charset -> UTF-8 -> punycode */
	url_conv2 = convert(term_charset(term), utf8_table, url_conv, NULL);
	if (!just_host)
		url_enc = idn_encode_url(url_conv2, 0);
	else
		url_enc = idn_encode_host(url_conv2, (int)strlen(cast_const_char url_conv2), separator, 0);
	if (!url_enc) url_enc = stracpy(url_conv2), is_idn = 1;
	mem_free(url_conv2);
	if (!strcmp(cast_const_char url_enc, cast_const_char url)) {
		if (is_idn && warn_idn) {
			ret = stracpy(cast_uchar "(IDN) ");
			add_to_strn(&ret, url_conv);
		} else {
			/* hand url_conv over to the caller; DUMMY is freed below instead */
			ret = url_conv;
			url_conv = DUMMY;
		}
	} else {
		ret = convert(utf8_table, term_charset(term), url, NULL);
	}
	mem_free(url);
	mem_free(url_conv);
	mem_free(url_enc);
	return ret;
}

/*
 * Displayable form of a whole URL; see display_url_or_host().
 */
unsigned char *display_url(struct terminal *term, unsigned char *url, int warn_idn)
{
	return display_url_or_host(term, url, warn_idn, 0, cast_uchar ".");
}

/*
 * Displayable form of a single host name, always with the IDN warning
 * enabled; see display_url_or_host().
 */
unsigned char *display_host(struct terminal *term, unsigned char *host)
{
	return display_url_or_host(term, host, 1, 1, cast_uchar ".");
}

/*
 * Displayable form of a host list: labels are split at both '.' and ','
 * and no IDN warning is added; see display_url_or_host().
 */
unsigned char *display_host_list(struct terminal *term, unsigned char *host)
{
	return display_url_or_host(term, host, 0, 1, cast_uchar ".,");
}