polipo-1.0.4.1/0000755000175000017500000000000011331407220012460 5ustar chrisdchrisdpolipo-1.0.4.1/util.h0000644000175000017500000001045611331407220013614 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* These are Polipo's error codes. They need to be positive integers, and must not collide with possible errno values. Starting at 2^16 should be safe enough. 
*/ #define E0 (1 << 16) #define E1 (2 << 16) #define E2 (3 << 16) #define E3 (4 << 16) #define EUNKNOWN (E0) #define EDOSHUTDOWN (E0 + 1) #define EDOGRACEFUL (E0 + 2) #define EDOTIMEOUT (E0 + 3) #define ECLIENTRESET (E0 + 4) #define ESYNTAX (E0 + 5) #define EREDIRECTOR (E0 + 6) #define EDNS_HOST_NOT_FOUND (E1) #define EDNS_NO_ADDRESS (E1 + 1) #define EDNS_NO_RECOVERY (E1 + 2) #define EDNS_TRY_AGAIN (E1 + 3) #define EDNS_INVALID (E1 + 4) #define EDNS_UNSUPPORTED (E1 + 5) #define EDNS_FORMAT (E1 + 6) #define EDNS_REFUSED (E1 + 7) #define EDNS_CNAME_LOOP (E1 + 8) #define ESOCKS_PROTOCOL (E2) /* These correspond to SOCKS status codes 91 through 93 */ #define ESOCKS_REJECT_FAIL (E2 + 1) #define ESOCKS_REJECT_IDENTD (E2 + 2) #define ESOCKS_REJECT_UID_MISMATCH (E2 + 3) /* (ESOCKS5_BASE + n) corresponds to SOCKS5 status code n (0 to 8) */ #define ESOCKS5_BASE (E3) typedef struct _IntRange { int from; int to; } IntRangeRec, *IntRangePtr; typedef struct _IntList { int length; int size; IntRangePtr ranges; } IntListRec, *IntListPtr; char *strdup_n(const char *restrict buf, int n) ATTRIBUTE ((malloc)); int snnprintf(char *restrict buf, int n, int len, const char *format, ...) 
ATTRIBUTE ((format (printf, 4, 5))); int snnvprintf(char *restrict buf, int n, int len, const char *format, va_list args) ATTRIBUTE ((format (printf, 4, 0))); int snnprint_n(char *restrict buf, int n, int len, const char *s, int slen); int strcmp_n(const char *string, const char *buf, int n) ATTRIBUTE ((pure)); int digit(char) ATTRIBUTE ((const)); int letter(char) ATTRIBUTE ((const)); char lwr(char) ATTRIBUTE ((const)); char* lwrcpy(char *restrict dst, const char *restrict src, int n); int lwrcmp(const char *as, const char *bs, int n) ATTRIBUTE ((pure)); int strcasecmp_n(const char *string, const char *buf, int n) ATTRIBUTE ((pure)); int atoi_n(const char *restrict string, int n, int len, int *value_return); int isWhitespace(const char *string) ATTRIBUTE((pure)); #ifndef HAVE_MEMRCHR void *memrchr(const void *s, int c, size_t n) ATTRIBUTE ((pure)); #endif int h2i(char h) ATTRIBUTE ((const)); char i2h(int i) ATTRIBUTE ((const)); int log2_floor(int x) ATTRIBUTE ((const)); int log2_ceil(int x) ATTRIBUTE ((const)); char* vsprintf_a(const char *f, va_list args) ATTRIBUTE ((malloc, format (printf, 1, 0))); char* sprintf_a(const char *f, ...) 
ATTRIBUTE ((malloc, format (printf, 1, 2))); unsigned int hash(unsigned seed, const void *restrict key, int key_size, unsigned int hash_size) ATTRIBUTE ((pure)); char *pstrerror(int e); time_t mktime_gmt(struct tm *tm) ATTRIBUTE ((pure)); AtomPtr expandTilde(AtomPtr filename); void do_daemonise(int noclose); void writePid(char *pidfile); int b64cpy(char *restrict dst, const char *restrict src, int n, int fss); int b64cmp(const char *restrict a, int an, const char *restrict b, int bn) ATTRIBUTE ((pure)); IntListPtr makeIntList(int size) ATTRIBUTE ((malloc)); void destroyIntList(IntListPtr list); int intListMember(int n, IntListPtr list) ATTRIBUTE ((pure)); int intListCons(int from, int to, IntListPtr list); int physicalMemory(void); polipo-1.0.4.1/util.c0000644000175000017500000004200511331407220013602 0ustar chrisdchrisd/* Copyright (c) 2003-2007 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "polipo.h" /* Note that this is different from GNU's strndup(3). 
*/ char * strdup_n(const char *restrict buf, int n) { char *s; s = malloc(n + 1); if(s == NULL) return NULL; memcpy(s, buf, n); s[n] = '\0'; return s; } int snnprintf(char *restrict buf, int n, int len, const char *format, ...) { va_list args; int rc; va_start(args, format); rc = snnvprintf(buf, n, len, format, args); va_end(args); return rc; } int snnvprintf(char *restrict buf, int n, int len, const char *format, va_list args) { int rc = -1; if(n < 0) return -2; if(n < len) rc = vsnprintf(buf + n, len - n, format, args); if(rc >= 0 && n + rc <= len) return n + rc; else return -1; } int snnprint_n(char *restrict buf, int n, int len, const char *s, int slen) { int i = 0; if(n < 0) return -2; while(i < slen && n < len) buf[n++] = s[i++]; if(n < len) return n; else return -1; } int strcmp_n(const char *string, const char *buf, int n) { int i; i = 0; while(string[i] != '\0' && i < n) { if(string[i] < buf[i]) return -1; else if(string[i] > buf[i]) return 1; i++; } if(string[i] == '\0' || i == n) return 0; else if(i == n) return 1; else return -1; } int letter(char c) { if(c >= 'A' && c <= 'Z') return 1; if(c >= 'a' && c <= 'z') return 1; return 0; } int digit(char c) { if(c >= '0' && c <= '9') return 1; return 0; } char lwr(char a) { if(a >= 'A' && a <= 'Z') return a | 0x20; else return a; } char * lwrcpy(char *restrict dst, const char *restrict src, int n) { int i; for(i = 0; i < n; i++) dst[i] = lwr(src[i]); return dst; } int lwrcmp(const char *as, const char *bs, int n) { int i; for(i = 0; i < n; i++) { char a = lwr(as[i]), b = lwr(bs[i]); if(a < b) return -1; else if(a > b) return 1; } return 0; } int strcasecmp_n(const char *string, const char *buf, int n) { int i; i = 0; while(string[i] != '\0' && i < n) { char a = lwr(string[i]), b = lwr(buf[i]); if(a < b) return -1; else if(a > b) return 1; i++; } if(string[i] == '\0' && i == n) return 0; else if(i == n) return 1; else return -1; } int atoi_n(const char *restrict string, int n, int len, int *value_return) { int 
i = n; int val = 0; if(i >= len || !digit(string[i])) return -1; while(i < len && digit(string[i])) { val = val * 10 + (string[i] - '0'); i++; } *value_return = val; return i; } int isWhitespace(const char *string) { while(*string != '\0') { if(*string == ' ' || *string == '\t') string++; else return 0; } return 1; } #ifndef HAVE_MEMRCHR void * memrchr(const void *s, int c, size_t n) { const unsigned char *ss = s; unsigned char cc = c; size_t i; for(i = n - 1; i >= 0; i--) if(ss[i] == cc) return (void*)(ss + i); return NULL; } #endif int h2i(char h) { if(h >= '0' && h <= '9') return h - '0'; else if(h >= 'a' && h <= 'f') return h - 'a' + 10; else if(h >= 'A' && h <= 'F') return h - 'A' + 10; else return -1; } char i2h(int i) { if(i < 0 || i >= 16) return '?'; if(i < 10) return i + '0'; else return i - 10 + 'A'; } /* floor(log2(x)) */ int log2_floor(int x) { int i, j; assert(x > 0); i = 0; j = 1; while(2 * j <= x) { i++; j *= 2; } return i; } /* ceil(log2(x)) */ int log2_ceil(int x) { int i, j; assert(x > 0); i = 0; j = 1; while(j < x) { i++; j *= 2; } return i; } #ifdef HAVE_ASPRINTF char * vsprintf_a(const char *f, va_list args) { char *r; int rc; rc = vasprintf(&r, f, args); if(rc < 0) return NULL; return r; } #else char* vsprintf_a(const char *f, va_list args) { int n, size; char buf[64]; char *string; n = vsnprintf(buf, 64, f, args); if(n >= 0 && n < 64) { return strdup_n(buf, n); } if(n >= 64) size = n + 1; else size = 96; while(1) { string = malloc(size); if(!string) return NULL; n = vsnprintf(string, size, f, args); if(n >= 0 && n < size) return string; else if(n >= size) size = n + 1; else size = size * 3 / 2; free(string); if(size > 16 * 1024) return NULL; } /* NOTREACHED */ } #endif char* sprintf_a(const char *f, ...) 
{ char *s; va_list args; va_start(args, f); s = vsprintf_a(f, args); va_end(args); return s; } unsigned int hash(unsigned int seed, const void *restrict key, int key_size, unsigned int hash_size) { int i; unsigned int h; h = seed; for(i = 0; i < key_size; i++) h = (h << 5) + (h >> (hash_size - 5)) + ((unsigned char*)key)[i]; return h & ((1 << hash_size) - 1); } char * pstrerror(int e) { char *s; static char buf[20]; switch(e) { case EDOSHUTDOWN: s = "Immediate shutdown requested"; break; case EDOGRACEFUL: s = "Graceful shutdown requested"; break; case EDOTIMEOUT: s = "Timeout"; break; case ECLIENTRESET: s = "Connection reset by client"; break; case ESYNTAX: s = "Incorrect syntax"; break; case EREDIRECTOR: s = "Redirector error"; break; case EDNS_HOST_NOT_FOUND: s = "Host not found"; break; case EDNS_NO_ADDRESS: s = "No address"; break; case EDNS_NO_RECOVERY: s = "Permanent name server failure"; break; case EDNS_TRY_AGAIN: s = "Temporary name server failure"; break; case EDNS_INVALID: s = "Invalid reply from name server"; break; case EDNS_UNSUPPORTED: s = "Unsupported DNS reply"; break; case EDNS_FORMAT: s = "Invalid DNS query"; break; case EDNS_REFUSED: s = "DNS query refused by server"; break; case EDNS_CNAME_LOOP: s = "DNS CNAME loop"; break; #ifndef NO_SOCKS case ESOCKS_PROTOCOL: s = "SOCKS protocol error"; break; case ESOCKS_REJECT_FAIL: s = "SOCKS request rejected or failed"; break; case ESOCKS_REJECT_IDENTD: s = "SOCKS request rejected: " "server couldn't connect to identd"; case ESOCKS_REJECT_UID_MISMATCH: s = "SOCKS request rejected: " "uid mismatch"; break; case ESOCKS5_BASE: s = "SOCKS success"; break; case ESOCKS5_BASE + 1: s = "General SOCKS server failure"; break; case ESOCKS5_BASE + 2: s = "SOCKS connection not allowed"; break; case ESOCKS5_BASE + 3: s = "SOCKS error: network unreachable"; break; case ESOCKS5_BASE + 4: s = "SOCKS error: host unreachable"; break; case ESOCKS5_BASE + 5: s = "SOCKS error: connection refused"; break; case ESOCKS5_BASE + 
6: s = "SOCKS error: TTL expired"; break; case ESOCKS5_BASE + 7: s = "SOCKS command not supported"; break; case ESOCKS5_BASE + 8: s = "SOCKS error: address type not supported"; break; #endif case EUNKNOWN: s = "Unknown error"; break; default: s = NULL; break; } if(!s) s = strerror(e); #ifdef MINGW if(!s) { if(e >= WSABASEERR && e <= WSABASEERR + 2000) { /* This should be okay, as long as the caller discards the pointer before another error occurs. */ snprintf(buf, 20, "Winsock error %d", e); s = buf; } } #endif if(!s) { snprintf(buf, 20, "Unknown error %d", e); s = buf; } return s; } /* Like mktime(3), but UTC rather than local time */ #if defined(HAVE_TIMEGM) time_t mktime_gmt(struct tm *tm) { return timegm(tm); } #elif defined(HAVE_TM_GMTOFF) time_t mktime_gmt(struct tm *tm) { time_t t; struct tm *ltm; t = mktime(tm); if(t < 0) return -1; ltm = localtime(&t); if(ltm == NULL) return -1; return t + ltm->tm_gmtoff; } #elif defined(HAVE_TZSET) #ifdef HAVE_SETENV /* Taken from the Linux timegm(3) man page. */ time_t mktime_gmt(struct tm *tm) { time_t t; char *tz; tz = getenv("TZ"); setenv("TZ", "", 1); tzset(); t = mktime(tm); if(tz) setenv("TZ", tz, 1); else unsetenv("TZ"); tzset(); return t; } #else time_t mktime_gmt(struct tm *tm) { time_t t; char *tz; static char *old_tz = NULL; tz = getenv("TZ"); putenv("TZ="); tzset(); t = mktime(tm); if(old_tz) free(old_tz); if(tz) old_tz = sprintf_a("TZ=%s", tz); else old_tz = strdup("TZ"); /* XXX - non-portable? 
*/ if(old_tz) putenv(old_tz); tzset(); return t; } #endif #else #error no mktime_gmt implementation on this platform #endif AtomPtr expandTilde(AtomPtr filename) { char *buf; char *home; int len; AtomPtr ret; if(filename == NULL || filename->length < 1 || filename->string[0] != '~' || filename->string[1] != '/') return filename; home = getenv("HOME"); if(home == NULL) { return NULL; } len = strlen(home); buf = malloc(len + 1 + 1 + filename->length - 2); if(buf == NULL) { do_log(L_ERROR, "Could not allocate buffer.\n"); return NULL; } memcpy(buf, home, len); if(buf[len - 1] != '/') buf[len++] = '/'; memcpy(buf + len, filename->string + 2, filename->length - 2); len += filename->length - 2; ret = internAtomN(buf, len); free(buf); if(ret != NULL) releaseAtom(filename); return ret; } #ifdef HAVE_FORK void do_daemonise(int noclose) { int rc; fflush(stdout); fflush(stderr); rc = fork(); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't fork"); exit(1); } if(rc > 0) exit(0); if(!noclose) { close(0); close(1); close(2); } rc = setsid(); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't create new session"); exit(1); } } #else void do_daemonise(int noclose) { do_log(L_ERROR, "Cannot daemonise on this platform"); exit(1); } #endif void writePid(char *pidfile) { int fd, n, rc; char buf[16]; fd = open(pidfile, O_WRONLY | O_CREAT | O_EXCL, 0666); if(fd < 0) { do_log_error(L_ERROR, errno, "Couldn't create pid file %s", pidfile); exit(1); } n = snprintf(buf, 16, "%ld\n", (long)getpid()); if(n < 0 || n >= 16) { close(fd); unlink(pidfile); do_log(L_ERROR, "Couldn't format pid.\n"); exit(1); } rc = write(fd, buf, n); if(rc != n) { close(fd); unlink(pidfile); do_log_error(L_ERROR, errno, "Couldn't write pid"); exit(1); } close(fd); return; } static const char b64[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /* "/" replaced with "-" */ static const char b64fss[64] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-"; int b64cpy(char 
*restrict dst, const char *restrict src, int n, int fss) { const char *b = fss ? b64fss: b64; int i, j; j = 0; for(i = 0; i < n; i += 3) { unsigned char a0, a1, a2; a0 = src[i]; a1 = i < n - 1 ? src[i + 1] : 0; a2 = i < n - 2 ? src[i + 2] : 0; dst[j++] = b[(a0 >> 2) & 0x3F]; dst[j++] = b[((a0 << 4) & 0x30) | ((a1 >> 4) & 0x0F)]; if(i < n - 1) dst[j++] = b[((a1 << 2) & 0x3C) | ((a2 >> 6) & 0x03)]; else dst[j++] = '='; if(i < n - 2) dst[j++] = b[a2 & 0x3F]; else dst[j++] = '='; } return j; } int b64cmp(const char *restrict a, int an, const char *restrict b, int bn) { char *buf; int r; if(an % 4 != 0) return -1; if((bn + 2) / 3 != an / 4) return -1; buf = malloc(an); if(buf == NULL) return -1; b64cpy(buf, b, bn, 0); r = memcmp(buf, a, an); free(buf); return r; } IntListPtr makeIntList(int size) { IntListPtr list; if(size <= 0) size = 4; list = malloc(sizeof(IntListRec)); if(list == NULL) return NULL; list->ranges = malloc(size * sizeof(IntRangeRec)); if(list->ranges == NULL) { free(list); return NULL; } list->length = 0; list->size = size; return list; } void destroyIntList(IntListPtr list) { free(list->ranges); free(list); } int intListMember(int n, IntListPtr list) { int lo = 0, hi = list->length - 1; int mid; while(hi >= lo) { mid = (hi + lo) / 2; if(list->ranges[mid].from > n) hi = mid - 1; else if(list->ranges[mid].to < n) lo = mid + 1; else return 1; } return 0; } static int deleteRange(IntListPtr list, int i) { assert(list->length > i); if(list->length > i + 1) memmove(list->ranges + i, list->ranges + i + 1, (list->length - i - 1) * sizeof(IntRangeRec)); list->length--; return 1; } static int insertRange(int from, int to, IntListPtr list, int i) { assert(i >= 0 && i <= list->length); assert(i == 0 || list->ranges[i - 1].to < from - 1); assert(i == list->length || list->ranges[i].from > to + 1); if(list->length >= list->size) { int newsize = list->size * 2 + 1; IntRangePtr newranges = realloc(list->ranges, newsize * sizeof(IntRangeRec)); if(newranges == NULL) 
return -1; list->size = newsize; list->ranges = newranges; } if(i < list->length) memmove(list->ranges + i + 1, list->ranges + i, list->length - i); list->length++; list->ranges[i].from = from; list->ranges[i].to = to; return 1; } static int maybeMergeRanges(IntListPtr list, int i) { int rc; while(i > 0 && list->ranges[i].from <= list->ranges[i - 1].to + 1) { list->ranges[i - 1].from = MIN(list->ranges[i - 1].from, list->ranges[i].from); list->ranges[i - 1].to = MAX(list->ranges[i - 1].to, list->ranges[i].to); rc = deleteRange(list, i); if(rc < 0) return -1; i--; } while(i < list->length - 1 && list->ranges[i].to >= list->ranges[i + 1].from - 1) { list->ranges[i + 1].from = MIN(list->ranges[i + 1].from, list->ranges[i].from); list->ranges[i - 1].to = MAX(list->ranges[i + 1].to, list->ranges[i].to); rc = deleteRange(list, i); if(rc < 0) return -1; } return 1; } int intListCons(int from, int to, IntListPtr list) { int i; /* Don't bother with the dichotomy. */ for(i = 0; i < list->length; i++) { if(list->ranges[i].to >= from - 1) break; } if(i < list->length && (from >= list->ranges[i].from - 1 || to <= list->ranges[i].to + 1)) { if(from <= list->ranges[i].from) list->ranges[i].from = from; if(to >= list->ranges[i].to) list->ranges[i].to = to; return maybeMergeRanges(list, i); } return insertRange(from, to, list, i); } /* Return the amount of physical memory on the box, -1 if unknown or over two gigs. 
*/ #if defined(__linux__) #include int physicalMemory() { int rc; struct sysinfo info; rc = sysinfo(&info); if(rc < 0) return -1; if(info.totalram <= 0x7fffffff / info.mem_unit) return (int)(info.totalram * info.mem_unit); return -1; } #elif defined(__FreeBSD__) #include int physicalMemory() { int membytes; size_t len; int res; len = sizeof(membytes); res = sysctlbyname("hw.physmem", &membytes, &len, NULL, 0); if (res) return -1; return membytes; } #else int physicalMemory() { return -1; } #endif polipo-1.0.4.1/tunnel.h0000644000175000017500000000316111331407220014137 0ustar chrisdchrisd/* Copyright (c) 2004-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ typedef struct _CircularBuffer { int head; int tail; char *buf; } CircularBufferRec, *CircularBufferPtr; #define TUNNEL_READER1 1 #define TUNNEL_WRITER1 2 #define TUNNEL_EOF1 4 #define TUNNEL_EPIPE1 8 #define TUNNEL_READER2 16 #define TUNNEL_WRITER2 32 #define TUNNEL_EOF2 64 #define TUNNEL_EPIPE2 128 typedef struct _Tunnel { AtomPtr hostname; int port; int flags; int fd1; CircularBufferRec buf1; int fd2; CircularBufferRec buf2; } TunnelRec, *TunnelPtr; void do_tunnel(int fd, char *buf, int offset, int len, AtomPtr url); polipo-1.0.4.1/tunnel.c0000644000175000017500000003766511331407220014152 0ustar chrisdchrisd/* Copyright (c) 2004-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" #ifdef NO_TUNNEL void do_tunnel(int fd, char *buf, int offset, int len, AtomPtr url) { int n; assert(buf); n = httpWriteErrorHeaders(buf, CHUNK_SIZE, 0, 1, 501, internAtom("CONNECT not available " "in this version."), 1, NULL, url->string, url->length, NULL); releaseAtom(url); if(n >= 0) { /* This is completely wrong. The write is non-blocking, and we don't reschedule it if it fails. But then, if the write blocks, we'll simply drop the connection with no error message. */ write(fd, buf, n); } dispose_chunk(buf); lingeringClose(fd); return; } #else static void tunnelDispatch(TunnelPtr); static int tunnelRead1Handler(int, FdEventHandlerPtr, StreamRequestPtr); static int tunnelRead2Handler(int, FdEventHandlerPtr, StreamRequestPtr); static int tunnelWrite1Handler(int, FdEventHandlerPtr, StreamRequestPtr); static int tunnelWrite2Handler(int, FdEventHandlerPtr, StreamRequestPtr); static int tunnelDnsHandler(int, GethostbynameRequestPtr); static int tunnelConnectionHandler(int, FdEventHandlerPtr, ConnectRequestPtr); static int tunnelSocksHandler(int, SocksRequestPtr); static int tunnelHandlerCommon(int, TunnelPtr); static int tunnelError(TunnelPtr, int, AtomPtr); static int circularBufferFull(CircularBufferPtr buf) { if(buf->head == buf->tail - 1) return 1; if(buf->head == CHUNK_SIZE - 1 && buf->tail == 0) return 1; return 0; } static int circularBufferEmpty(CircularBufferPtr buf) { return buf->head == buf->tail; } static TunnelPtr makeTunnel(int fd, char *buf, int offset, int len) { TunnelPtr tunnel; assert(offset < CHUNK_SIZE); tunnel = malloc(sizeof(TunnelRec)); if(tunnel == NULL) return NULL; tunnel->hostname = NULL; tunnel->port = -1; tunnel->flags = 0; tunnel->fd1 = fd; tunnel->fd2 = -1; tunnel->buf1.buf = buf; if(offset == len) { tunnel->buf1.tail = 0; tunnel->buf1.head = 0; } else { tunnel->buf1.tail = offset; tunnel->buf1.head = len; } tunnel->buf2.buf = NULL; tunnel->buf2.tail = 0; tunnel->buf2.head = 0; return tunnel; } static void 
destroyTunnel(TunnelPtr tunnel) { assert(tunnel->fd1 < 0 && tunnel->fd2 < 0); releaseAtom(tunnel->hostname); if(tunnel->buf1.buf) dispose_chunk(tunnel->buf1.buf); if(tunnel->buf2.buf) dispose_chunk(tunnel->buf2.buf); free(tunnel); } void do_tunnel(int fd, char *buf, int offset, int len, AtomPtr url) { TunnelPtr tunnel; int port; char *p, *q; tunnel = makeTunnel(fd, buf, offset, len); if(tunnel == NULL) { do_log(L_ERROR, "Couldn't allocate tunnel.\n"); releaseAtom(url); dispose_chunk(buf); CLOSE(fd); return; } p = memrchr(url->string, ':', url->length); q = NULL; if(p) port = strtol(p + 1, &q, 10); if(!p || q != url->string + url->length) { do_log(L_ERROR, "Couldn't parse CONNECT.\n"); releaseAtom(url); tunnelError(tunnel, 400, internAtom("Couldn't parse CONNECT")); return; } tunnel->hostname = internAtomLowerN(url->string, p - url->string); if(tunnel->hostname == NULL) { releaseAtom(url); tunnelError(tunnel, 501, internAtom("Couldn't allocate hostname")); return; } if(!intListMember(port, tunnelAllowedPorts)) { releaseAtom(url); tunnelError(tunnel, 403, internAtom("Forbidden port")); return; } tunnel->port = port; releaseAtom(url); if(socksParentProxy) do_socks_connect(parentHost ? parentHost->string : tunnel->hostname->string, parentHost ? parentPort : tunnel->port, tunnelSocksHandler, tunnel); else do_gethostbyname(parentHost ? parentHost->string : tunnel->hostname->string, 0, tunnelDnsHandler, tunnel); } static int tunnelDnsHandler(int status, GethostbynameRequestPtr request) { TunnelPtr tunnel = request->data; if(status <= 0) { tunnelError(tunnel, 504, internAtomError(-status, "Host %s lookup failed", atomString(tunnel->hostname))); return 1; } if(request->addr->string[0] == DNS_CNAME) { if(request->count > 10) tunnelError(tunnel, 504, internAtom("CNAME loop")); do_gethostbyname(request->addr->string + 1, request->count + 1, tunnelDnsHandler, tunnel); return 1; } do_connect(retainAtom(request->addr), 0, parentHost ? 
parentPort : tunnel->port, tunnelConnectionHandler, tunnel); return 1; } static int tunnelConnectionHandler(int status, FdEventHandlerPtr event, ConnectRequestPtr request) { TunnelPtr tunnel = request->data; int rc; if(status < 0) { tunnelError(tunnel, 504, internAtomError(-status, "Couldn't connect")); return 1; } rc = setNodelay(request->fd, 1); if(rc < 0) do_log_error(L_WARN, errno, "Couldn't disable Nagle's algorithm"); return tunnelHandlerCommon(request->fd, tunnel); } static int tunnelSocksHandler(int status, SocksRequestPtr request) { TunnelPtr tunnel = request->data; if(status < 0) { tunnelError(tunnel, 504, internAtomError(-status, "Couldn't connect")); return 1; } return tunnelHandlerCommon(request->fd, tunnel); } static int tunnelHandlerParent(int fd, TunnelPtr tunnel) { char *message; int n; if(tunnel->buf1.buf == NULL) tunnel->buf1.buf = get_chunk(); if(tunnel->buf1.buf == NULL) { message = "Couldn't allocate buffer"; goto fail; } if(tunnel->buf1.tail != tunnel->buf1.head) { message = "Pipelined connect to parent proxy not implemented"; goto fail; } n = snnprintf(tunnel->buf1.buf, tunnel->buf1.tail, CHUNK_SIZE - tunnel->buf1.tail, "CONNECT %s:%d HTTP/1.1" "\r\n\r\n", tunnel->hostname->string, tunnel->port); if(n < 0) { message = "Buffer overflow"; goto fail; } tunnel->buf1.head = n; tunnelDispatch(tunnel); return 1; fail: CLOSE(fd); tunnel->fd2 = -1; tunnelError(tunnel, 501, internAtom(message)); return 1; } static int tunnelHandlerCommon(int fd, TunnelPtr tunnel) { const char *message = "HTTP/1.1 200 Tunnel established\r\n\r\n"; tunnel->fd2 = fd; if(parentHost) return tunnelHandlerParent(fd, tunnel); if(tunnel->buf2.buf == NULL) tunnel->buf2.buf = get_chunk(); if(tunnel->buf2.buf == NULL) { CLOSE(fd); tunnelError(tunnel, 501, internAtom("Couldn't allocate buffer")); return 1; } memcpy(tunnel->buf2.buf, message, MIN(CHUNK_SIZE - 1, strlen(message))); tunnel->buf2.head = MIN(CHUNK_SIZE - 1, strlen(message)); tunnelDispatch(tunnel); return 1; } static 
void bufRead(int fd, CircularBufferPtr buf, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data) { int tail; if(buf->tail == 0) tail = CHUNK_SIZE - 1; else tail = buf->tail - 1; if(buf->head == 0) do_stream_buf(IO_READ | IO_NOTNOW, fd, 0, &buf->buf, tail, handler, data); else if(buf->tail > buf->head) do_stream(IO_READ | IO_NOTNOW, fd, buf->head, buf->buf, tail, handler, data); else do_stream_2(IO_READ | IO_NOTNOW, fd, buf->head, buf->buf, CHUNK_SIZE, buf->buf, tail, handler, data); } static void bufWrite(int fd, CircularBufferPtr buf, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data) { if(buf->head > buf->tail) do_stream(IO_WRITE, fd, buf->tail, buf->buf, buf->head, handler, data); else do_stream_2(IO_WRITE, fd, buf->tail, buf->buf, CHUNK_SIZE, buf->buf, buf->head, handler, data); } static void tunnelDispatch(TunnelPtr tunnel) { if(circularBufferEmpty(&tunnel->buf1)) { if(tunnel->buf1.buf && !(tunnel->flags & (TUNNEL_READER1 | TUNNEL_WRITER2))) { dispose_chunk(tunnel->buf1.buf); tunnel->buf1.buf = NULL; tunnel->buf1.head = tunnel->buf1.tail = 0; } } if(circularBufferEmpty(&tunnel->buf2)) { if(tunnel->buf2.buf && !(tunnel->flags & (TUNNEL_READER2 | TUNNEL_WRITER1))) { dispose_chunk(tunnel->buf2.buf); tunnel->buf2.buf = NULL; tunnel->buf2.head = tunnel->buf2.tail = 0; } } if(tunnel->fd1 >= 0) { if(!(tunnel->flags & (TUNNEL_READER1 | TUNNEL_EOF1)) && !circularBufferFull(&tunnel->buf1)) { tunnel->flags |= TUNNEL_READER1; bufRead(tunnel->fd1, &tunnel->buf1, tunnelRead1Handler, tunnel); } if(!(tunnel->flags & (TUNNEL_WRITER1 | TUNNEL_EPIPE1)) && !circularBufferEmpty(&tunnel->buf2)) { tunnel->flags |= TUNNEL_WRITER1; /* There's no IO_NOTNOW in bufWrite, so it might close the file descriptor straight away. Wait until we're rescheduled. 
*/ bufWrite(tunnel->fd1, &tunnel->buf2, tunnelWrite1Handler, tunnel); return; } if(tunnel->fd2 < 0 || (tunnel->flags & TUNNEL_EOF2)) { if(!(tunnel->flags & TUNNEL_EPIPE1)) shutdown(tunnel->fd1, 1); tunnel->flags |= TUNNEL_EPIPE1; } else if(tunnel->fd1 < 0 || (tunnel->flags & TUNNEL_EPIPE2)) { if(!(tunnel->flags & TUNNEL_EOF1)) shutdown(tunnel->fd1, 0); tunnel->flags |= TUNNEL_EOF1; } if((tunnel->flags & TUNNEL_EOF1) && (tunnel->flags & TUNNEL_EPIPE1)) { if(!(tunnel->flags & (TUNNEL_READER1 | TUNNEL_WRITER1))) { CLOSE(tunnel->fd1); tunnel->fd1 = -1; } } } if(tunnel->fd2 >= 0) { if(!(tunnel->flags & (TUNNEL_READER2 | TUNNEL_EOF2)) && !circularBufferFull(&tunnel->buf2)) { tunnel->flags |= TUNNEL_READER2; bufRead(tunnel->fd2, &tunnel->buf2, tunnelRead2Handler, tunnel); } if(!(tunnel->flags & (TUNNEL_WRITER2 | TUNNEL_EPIPE2)) && !circularBufferEmpty(&tunnel->buf1)) { tunnel->flags |= TUNNEL_WRITER2; bufWrite(tunnel->fd2, &tunnel->buf1, tunnelWrite2Handler, tunnel); return; } if(tunnel->fd1 < 0 || (tunnel->flags & TUNNEL_EOF1)) { if(!(tunnel->flags & TUNNEL_EPIPE2)) shutdown(tunnel->fd2, 1); tunnel->flags |= TUNNEL_EPIPE2; } else if(tunnel->fd1 < 0 || (tunnel->flags & TUNNEL_EPIPE1)) { if(!(tunnel->flags & TUNNEL_EOF2)) shutdown(tunnel->fd2, 0); tunnel->flags |= TUNNEL_EOF2; } if((tunnel->flags & TUNNEL_EOF2) && (tunnel->flags & TUNNEL_EPIPE2)) { if(!(tunnel->flags & (TUNNEL_READER2 | TUNNEL_WRITER2))) { CLOSE(tunnel->fd2); tunnel->fd2 = -1; } } } if(tunnel->fd1 < 0 && tunnel->fd2 < 0) destroyTunnel(tunnel); else assert(tunnel->flags & (TUNNEL_READER1 | TUNNEL_WRITER1 | TUNNEL_READER2 | TUNNEL_WRITER2)); } static int tunnelRead1Handler(int status, FdEventHandlerPtr event, StreamRequestPtr request) { TunnelPtr tunnel = request->data; if(status) { if(status < 0) do_log_error(L_ERROR, -status, "Couldn't read from client"); tunnel->flags |= TUNNEL_EOF1; goto done; } tunnel->buf1.head = request->offset % CHUNK_SIZE; done: /* Keep buffer empty to avoid a deadlock */ 
if((tunnel->flags & TUNNEL_EPIPE2)) tunnel->buf1.tail = tunnel->buf1.head; tunnel->flags &= ~TUNNEL_READER1; tunnelDispatch(tunnel); return 1; } static int tunnelRead2Handler(int status, FdEventHandlerPtr event, StreamRequestPtr request) { TunnelPtr tunnel = request->data; if(status) { if(status < 0) do_log_error(L_ERROR, -status, "Couldn't read from server"); tunnel->flags |= TUNNEL_EOF2; goto done; } tunnel->buf2.head = request->offset % CHUNK_SIZE; done: /* Keep buffer empty to avoid a deadlock */ if((tunnel->flags & TUNNEL_EPIPE1)) tunnel->buf2.tail = tunnel->buf2.head; tunnel->flags &= ~TUNNEL_READER2; tunnelDispatch(tunnel); return 1; } static int tunnelWrite1Handler(int status, FdEventHandlerPtr event, StreamRequestPtr request) { TunnelPtr tunnel = request->data; if(status || (tunnel->flags & TUNNEL_EPIPE1)) { tunnel->flags |= TUNNEL_EPIPE1; if(status < 0 && status != -EPIPE) do_log_error(L_ERROR, -status, "Couldn't write to client"); /* Empty the buffer to avoid a deadlock */ tunnel->buf2.tail = tunnel->buf2.head; goto done; } tunnel->buf2.tail = request->offset % CHUNK_SIZE; done: tunnel->flags &= ~TUNNEL_WRITER1; tunnelDispatch(tunnel); return 1; } static int tunnelWrite2Handler(int status, FdEventHandlerPtr event, StreamRequestPtr request) { TunnelPtr tunnel = request->data; if(status || (tunnel->flags & TUNNEL_EPIPE2)) { tunnel->flags |= TUNNEL_EPIPE2; if(status < 0 && status != -EPIPE) do_log_error(L_ERROR, -status, "Couldn't write to server"); /* Empty the buffer to avoid a deadlock */ tunnel->buf1.tail = tunnel->buf1.head; goto done; } tunnel->buf1.tail = request->offset % CHUNK_SIZE; done: tunnel->flags &= ~TUNNEL_WRITER2; tunnelDispatch(tunnel); return 1; } static int tunnelError(TunnelPtr tunnel, int code, AtomPtr message) { int n; if(tunnel->fd2 > 0) { CLOSE(tunnel->fd2); tunnel->fd2 = -1; } if(tunnel->buf2.buf == NULL) tunnel->buf2.buf = get_chunk(); if(tunnel->buf2.buf == NULL) goto fail; n = httpWriteErrorHeaders(tunnel->buf2.buf, CHUNK_SIZE - 
1, 0, 1, code, message, 1, NULL, NULL, 0, NULL); if(n <= 0) goto fail; tunnel->buf2.head = n; tunnelDispatch(tunnel); return 1; fail: CLOSE(tunnel->fd1); tunnel->fd1 = -1; tunnelDispatch(tunnel); return 1; } #endif polipo-1.0.4.1/socks.h0000644000175000017500000000261711331407220013761 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ extern AtomPtr socksParentProxy; typedef struct _SocksRequest { AtomPtr name; int port; int fd; int (*handler)(int, struct _SocksRequest*); char *buf; void *data; } SocksRequestRec, *SocksRequestPtr; void preinitSocks(void); void initSocks(void); int do_socks_connect(char*, int, int (*)(int, SocksRequestPtr), void*); polipo-1.0.4.1/socks.c0000644000175000017500000003277211331407220013761 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" #ifdef NO_SOCKS AtomPtr socksParentProxy = NULL; void preinitSocks() { return; } void initSocks() { return; } int do_socks_connect(char *name, int port, int (*handler)(int, SocksRequestPtr), void *data) { SocksRequestRec request; request.name = internAtomLowerN(name, strlen(name)); request.port = port; request.handler = handler; request.buf = NULL; request.data = data; handler(-ENOSYS, &request); releaseAtom(request.name); return 1; } #else AtomPtr socksParentProxy = NULL; AtomPtr socksProxyHost = NULL; int socksProxyPort = -1; AtomPtr socksProxyAddress = NULL; int socksProxyAddressIndex = -1; AtomPtr socksUserName = NULL; AtomPtr socksProxyType = NULL; AtomPtr aSocks4a, aSocks5; static int socksParentProxySetter(ConfigVariablePtr, void*); static int socksProxyTypeSetter(ConfigVariablePtr, void*); static int do_socks_connect_common(SocksRequestPtr); static int socksDnsHandler(int, GethostbynameRequestPtr); static int socksConnectHandler(int, FdEventHandlerPtr, ConnectRequestPtr); static int socksWriteHandler(int, FdEventHandlerPtr, StreamRequestPtr); static int socksReadHandler(int, FdEventHandlerPtr, StreamRequestPtr); static int socks5ReadHandler(int, FdEventHandlerPtr, StreamRequestPtr); static int socks5WriteHandler(int, FdEventHandlerPtr, StreamRequestPtr); static int socks5ReadHandler2(int, FdEventHandlerPtr, StreamRequestPtr); void preinitSocks() { aSocks4a = internAtom("socks4a"); aSocks5 = internAtom("socks5"); socksProxyType = retainAtom(aSocks5); socksUserName = internAtom(""); CONFIG_VARIABLE_SETTABLE(socksParentProxy, CONFIG_ATOM_LOWER, socksParentProxySetter, "SOCKS parent proxy (host:port)"); CONFIG_VARIABLE_SETTABLE(socksUserName, CONFIG_ATOM, configAtomSetter, "SOCKS4a user name"); CONFIG_VARIABLE_SETTABLE(socksProxyType, CONFIG_ATOM_LOWER, socksProxyTypeSetter, "One of socks4a or socks5"); } static int socksParentProxySetter(ConfigVariablePtr var, void *value) { configAtomSetter(var, value); initSocks(); return 1; } static 
int socksProxyTypeSetter(ConfigVariablePtr var, void *value) { if(*var->value.a != aSocks4a && *var->value.a != aSocks5) { do_log(L_ERROR, "Unknown socksProxyType %s\n", (*var->value.a)->string); return -1; } return configAtomSetter(var, value); } void initSocks() { int port = -1; AtomPtr host = NULL, port_atom; int rc; if(socksParentProxy) { rc = atomSplit(socksParentProxy, ':', &host, &port_atom); if(rc <= 0) { do_log(L_ERROR, "Couldn't parse socksParentProxy"); exit(1); } port = atoi(port_atom->string); releaseAtom(port_atom); } if(socksProxyHost) releaseAtom(socksProxyHost); socksProxyHost = host; socksProxyPort = port; if(socksProxyAddress) releaseAtom(socksProxyAddress); socksProxyAddress = NULL; socksProxyAddressIndex = -1; if(socksProxyType != aSocks4a && socksProxyType != aSocks5) { do_log(L_ERROR, "Unknown socksProxyType %s\n", socksProxyType->string); exit(1); } } static void destroySocksRequest(SocksRequestPtr request) { releaseAtom(request->name); if(request->buf) free(request->buf); free(request); } int do_socks_connect(char *name, int port, int (*handler)(int, SocksRequestPtr), void *data) { SocksRequestPtr request = malloc(sizeof(SocksRequestRec)); SocksRequestRec request_nomem; if(request == NULL) goto nomem; request->name = internAtomLowerN(name, strlen(name)); if(request->name == NULL) { free(request); goto nomem; } request->port = port; request->fd = -1; request->handler = handler; request->buf = NULL; request->data = data; if(socksProxyAddress == NULL) { do_gethostbyname(socksProxyHost->string, 0, socksDnsHandler, request); return 1; } return do_socks_connect_common(request); nomem: request_nomem.name = internAtomLowerN(name, strlen(name)); request_nomem.port = port; request_nomem.handler = handler; request_nomem.buf = NULL; request_nomem.data = data; handler(-ENOMEM, &request_nomem); releaseAtom(request_nomem.name); return 1; } static int do_socks_connect_common(SocksRequestPtr request) { assert(socksProxyAddressIndex >= 0); 
do_connect(retainAtom(socksProxyAddress), socksProxyAddressIndex, socksProxyPort, socksConnectHandler, request); return 1; } static int socksDnsHandler(int status, GethostbynameRequestPtr grequest) { SocksRequestPtr request = grequest->data; if(status <= 0) { request->handler(status, request); destroySocksRequest(request); return 1; } if(grequest->addr->string[0] == DNS_CNAME) { if(grequest->count > 10) { do_log(L_ERROR, "DNS CNAME loop.\n"); request->handler(-EDNS_CNAME_LOOP, request); destroySocksRequest(request); return 1; } do_gethostbyname(grequest->addr->string + 1, grequest->count + 1, httpServerConnectionDnsHandler, request); return 1; } socksProxyAddress = retainAtom(grequest->addr); socksProxyAddressIndex = 0; do_socks_connect_common(request); return 1; } static int socksConnectHandler(int status, FdEventHandlerPtr event, ConnectRequestPtr crequest) { SocksRequestPtr request = crequest->data; int rc; if(status < 0) { request->handler(status, request); destroySocksRequest(request); return 1; } assert(request->fd < 0); request->fd = crequest->fd; socksProxyAddressIndex = crequest->index; rc = setNodelay(request->fd, 1); if(rc < 0) do_log_error(L_WARN, errno, "Couldn't disable Nagle's algorithm"); if(socksProxyType == aSocks4a) { request->buf = malloc(8 + socksUserName->length + 1 + request->name->length + 1); if(request->buf == NULL) { CLOSE(request->fd); request->fd = -1; request->handler(-ENOMEM, request); destroySocksRequest(request); return 1; } request->buf[0] = 4; /* VN */ request->buf[1] = 1; /* CD = REQUEST */ request->buf[2] = (request->port >> 8) & 0xFF; request->buf[3] = request->port & 0xFF; request->buf[4] = request->buf[5] = request->buf[6] = 0; request->buf[7] = 3; memcpy(request->buf + 8, socksUserName->string, socksUserName->length); request->buf[8 + socksUserName->length] = '\0'; memcpy(request->buf + 8 + socksUserName->length + 1, request->name->string, request->name->length); request->buf[8 + socksUserName->length + 1 + 
request->name->length] = '\0'; do_stream(IO_WRITE, request->fd, 0, request->buf, 8 + socksUserName->length + 1 + request->name->length + 1, socksWriteHandler, request); } else if(socksProxyType == aSocks5) { request->buf = malloc(8); /* 8 needed for the subsequent read */ if(request->buf == NULL) { CLOSE(request->fd); request->fd = -1; request->handler(-ENOMEM, request); destroySocksRequest(request); return 1; } request->buf[0] = 5; /* ver */ request->buf[1] = 1; /* nmethods */ request->buf[2] = 0; /* no authentication required */ do_stream(IO_WRITE, request->fd, 0, request->buf, 3, socksWriteHandler, request); } else { request->handler(-EUNKNOWN, request); } return 1; } static int socksWriteHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { SocksRequestPtr request = srequest->data; if(status < 0) goto error; if(!streamRequestDone(srequest)) { if(status) { status = -ESOCKS_PROTOCOL; goto error; } return 0; } do_stream(IO_READ | IO_NOTNOW, request->fd, 0, request->buf, 8, socksProxyType == aSocks5 ? 
socks5ReadHandler : socksReadHandler, request); return 1; error: CLOSE(request->fd); request->fd = -1; request->handler(status, request); destroySocksRequest(request); return 1; } static int socksReadHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { SocksRequestPtr request = srequest->data; if(status < 0) goto error; if(srequest->offset < 8) { if(status) { status = -ESOCKS_PROTOCOL; goto error; } return 0; } if(request->buf[0] != 0 || request->buf[1] != 90) { if(request->buf[1] >= 91 && request->buf[1] <= 93) status = -(ESOCKS_PROTOCOL + request->buf[1] - 90); else status = -ESOCKS_PROTOCOL; goto error; } request->handler(1, request); destroySocksRequest(request); return 1; error: CLOSE(request->fd); request->fd = -1; request->handler(status, request); destroySocksRequest(request); return 1; } static int socks5ReadHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { SocksRequestPtr request = srequest->data; if(status < 0) goto error; if(srequest->offset < 2) { if(status) { status = -ESOCKS_PROTOCOL; goto error; } return 0; } if(request->buf[0] != 5 || request->buf[1] != 0) { status = -ESOCKS_PROTOCOL; goto error; } free(request->buf); request->buf = malloc(5 + request->name->length + 2); if(request->buf == NULL) { status = -ENOMEM; goto error; } request->buf[0] = 5; /* ver */ request->buf[1] = 1; /* cmd */ request->buf[2] = 0; /* rsv */ request->buf[3] = 3; /* atyp */ request->buf[4] = request->name->length; memcpy(request->buf + 5, request->name->string, request->name->length); request->buf[5 + request->name->length] = (request->port >> 8) & 0xFF; request->buf[5 + request->name->length + 1] = request->port & 0xFF; do_stream(IO_WRITE, request->fd, 0, request->buf, 5 + request->name->length + 2, socks5WriteHandler, request); return 1; error: CLOSE(request->fd); request->fd = -1; request->handler(status, request); destroySocksRequest(request); return 1; } static int socks5WriteHandler(int status, FdEventHandlerPtr event, 
StreamRequestPtr srequest) { SocksRequestPtr request = srequest->data; if(status < 0) goto error; if(!streamRequestDone(srequest)) { if(status) { status = -ESOCKS_PROTOCOL; goto error; } return 0; } do_stream(IO_READ | IO_NOTNOW, request->fd, 0, request->buf, 10, socks5ReadHandler2, request); return 1; error: request->handler(status, request); destroySocksRequest(request); return 1; } static int socks5ReadHandler2(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { SocksRequestPtr request = srequest->data; if(status < 0) goto error; if(srequest->offset < 4) { if(status) { status = -ESOCKS_PROTOCOL; goto error; } return 0; } if(request->buf[0] != 5) { status = -ESOCKS_PROTOCOL; goto error; } if(request->buf[1] != 0) { status = -(ESOCKS5_BASE + request->buf[1]); goto error; } if(request->buf[3] != 1) { status = -ESOCKS_PROTOCOL; goto error; } if(srequest->offset < 10) return 0; request->handler(1, request); destroySocksRequest(request); return 1; error: CLOSE(request->fd); request->fd = -1; request->handler(status, request); destroySocksRequest(request); return 1; } #endif polipo-1.0.4.1/server.h0000644000175000017500000001067611331407220014151 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ extern int serverExpireTime, dontCacheRedirects; typedef struct _HTTPServer { char *name; int port; int addrindex; int isProxy; int version; int persistent; int pipeline; int lies; int rtt; int rate; time_t time; int numslots; int maxslots; HTTPConnectionPtr *connection; FdEventHandlerPtr *idleHandler; HTTPRequestPtr request, request_last; struct _HTTPServer *next; } HTTPServerRec, *HTTPServerPtr; extern AtomPtr parentHost; extern int parentPort; void preinitServer(void); void initServer(void); void httpServerAbortHandler(ObjectPtr object); int httpMakeServerRequest(char *name, int port, ObjectPtr object, int method, int from, int to, HTTPRequestPtr requestor); int httpServerQueueRequest(HTTPServerPtr server, HTTPRequestPtr request); int httpServerTrigger(HTTPServerPtr server); int httpServerSideRequest(HTTPServerPtr server); int httpServerDoSide(HTTPConnectionPtr connection); int httpServerSideHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest); int httpServerSideHandler2(int status, FdEventHandlerPtr event, StreamRequestPtr srequest); int httpServerConnectionDnsHandler(int status, GethostbynameRequestPtr request); int httpServerConnectionHandler(int status, FdEventHandlerPtr event, ConnectRequestPtr request); int httpServerSocksHandler(int status, SocksRequestPtr request); int httpServerConnectionHandlerCommon(int status, HTTPConnectionPtr connection); void httpServerFinish(HTTPConnectionPtr connection, int s, int offset); void httpServerReply(HTTPConnectionPtr connection, int immediate); 
void httpServerAbort(HTTPConnectionPtr connection, int, int, struct _Atom *); void httpServerAbortRequest(HTTPRequestPtr request, int, int, struct _Atom *); void httpServerClientReset(HTTPRequestPtr request); void httpServerUnpipeline(HTTPRequestPtr request); int httpServerSendRequest(HTTPConnectionPtr connection); int httpServerHandler(int status, FdEventHandlerPtr event, StreamRequestPtr request); int httpServerReplyHandler(int status, FdEventHandlerPtr event, StreamRequestPtr request); int httpServerIndirectHandler(int status, FdEventHandlerPtr event, StreamRequestPtr request); int httpServerDirectHandler(int status, FdEventHandlerPtr event, StreamRequestPtr request); int httpServerDirectHandler2(int status, FdEventHandlerPtr event, StreamRequestPtr request); int httpServerRequest(ObjectPtr object, int method, int from, int to, HTTPRequestPtr, void*); int httpServerHandlerHeaders(int eof, FdEventHandlerPtr event, StreamRequestPtr request, HTTPConnectionPtr connection); int httpServerReadData(HTTPConnectionPtr, int); int connectionAddData(HTTPConnectionPtr connection, int skip); int httpWriteRequest(HTTPConnectionPtr connection, HTTPRequestPtr request, int); void listServers(FILE*); polipo-1.0.4.1/server.c0000644000175000017500000027563711331407220014156 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "polipo.h" int serverExpireTime = 24 * 60 * 60; int smallRequestTime = 10; int replyUnpipelineTime = 20; int replyUnpipelineSize = 1024 * 1024; int pipelineAdditionalRequests = 1; int maxPipelineTrain = 10; AtomPtr parentProxy = NULL; AtomPtr parentHost = NULL; int parentPort = -1; int pmmFirstSize = 0, pmmSize = 0; int serverSlots = 2; int serverSlots1 = 4; int serverMaxSlots = 8; int dontCacheRedirects = 0; int maxSideBuffering = 1500; int maxConnectionAge = 1260; int maxConnectionRequests = 400; static HTTPServerPtr servers = 0; static int initParentProxy(void); static int parentProxySetter(ConfigVariablePtr var, void *value); static void httpServerDelayedFinish(HTTPConnectionPtr); static int allowUnalignedRangeRequests = 0; void preinitServer(void) { CONFIG_VARIABLE_SETTABLE(parentProxy, CONFIG_ATOM_LOWER, parentProxySetter, "Parent proxy (host:port)."); CONFIG_VARIABLE(serverExpireTime, CONFIG_TIME, "Time during which server data is valid."); CONFIG_VARIABLE_SETTABLE(smallRequestTime, CONFIG_TIME, configIntSetter, "Estimated time for a small request."); CONFIG_VARIABLE_SETTABLE(replyUnpipelineTime, CONFIG_TIME, configIntSetter, "Estimated time for a pipeline break."); CONFIG_VARIABLE_SETTABLE(replyUnpipelineSize, CONFIG_INT, configIntSetter, "Size for a pipeline break."); CONFIG_VARIABLE_SETTABLE(pipelineAdditionalRequests, CONFIG_TRISTATE, configIntSetter, "Pipeline requests on an active connection."); CONFIG_VARIABLE_SETTABLE(maxPipelineTrain, CONFIG_INT, configIntSetter, "Maximum number of 
requests " "pipelined at a time."); CONFIG_VARIABLE(pmmFirstSize, CONFIG_INT, "The size of the first PMM chunk."); CONFIG_VARIABLE(pmmSize, CONFIG_INT, "The size of a PMM chunk."); CONFIG_VARIABLE(serverSlots, CONFIG_INT, "Maximum number of connections per server."); CONFIG_VARIABLE(serverSlots1, CONFIG_INT, "Maximum number of connections per HTTP/1.0 server."); CONFIG_VARIABLE(serverMaxSlots, CONFIG_INT, "Maximum number of connections per broken server."); CONFIG_VARIABLE(dontCacheRedirects, CONFIG_BOOLEAN, "If true, don't cache redirects."); CONFIG_VARIABLE_SETTABLE(allowUnalignedRangeRequests, CONFIG_BOOLEAN, configIntSetter, "Allow unaligned range requests (unreliable)."); CONFIG_VARIABLE_SETTABLE(maxSideBuffering, CONFIG_INT, configIntSetter, "Maximum buffering for PUT and POST requests."); CONFIG_VARIABLE_SETTABLE(maxConnectionAge, CONFIG_TIME, configIntSetter, "Maximum age of a server-side connection."); CONFIG_VARIABLE_SETTABLE(maxConnectionRequests, CONFIG_INT, configIntSetter, "Maximum number of requests on a server-side connection."); } static int parentProxySetter(ConfigVariablePtr var, void *value) { configAtomSetter(var, value); initParentProxy(); return 1; } static void discardServer(HTTPServerPtr server) { HTTPServerPtr previous; assert(!server->request); if(server == servers) servers = server->next; else { previous = servers; while(previous->next != server) previous = previous->next; previous->next = server->next; } free(server); } static int httpServerIdle(HTTPServerPtr server) { int i; if(server->request) return 0; for(i = 0; i < server->maxslots; i++) if(server->connection[i]) return 0; return 1; } static int expireServersHandler(TimeEventHandlerPtr event) { HTTPServerPtr server, next; TimeEventHandlerPtr e; server = servers; while(server) { next = server->next; if(httpServerIdle(server) && server->time + serverExpireTime < current_time.tv_sec) discardServer(server); server = next; } e = scheduleTimeEvent(serverExpireTime / 60 + 60, 
expireServersHandler, 0, NULL); if(!e) { do_log(L_ERROR, "Couldn't schedule server expiry.\n"); polipoExit(); } return 1; } static int roundSize(int size) { return (size + CHUNK_SIZE - 1) / CHUNK_SIZE * CHUNK_SIZE; } static int initParentProxy() { AtomPtr host, port_atom; int rc, port; if(parentHost) { releaseAtom(parentHost); parentHost = NULL; } if(parentPort >= 0) parentPort = -1; if(parentProxy != NULL && parentProxy->length == 0) { releaseAtom(parentProxy); parentProxy = NULL; } if(parentProxy == NULL) return 1; rc = atomSplit(parentProxy, ':', &host, &port_atom); if(rc <= 0) { do_log(L_ERROR, "Couldn't parse parentProxy."); releaseAtom(parentProxy); parentProxy = NULL; return -1; } port = atoi(port_atom->string); if(port <= 0 || port >= 0x10000) { releaseAtom(host); releaseAtom(port_atom); do_log(L_ERROR, "Couldn't parse parentProxy."); releaseAtom(parentProxy); parentProxy = NULL; return -1; } parentHost = host; parentPort = port; return 1; } void initServer(void) { TimeEventHandlerPtr event; servers = NULL; if(pmmFirstSize || pmmSize) { if(pmmSize == 0) pmmSize = pmmFirstSize; if(pmmFirstSize == 0) pmmFirstSize = pmmSize; pmmSize = roundSize(pmmSize); pmmFirstSize = roundSize(pmmFirstSize); } if(serverMaxSlots < 1) serverMaxSlots = 1; if(serverSlots < 1) serverSlots = 1; if(serverSlots > serverMaxSlots) serverSlots = serverMaxSlots; if(serverSlots1 < serverSlots) serverSlots1 = serverSlots; if(serverSlots1 > serverMaxSlots) serverSlots1 = serverMaxSlots; initParentProxy(); event = scheduleTimeEvent(serverExpireTime / 60 + 60, expireServersHandler, 0, NULL); if(event == NULL) { do_log(L_ERROR, "Couldn't schedule server expiry.\n"); exit(1); } } static HTTPServerPtr getServer(char *name, int port, int proxy) { HTTPServerPtr server; int i; server = servers; while(server) { if(strcmp(server->name, name) == 0 && server->port == port && server->isProxy == proxy) { if(httpServerIdle(server) && server->time + serverExpireTime < current_time.tv_sec) { 
discardServer(server); server = NULL; break; } else { server->time = current_time.tv_sec; return server; } } server = server->next; } server = malloc(sizeof(HTTPServerRec)); if(server == NULL) { do_log(L_ERROR, "Couldn't allocate server.\n"); return NULL; } server->connection = malloc(serverMaxSlots * sizeof(HTTPConnectionPtr)); if(server->connection == NULL) { do_log(L_ERROR, "Couldn't allocate server.\n"); free(server); return NULL; } server->idleHandler = malloc(serverMaxSlots * sizeof(FdEventHandlerPtr)); if(server->connection == NULL) { do_log(L_ERROR, "Couldn't allocate server.\n"); free(server->connection); free(server); return NULL; } server->maxslots = serverMaxSlots; server->name = strdup(name); if(server->name == NULL) { do_log(L_ERROR, "Couldn't allocate server name.\n"); free(server); return NULL; } server->port = port; server->addrindex = 0; server->isProxy = proxy; server->version = HTTP_UNKNOWN; server->persistent = 0; server->pipeline = 0; server->time = current_time.tv_sec; server->rtt = -1; server->rate = -1; server->numslots = MIN(serverSlots, server->maxslots); for(i = 0; i < server->maxslots; i++) { server->connection[i] = NULL; server->idleHandler[i] = NULL; } server->request = NULL; server->request_last = NULL; server->lies = 0; server->next = servers; servers = server; return server; } int httpServerQueueRequest(HTTPServerPtr server, HTTPRequestPtr request) { assert(request->request && request->request->request == request); assert(request->connection == NULL); if(server->request) { server->request_last->next = request; server->request_last = request; } else { server->request_last = request; server->request = request; } return 1; } void httpServerAbort(HTTPConnectionPtr connection, int fail, int code, AtomPtr message) { HTTPRequestPtr request = connection->request; if(request) { if(request->request) { httpClientError(request->request, code, retainAtom(message)); } if(fail) { request->object->flags |= OBJECT_FAILED; if(request->object->flags 
& OBJECT_INITIAL) abortObject(request->object, code, retainAtom(message)); notifyObject(request->object); } } releaseAtom(message); if(!connection->connecting) httpServerFinish(connection, 1, 0); } void httpServerAbortRequest(HTTPRequestPtr request, int fail, int code, AtomPtr message) { if(request->connection && request == request->connection->request) { httpServerAbort(request->connection, fail, code, message); } else { HTTPRequestPtr requestor = request->request; if(requestor) { requestor->request = NULL; request->request = NULL; httpClientError(requestor, code, retainAtom(message)); } if(fail) { request->object->flags |= OBJECT_FAILED; if(request->object->flags & OBJECT_INITIAL) abortObject(request->object, code, retainAtom(message)); notifyObject(request->object); } releaseAtom(message); } } void httpServerClientReset(HTTPRequestPtr request) { if(request->connection && request->connection->fd >= 0 && !request->connection->connecting && request->connection->request == request) pokeFdEvent(request->connection->fd, -ECLIENTRESET, POLLIN | POLLOUT); } int httpMakeServerRequest(char *name, int port, ObjectPtr object, int method, int from, int to, HTTPRequestPtr requestor) { HTTPServerPtr server; HTTPRequestPtr request; int rc; assert(!(object->flags & OBJECT_INPROGRESS)); if(parentHost) { server = getServer(parentHost->string, parentPort, 1); } else { server = getServer(name, port, 0); } if(server == NULL) return -1; object->flags |= OBJECT_INPROGRESS; object->requestor = requestor; request = httpMakeRequest(); if(!request) { do_log(L_ERROR, "Couldn't allocate request.\n"); return -1; } /* Because we allocate objects in chunks, we cannot have data that doesn't start at a chunk boundary. 
*/
    if(from % CHUNK_SIZE != 0) {
        if(allowUnalignedRangeRequests) {
            /* NOTE(review): presumably a non-zero hole size at from - 1
               means the preceding byte is not cached, so the start is
               rounded down to the chunk boundary -- confirm against
               objectHoleSize. */
            objectFillFromDisk(object, from / CHUNK_SIZE * CHUNK_SIZE, 1);
            if(objectHoleSize(object, from - 1) != 0)
                from = from / CHUNK_SIZE * CHUNK_SIZE;
        } else {
            from = from / CHUNK_SIZE * CHUNK_SIZE;
        }
    }

    request->object = retainObject(object);
    request->method = method;
    /* A conditional GET is downgraded to HEAD when the server's "lies"
       counter is positive. */
    if(method == METHOD_CONDITIONAL_GET) {
        if(server->lies > 0)
            request->method = METHOD_HEAD;
    }
    /* Inherit the requestor's wait-for-100-continue flag only when
       expectContinue is enabled. */
    request->flags =
        REQUEST_PERSISTENT |
        (expectContinue ? (requestor->flags & REQUEST_WAIT_CONTINUE) : 0);
    request->from = from;
    request->to = to;
    /* Cross-link the client-side and server-side requests. */
    request->request = requestor;
    requestor->request = request;
    request->cache_control = requestor->cache_control;
    request->time0 = null_time;
    request->time1 = null_time;

    rc = httpServerQueueRequest(server, request);
    if(rc < 0) {
        /* Undo the linking and the in-progress marking done above. */
        do_log(L_ERROR, "Couldn't queue request.\n");
        request->request = NULL;
        requestor->request = NULL;
        object->flags &= ~(OBJECT_INPROGRESS | OBJECT_VALIDATING);
        releaseNotifyObject(object);
        httpDestroyRequest(request);
        return 1;
    }

    if(request->flags & REQUEST_WAIT_CONTINUE) {
        /* The client expects 100-continue but the server is known to
           speak HTTP/1.0: fail the request with 417. */
        if(server->version == HTTP_10) {
            httpServerAbortRequest(request, 1,
                                   417, internAtom("Expectation failed"));
            return 1;
        }
    } else if(expectContinue >= 2 && server->version == HTTP_11) {
        /* With expectContinue >= 2, request 100-continue ourselves for
           POST and PUT to HTTP/1.1 servers. */
        if(request->method == METHOD_POST || request->method == METHOD_PUT)
            request->flags |= REQUEST_WAIT_CONTINUE;
    }

 again:
    rc = httpServerTrigger(server);
    if(rc < 0) {
        /* We must be very short on memory.  If there are any requests
           queued, we abort one and try again.  If there aren't, we give
           up. 
*/
        do_log(L_ERROR, "Couldn't trigger server -- out of memory?\n");
        if(server->request) {
            httpServerAbortRequest(server->request, 1, 503,
                                   internAtom("Couldn't trigger server"));
            goto again;
        }
    }
    return 1;
}

/* Start a new connection to server.
   A connection record is allocated and stored in the first empty slot
   below server->numslots; the caller must make sure such a slot exists
   (asserted).  Connection establishment then begins asynchronously,
   through SOCKS when socksParentProxy is set and through a DNS lookup
   otherwise, with serverTimeout armed on the connection.
   Returns -1 if the connection record cannot be allocated, 1 otherwise. */
int
httpServerConnection(HTTPServerPtr server)
{
    HTTPConnectionPtr connection;
    int i;

    connection = httpMakeConnection();
    if(connection == NULL) {
        do_log(L_ERROR, "Couldn't allocate server connection.\n");
        return -1;
    }
    connection->server = server;

    for(i = 0; i < server->numslots; i++) {
        if(!server->connection[i]) {
            server->connection[i] = connection;
            break;
        }
    }
    assert(i < server->numslots);

    connection->request = NULL;
    connection->request_last = NULL;

    do_log(D_SERVER_CONN, "C... %s:%d.\n",
           connection->server->name, connection->server->port);
    httpSetTimeout(connection, serverTimeout);
    if(socksParentProxy) {
        connection->connecting = CONNECTING_SOCKS;
        do_socks_connect(server->name, connection->server->port,
                         httpServerSocksHandler, connection);
    } else {
        connection->connecting = CONNECTING_DNS;
        do_gethostbyname(server->name, 0,
                         httpServerConnectionDnsHandler,
                         connection);
    }
    return 1;
}

/* Completion callback for the lookup started by httpServerConnection.
   status <= 0 is treated as failure: the request at the head of the
   server queue (if any) is aborted with 504 and the connection with 502.
   A DNS_CNAME answer is followed by a new lookup, giving up once
   request->count exceeds 10.  Otherwise a TCP connect is started.
   Always returns 1. */
int
httpServerConnectionDnsHandler(int status, GethostbynameRequestPtr request)
{
    HTTPConnectionPtr connection = request->data;

    /* Cancel the timeout armed for the lookup. */
    httpSetTimeout(connection, -1);

    if(status <= 0) {
        AtomPtr message;
        message = internAtomF("Host %s lookup failed: %s",
                              request->name ?
                              request->name->string : "(unknown)",
                              request->error_message ?
                              request->error_message->string :
                              pstrerror(-status));
        do_log(L_ERROR, "Host %s lookup failed: %s (%d).\n",
               request->name ?
               request->name->string : "(unknown)",
               request->error_message ?
request->error_message->string :
               pstrerror(-status), -status);
        /* Clear "connecting" first so that httpServerAbort below also
           finishes (and thereby releases) the connection. */
        connection->connecting = 0;
        if(connection->server->request)
            httpServerAbortRequest(connection->server->request, 1, 504,
                                   retainAtom(message));
        httpServerAbort(connection, 1, 502, message);
        return 1;
    }

    if(request->addr->string[0] == DNS_CNAME) {
        if(request->count > 10) {
            AtomPtr message = internAtom("DNS CNAME loop");
            do_log(L_ERROR, "DNS CNAME loop.\n");
            connection->connecting = 0;
            if(connection->server->request)
                httpServerAbortRequest(connection->server->request, 1, 504,
                                       retainAtom(message));
            httpServerAbort(connection, 1, 504, message);
            return 1;
        }

        /* Look up the name that follows the DNS_CNAME marker byte. */
        httpSetTimeout(connection, serverTimeout);
        do_gethostbyname(request->addr->string + 1, request->count + 1,
                         httpServerConnectionDnsHandler,
                         connection);
        return 1;
    }

    connection->connecting = CONNECTING_CONNECT;
    httpSetTimeout(connection, serverTimeout);
    do_connect(retainAtom(request->addr), connection->server->addrindex,
               connection->server->port,
               httpServerConnectionHandler, connection);
    return 1;
}

/* Completion callback for do_connect.  When a socket was obtained, it is
   recorded on the connection, the index reported by the connect request
   is stored in server->addrindex, and TCP_NODELAY is requested through
   setNodelay (failure is only logged).  The rest is shared with the
   SOCKS path. */
int
httpServerConnectionHandler(int status,
                            FdEventHandlerPtr event,
                            ConnectRequestPtr request)
{
    HTTPConnectionPtr connection = request->data;

    assert(connection->fd < 0);
    if(request->fd >= 0) {
        int rc;
        connection->fd = request->fd;
        connection->server->addrindex = request->index;
        rc = setNodelay(connection->fd, 1);
        if(rc < 0)
            do_log_error(L_WARN, errno, "Couldn't disable Nagle's algorithm");
    }

    return httpServerConnectionHandlerCommon(status, connection);
}

/* Completion callback for do_socks_connect.  Records the socket, resets
   server->addrindex to 0 and continues in the common handler. */
int
httpServerSocksHandler(int status, SocksRequestPtr request)
{
    HTTPConnectionPtr connection = request->data;

    assert(connection->fd < 0);
    if(request->fd >= 0) {
        connection->fd = request->fd;
        connection->server->addrindex = 0;
    }
    return httpServerConnectionHandlerCommon(status, connection);
}

/* Shared tail of the connect and SOCKS callbacks.  A negative status
   aborts the head of the server queue and the connection with 504; the
   object is only marked failed when the cause is not -ECLIENTRESET.
   On success the server is triggered so queued requests get sent.
   Always returns 1. */
int
httpServerConnectionHandlerCommon(int status, HTTPConnectionPtr connection)
{
    httpSetTimeout(connection, -1);

    if(status < 0) {
        AtomPtr message =
            internAtomError(-status, "Connect to %s:%d failed",
connection->server->name,
                            connection->server->port);
        /* A client reset is not worth an error-level log entry. */
        if(status != -ECLIENTRESET)
            do_log_error(L_ERROR, -status, "Connect to %s:%d failed",
                         connection->server->name, connection->server->port);
        connection->connecting = 0;
        if(connection->server->request)
            httpServerAbortRequest(connection->server->request,
                                   status != -ECLIENTRESET, 504,
                                   retainAtom(message));
        httpServerAbort(connection, status != -ECLIENTRESET, 504, message);
        return 1;
    }

    do_log(D_SERVER_CONN, "C    %s:%d.\n",
           connection->server->name, connection->server->port);

    connection->connecting = 0;
    /* serverTrigger will take care of inserting any timeouts */
    httpServerTrigger(connection->server);
    return 1;
}

/* Fd-event callback registered by httpServerTrigger on idle connections
   (POLLIN).  The connection has no request (asserted).  Its idle-handler
   slot is cleared and the connection is aborted with 504 "Timeout".
   Always returns 1. */
int
httpServerIdleHandler(int a, FdEventHandlerPtr event)
{
    HTTPConnectionPtr connection = *(HTTPConnectionPtr*)event->data;
    HTTPServerPtr server = connection->server;
    int i;

    assert(!connection->request);

    do_log(D_SERVER_CONN, "Idle connection to %s:%d died.\n",
           connection->server->name, connection->server->port);

    /* Forget the handler for this slot; the handler itself is not
       unregistered here. */
    for(i = 0; i < server->maxslots; i++) {
        if(connection == server->connection[i]) {
            server->idleHandler[i] = NULL;
            break;
        }
    }
    assert(i < server->maxslots);

    httpServerAbort(connection, 1, 504, internAtom("Timeout"));
    return 1;
}

/* Discard aborted requests at the head of the queue. 
*/
/* A queued request counts as aborted when its link to the client-side
   request (request->request) has been cleared.  Each such request is
   unlinked, its object has the in-progress/validating flags cleared and
   is released with notification, and the request is destroyed. */
static void
httpServerDiscardRequests(HTTPServerPtr server)
{
    HTTPRequestPtr request;
    while(server->request && !server->request->request) {
        request = server->request;
        server->request = request->next;
        request->next = NULL;
        if(server->request == NULL)
            server->request_last = NULL;
        request->object->flags &= ~(OBJECT_INPROGRESS | OBJECT_VALIDATING);
        releaseNotifyObject(request->object);
        request->object = NULL;
        httpDestroyRequest(request);
    }
}

/* Decide whether another request may be pipelined behind what is already
   queued on connection.  Returns 1 for yes, 0 for no.
   pipelineAdditionalRequests <= 0 always says no and >= 2 always says
   yes.  For the remaining value the answer is yes when the connection is
   empty, or when it holds a single persistent request that is a HEAD, a
   conditional GET, or a bounded range smaller than
   server->rate * smallRequestTime. */
static int
pipelineIsSmall(HTTPConnectionPtr connection)
{
    HTTPRequestPtr request = connection->request;

    if(pipelineAdditionalRequests <= 0)
        return 0;
    else if(pipelineAdditionalRequests >= 2)
        return 1;

    if(!request)
        return 1;
    if(request->next || !(request->flags & REQUEST_PERSISTENT))
        return 0;
    if(request->method == METHOD_HEAD ||
       request->method == METHOD_CONDITIONAL_GET)
        return 1;
    if(request->to >= 0 && connection->server->rate > 0 &&
       request->to - request->from < connection->server->rate *
       smallRequestTime)
        return 1;
    return 0;
}

/* Count the requests waiting in server's queue. */
static int
numRequests(HTTPServerPtr server)
{
    int n = 0;
    HTTPRequestPtr request = server->request;
    while(request) {
        n++;
        request = request->next;
    }
    return n;
}

/* Pick a connection on which the next queued request can be sent.
   Returns the connection, or NULL if none is usable right now (a new
   connection may have been started as a side effect).  *idle_return is
   set to the number of idle connections found, or to 0 when a busy
   connection is returned for pipelining.  Idle handlers of the idle
   connections that are examined are unregistered. */
HTTPConnectionPtr
httpServerGetConnection(HTTPServerPtr server, int *idle_return)
{
    int i, j;
    int connecting = 0, empty = 0, idle = 0;

    /* j is the index of the first idle connection, -1 if none. */
    j = -1;
    /* Try to find an idle connection */
    for(i = 0; i < server->numslots; i++) {
        if(server->connection[i]) {
            if(!server->connection[i]->connecting) {
                if(!server->connection[i]->request) {
                    if(server->idleHandler[i])
                        unregisterFdEvent(server->idleHandler[i]);
                    server->idleHandler[i] = NULL;
                    if(j < 0) j = i;
                    idle++;
                }
            } else
                connecting++;
        } else
            empty++;
    }

    if(j >= 0) {
        *idle_return = idle;
        return server->connection[j];
    }

    /* If there's an empty slot, schedule connection creation */
    if(empty) {
        /* Don't open a connection if there are already enough in
           progress, except if the server doesn't do persistent
           connections and there's only one in progress. 
*/ if((connecting == 0 || (server->persistent <= 0 && connecting <= 1)) || connecting < numRequests(server)) { httpServerConnection(server); } } /* Find a connection that can accept additional requests */ if(server->version == HTTP_11 && server->pipeline >= 4) { for(i = 0; i < serverSlots; i++) { if(server->connection[i] && !server->connection[i]->connecting && pipelineIsSmall(server->connection[i])) { if(server->idleHandler[i]) unregisterFdEvent(server->idleHandler[i]); server->idleHandler[i] = NULL; *idle_return = 0; return server->connection[i]; } } } *idle_return = idle; return NULL; } int httpServerTrigger(HTTPServerPtr server) { HTTPConnectionPtr connection; HTTPRequestPtr request; int idle, n, i, rc, numidle; while(server->request) { httpServerDiscardRequests(server); if(!server->request) break; if(REQUEST_SIDE(server->request)) { rc = httpServerSideRequest(server); /* If rc is 0, httpServerSideRequest didn't dequeue this request. Go through the scheduling loop again, come back later. */ if(rc <= 0) break; continue; } connection = httpServerGetConnection(server, &numidle); if(!connection) break; /* If server->pipeline <= 0, we don't do pipelining. If server->pipeline is 1, then we are ready to start probing for pipelining on the server; we then send exactly two requests in what is hopefully a single packet to check whether the server has the nasty habit of discarding its input buffers after each request. If server->pipeline is 2 or 3, the pipelining probe is in progress on this server, and we don't pipeline anything until it succeeds. When server->pipeline >= 4, pipelining is believed to work on this server. 
*/
        /* n is the pipeline depth allowed on this connection for this
           round. */
        if(server->version != HTTP_11 || server->pipeline <= 0 ||
           server->pipeline == 2 || server->pipeline == 3) {
            if(connection->pipelined == 0)
                n = 1;
            else
                n = 0;
        } else if(server->pipeline == 1) {
            if(connection->pipelined == 0)
                n = MIN(2, maxPipelineTrain);
            else
                n = 0;
        } else {
            n = maxPipelineTrain;
        }

        /* Don't pipeline if there are more idle connections */
        if(numidle >= 2)
            n = MIN(n, 1);

        /* Remember whether the connection had nothing in flight before
           this round; if so a reader is started below.  i counts the
           requests written in this round. */
        idle = !connection->pipelined;
        i = 0;
        while(server->request && connection->pipelined < n) {
            httpServerDiscardRequests(server);
            if(!server->request) break;
            request = server->request;
            assert(request->request->request == request);
            rc = httpWriteRequest(connection, request, -1);
            if(rc < 0) {
                /* Only the first request of a round is aborted on write
                   failure; later ones simply stay queued. */
                if(i == 0)
                    httpServerAbortRequest(request, rc != -ECLIENTRESET, 503,
                                           internAtom("Couldn't "
                                                      "write request"));
                break;
            }
            do_log(D_SERVER_CONN, "W: ");
            do_log_n(D_SERVER_CONN,
                     request->object->key, request->object->key_size);
            do_log(D_SERVER_CONN, " (%d)\n", request->method);
            if(connection->pipelined > 0)
                request->flags |= REQUEST_PIPELINED;
            request->time0 = current_time;
            i++;
            /* Move the request from the server queue to the connection. */
            server->request = request->next;
            request->next = NULL;
            if(server->request == NULL)
                server->request_last = NULL;
            httpQueueRequest(connection, request);
            connection->pipelined++;
        }
        /* Two requests went out while in state 1: the probe is now in
           progress. */
        if(server->persistent > 0 && server->pipeline == 1 && i >= 2)
            server->pipeline = 2;

        if(i > 0) httpServerSendRequest(connection);

        if(idle && connection->pipelined > 0)
            httpServerReply(connection, 0);

        /* Nothing could be written in this round; stop scheduling. */
        if(i == 0) break;
    }

    for(i = 0; i < server->maxslots; i++) {
        if(server->connection[i] &&
           !server->connection[i]->connecting &&
           !server->connection[i]->request) {
            /* Artificially age any fresh connections that aren't used
               straight away; this is necessary for the logic for POST and
               the logic that determines whether a given request should be
               restarted. 
*/ if(server->connection[i]->serviced == 0) server->connection[i]->serviced = 1; if(!server->idleHandler[i]) server->idleHandler[i] = registerFdEvent(server->connection[i]->fd, POLLIN, httpServerIdleHandler, sizeof(HTTPConnectionPtr), &server->connection[i]); if(!server->idleHandler[i]) { do_log(L_ERROR, "Couldn't register idle handler.\n"); httpServerFinish(server->connection[i], 1, 0); } httpSetTimeout(server->connection[i], serverIdleTimeout); } } return 1; } int httpServerSideRequest(HTTPServerPtr server) { HTTPRequestPtr request = server->request; HTTPConnectionPtr connection; HTTPRequestPtr requestor = request->request; HTTPConnectionPtr client = requestor->connection; int rc, i, freeslots, idle, connecting; assert(REQUEST_SIDE(request)); connection = NULL; freeslots = 0; idle = -1; connecting = 0; /* Find a fresh connection */ for(i = 0; i < server->numslots; i++) { if(!server->connection[i]) freeslots++; else if(!server->connection[i]->connecting) { if(!server->connection[i]->request) { if(server->connection[i]->serviced == 0) { if(server->idleHandler[i]) unregisterFdEvent(server->idleHandler[i]); server->idleHandler[i] = NULL; connection = server->connection[i]; break; } else { idle = i; } } } else { connecting++; } } if(!connection) { /* Make sure that a fresh connection will be established at some point, then wait until httpServerTrigger calls us again. 
*/
        if(freeslots) {
            httpServerConnection(server);
        } else {
            if(idle >= 0) {
                /* Shutdown a random idle connection */
                pokeFdEvent(server->connection[idle]->fd,
                            -EDOSHUTDOWN, POLLIN | POLLOUT);
            }
        }
        return 0;
    }

    rc = httpWriteRequest(connection, request, client->bodylen);
    if(rc < 0) {
        do_log(L_ERROR, "Couldn't write POST or PUT request.\n");
        httpServerAbortRequest(request, rc != -ECLIENTRESET, 503,
                               internAtom("Couldn't write request"));
        return 0;
    }
    /* Move the request from the server queue to the connection. */
    server->request = request->next;
    request->next = NULL;
    if(server->request == NULL)
        server->request_last = NULL;
    httpQueueRequest(connection, request);
    connection->pipelined = 1;
    request->time0 = current_time;
    /* reqoffset counts the body bytes forwarded so far; bodylen is the
       total taken from the client connection. */
    connection->reqoffset = 0;
    connection->bodylen = client->bodylen;
    httpServerDoSide(connection);
    return 1;
}

/* Drive the forwarding of a request body from the client connection to
   the server connection.  Depending on state this writes the pending
   request headers (with or without body bytes), handles an aborted
   object, flushes buffered body bytes, or schedules another read from
   the client.  Always returns 1. */
int
httpServerDoSide(HTTPConnectionPtr connection)
{
    HTTPRequestPtr request = connection->request;
    HTTPRequestPtr requestor = request->request;
    HTTPConnectionPtr client = requestor->connection;
    /* Body bytes available in the client buffer, capped by what is
       still missing from the body. */
    int len = MIN(client->reqlen - client->reqbegin,
                  connection->bodylen - connection->reqoffset);
    /* Flush when enough is buffered, when the buffer does not start at
       offset 0, or when the buffer holds the end of the body. */
    int doflush =
        len > 0 &&
        (len >= maxSideBuffering ||
         client->reqbegin > 0 ||
         (connection->reqoffset + client->reqlen - client->reqbegin) >=
         connection->bodylen);
    int done = connection->reqoffset >= connection->bodylen;

    assert(connection->bodylen >= 0);

    httpSetTimeout(connection, 60);

    if(connection->reqlen > 0) {
        /* Send the headers, but don't send any part of the body if
           we're in wait_continue. */
        do_stream_2(IO_WRITE,
                    connection->fd, 0,
                    connection->reqbuf, connection->reqlen,
                    client->reqbuf + client->reqbegin,
                    (request->flags & REQUEST_WAIT_CONTINUE) ?
0 : len, httpServerSideHandler2, connection); httpServerReply(connection, 0); } else if(request->object->flags & OBJECT_ABORTED) { if(connection->reqbuf) dispose_chunk(connection->reqbuf); connection->reqbuf = NULL; connection->reqlen = 0; pokeFdEvent(connection->fd, -ESHUTDOWN, POLLIN); if(client->flags & CONN_READER) { client->flags |= CONN_SIDE_READER; do_stream(IO_READ | IO_IMMEDIATE | IO_NOTNOW, client->fd, 0, NULL, 0, httpClientSideHandler, client); } } else if(!(request->flags & REQUEST_WAIT_CONTINUE) && doflush) { /* Make sure there's a reqbuf, as httpServerFinish uses it to determine if there's a writer. */ if(connection->reqbuf == NULL) connection->reqbuf = get_chunk(); assert(connection->reqbuf != NULL); do_stream(IO_WRITE, connection->fd, 0, client->reqbuf + client->reqbegin, len, httpServerSideHandler, connection); } else { if(connection->reqbuf) { httpConnectionDestroyReqbuf(connection); connection->reqlen = 0; } if(request->flags & REQUEST_WAIT_CONTINUE) { do_log(D_SERVER_CONN, "W... %s:%d.\n", connection->server->name, connection->server->port); return 1; } client->flags |= CONN_SIDE_READER; do_stream(IO_READ | (done ? IO_IMMEDIATE : 0 ) | IO_NOTNOW, client->fd, client->reqlen, client->reqbuf, CHUNK_SIZE, httpClientSideHandler, client); } return 1; } static int httpClientDelayedDoSideHandler(TimeEventHandlerPtr event) { HTTPConnectionPtr connection = *(HTTPConnectionPtr*)event->data; httpServerDoSide(connection); return 1; } static int httpServerDelayedDoSide(HTTPConnectionPtr connection) { TimeEventHandlerPtr handler; handler = scheduleTimeEvent(1, httpClientDelayedDoSideHandler, sizeof(connection), &connection); if(!handler) { do_log(L_ERROR, "Couldn't schedule DoSide -- freeing memory.\n"); free_chunk_arenas(); handler = scheduleTimeEvent(1, httpClientDelayedDoSideHandler, sizeof(connection), &connection); do_log(L_ERROR, "Couldn't schedule DoSide.\n"); /* Somebody will hopefully end up timing out. 
*/
        return 1;
    }
    return 1;
}

/* Shared write-completion handler for request-body forwarding.
   kind is 2 when called through httpServerSideHandler2 (the do_stream_2
   write of headers plus body) and 1 when called through
   httpServerSideHandler (body only).
   On error the request buffer is destroyed, the socket is shut down
   unless the cause is -ECLIENTRESET, the object is aborted with 503 and
   httpServerDoSide is called again.  On success the written body bytes
   are removed from the client buffer, reqoffset is advanced and
   httpServerDoSide is called again.
   Returns 0 only for kind 2 while srequest->offset is still below the
   header length (NOTE(review): presumably asks the stream layer to keep
   writing -- confirm); returns 1 otherwise. */
static int
httpServerSideHandlerCommon(int kind, int status,
                            FdEventHandlerPtr event,
                            StreamRequestPtr srequest)
{
    HTTPConnectionPtr connection = srequest->data;
    HTTPRequestPtr request = connection->request;
    HTTPRequestPtr requestor = request->request;
    HTTPConnectionPtr client = requestor->connection;
    int bodylen;

    assert(request->object->flags & OBJECT_INPROGRESS);

    if(status) {
        do_log_error(L_ERROR, -status, "Couldn't write to server");
        httpConnectionDestroyReqbuf(connection);
        if(status != -ECLIENTRESET)
            shutdown(connection->fd, 2);
        abortObject(request->object, 503,
                    internAtom("Couldn't write to server"));
        /* Let the read side handle the error */
        httpServerDoSide(connection);
        return 1;
    }

    assert(srequest->offset > 0);

    if(kind == 2) {
        if(srequest->offset < connection->reqlen)
            return 0;
        /* Everything past the headers is body. */
        bodylen = srequest->offset - connection->reqlen;
        connection->reqlen = 0;
        httpConnectionDestroyReqbuf(connection);
    } else {
        bodylen = srequest->offset;
    }


    assert(client->reqbegin + bodylen <= client->reqlen);

    /* Slide any unsent bytes to the front of the client buffer. */
    if(client->reqlen > client->reqbegin + bodylen)
        memmove(client->reqbuf, client->reqbuf + client->reqbegin + bodylen,
                client->reqlen - client->reqbegin - bodylen);
    client->reqlen -= bodylen + client->reqbegin;
    client->reqbegin = 0;
    connection->reqoffset += bodylen;
    httpServerDoSide(connection);
    return 1;
}

/* Write-completion callback for body-only writes (kind 1). */
int
httpServerSideHandler(int status,
                      FdEventHandlerPtr event,
                      StreamRequestPtr srequest)
{
    return httpServerSideHandlerCommon(1, status, event, srequest);
}

/* Write-completion callback for headers-plus-body writes (kind 2). */
int
httpServerSideHandler2(int status,
                       FdEventHandlerPtr event,
                       StreamRequestPtr srequest)
{
    return httpServerSideHandlerCommon(2, status, event, srequest);
}

/* s is 0 to keep the connection alive, 1 to shutdown the connection */
/* Finish the request at the head of connection (if any): update the
   server statistics, dequeue and destroy the request, then either close
   and free the connection or keep it for further requests.  offset is
   asserted not to exceed connection->len; bytes of the buffer past it
   are kept for the next pipelined reply.  The server is triggered again
   before returning. */
void
httpServerFinish(HTTPConnectionPtr connection, int s, int offset)
{
    HTTPServerPtr server = connection->server;
    HTTPRequestPtr request = connection->request;
    int i;

    if(request) {
        assert(connection->pipelined >= 1);
        assert((connection->pipelined > 1) == (request->next !=
NULL)); } else { assert(connection->pipelined == 0); } if(!s && (!connection->request || !(connection->request->flags & REQUEST_PERSISTENT))) s = 1; if(connection->serviced >= maxConnectionRequests || connection->time < current_time.tv_sec - maxConnectionAge) s = 1; if(connection->reqbuf) { /* As most normal requests go out in a single packet, this is extremely unlikely to happen. As for POST/PUT requests, they are not pipelined, so this can only happen if the server sent an error reply early. */ assert(connection->fd >= 0); shutdown(connection->fd, 1); pokeFdEvent(connection->fd, -EDOSHUTDOWN, POLLOUT); httpServerDelayedFinish(connection); goto done; } if(request) { /* Update statistics about the server */ int size = -1, d = -1, rtt = -1, rate = -1; if(connection->offset > 0 && request->from >= 0) size = connection->offset - request->from; if(request->time1.tv_sec != null_time.tv_sec) { d = timeval_minus_usec(¤t_time, &request->time1); if(!(request->flags & REQUEST_PIPELINED) && request->time0.tv_sec != null_time.tv_sec) rtt = timeval_minus_usec(&request->time1, &request->time0); if(size >= 8192 && d > 50000) rate = ((double)size / (double)d) * 1000000.0 + 0.5; } request->time0 = null_time; request->time1 = null_time; if(rtt >= 0) { if(server->rtt >= 0) server->rtt = (3 * server->rtt + rtt + 2) / 4; else server->rtt = rtt; } if(rate >= 0) { if(server->rate >= 0) server->rate = (3 * server->rate + rate + 2) / 4; else server->rate = rate; } httpDequeueRequest(connection); connection->pipelined--; request->object->flags &= ~(OBJECT_INPROGRESS | OBJECT_VALIDATING); if(request->request) { request->request->request = NULL; request->request = NULL; } releaseNotifyObject(request->object); request->object = NULL; httpDestroyRequest(request); } do_log(D_SERVER_CONN, "Done with server %s:%d connection (%d)\n", connection->server->name, connection->server->port, s); assert(offset <= connection->len); if(!s) { if(offset < connection->len) { assert(connection->buf != NULL); 
if(!connection->pipelined) {
                /* Leftover input with nothing pending cannot belong to
                   any reply: close the connection. */
                do_log(L_WARN,
                       "Closing connection to %s:%d: "
                       "%d stray bytes of data.\n",
                       server->name, server->port, connection->len - offset);
                s = 1;
            } else {
                /* Keep the unread bytes for the next pipelined reply. */
                memmove(connection->buf, connection->buf + offset,
                        connection->len - offset);
                connection->len = connection->len - offset;
                if((connection->flags & CONN_BIGBUF) &&
                   connection->len <= CHUNK_SIZE)
                    httpConnectionUnbigify(connection);
            }
        } else {
            connection->len = 0;
        }
    }

    connection->server->time = current_time.tv_sec;
    connection->serviced++;

    if(s) {
        /* Shut the connection down and release it. */
        if(connection->timeout)
            cancelTimeEvent(connection->timeout);
        connection->timeout = NULL;
        httpConnectionDestroyBuf(connection);
        if(connection->fd >= 0)
            CLOSE(connection->fd);
        connection->fd = -1;
        server->persistent -= 1;
        if(server->persistent < -5)
            server->numslots = MIN(server->maxslots, serverMaxSlots);
        if(connection->request) {
            /* Requests were still pipelined behind the finished one:
               penalise the pipelining score and put them back at the
               front of the server queue. */
            HTTPRequestPtr req;
            do_log(D_SERVER_CONN, "Restarting pipeline to %s:%d.\n",
                   server->name, server->port);
            if(server->pipeline == 2)
                server->pipeline -= 20;
            else
                server->pipeline -= 5;
            req = connection->request;
            while(req) {
                req->connection = NULL;
                req = req->next;
            }
            if(server->request)
                connection->request_last->next = server->request;
            else
                server->request_last = connection->request_last;
            server->request = connection->request;
            connection->request = NULL;
            connection->request_last = NULL;
        }
        /* Make sure we don't get confused into thinking a probe
           is in progress. */
        if(server->pipeline == 2 || server->pipeline == 3)
            server->pipeline = 1;
        /* Locate the connection's slot, clear it and free the record. */
        for(i = 0; i < server->maxslots; i++)
            if(connection == server->connection[i])
                break;
        assert(i < server->maxslots);
        if(server->idleHandler[i])
            unregisterFdEvent(server->idleHandler[i]);
        server->idleHandler[i] = NULL;
        server->connection[i] = NULL;
        free(connection);
    } else {
        /* The connection survives: credit the server's persistence
           score and, once it is positive, set the slot limit according
           to the HTTP version. */
        server->persistent += 1;
        if(server->persistent > 0)
            server->numslots = MIN(server->maxslots,
                                   server->version == HTTP_10 ?
serverSlots1 : serverSlots); httpSetTimeout(connection, serverTimeout); /* See httpServerTrigger */ if(connection->pipelined || (server->version == HTTP_11 && server->pipeline <= 0) || (server->pipeline == 3)) { server->pipeline++; } if(connection->pipelined) { httpServerReply(connection, 1); } else { httpConnectionDestroyBuf(connection); } } done: httpServerTrigger(server); } static int httpServerDelayedFinishHandler(TimeEventHandlerPtr event) { HTTPConnectionPtr connection = *(HTTPConnectionPtr*)event->data; httpServerFinish(connection, 1, 0); return 1; } static void httpServerDelayedFinish(HTTPConnectionPtr connection) { TimeEventHandlerPtr handler; handler = scheduleTimeEvent(1, httpServerDelayedFinishHandler, sizeof(connection), &connection); if(!handler) { do_log(L_ERROR, "Couldn't schedule delayed finish -- freeing memory."); free_chunk_arenas(); handler = scheduleTimeEvent(1, httpServerDelayedFinishHandler, sizeof(connection), &connection); if(!handler) { do_log(L_ERROR, "Couldn't schedule delayed finish -- aborting.\n"); polipoExit(); } } } void httpServerReply(HTTPConnectionPtr connection, int immediate) { assert(connection->pipelined > 0); if(connection->request->request == NULL) { do_log(L_WARN, "Aborting pipeline on %s:%d.\n", connection->server->name, connection->server->port); httpServerFinish(connection, 1, 0); return; } do_log(D_SERVER_CONN, "R: "); do_log_n(D_SERVER_CONN, connection->request->object->key, connection->request->object->key_size); do_log(D_SERVER_CONN, " (%d)\n", connection->request->method); if(connection->len == 0) httpConnectionDestroyBuf(connection); httpSetTimeout(connection, serverTimeout); do_stream_buf(IO_READ | (immediate ? 
IO_IMMEDIATE : 0) | IO_NOTNOW,
                  connection->fd, connection->len,
                  &connection->buf, CHUNK_SIZE,
                  httpServerReplyHandler, connection);
}

/* Count the requests currently queued on connection. */
int
httpConnectionPipelined(HTTPConnectionPtr connection)
{
    HTTPRequestPtr request = connection->request;
    int i = 0;
    while(request) {
        i++;
        request = request->next;
    }
    return i;
}

/* Make request the last one on its connection: clear its
   REQUEST_PERSISTENT flag and move every request queued behind it back
   to the front of the server queue.  connection->pipelined is then
   recomputed from the connection's queue. */
void
httpServerUnpipeline(HTTPRequestPtr request)
{
    HTTPConnectionPtr connection = request->connection;
    HTTPServerPtr server = connection->server;

    request->flags &= ~REQUEST_PERSISTENT;
    if(request->next) {
        HTTPRequestPtr req;
        do_log(L_WARN,
               "Restarting pipeline to %s:%d.\n",
               connection->server->name, connection->server->port);
        /* Detach the followers from the connection ... */
        req = request->next;
        while(req) {
            req->connection = NULL;
            req = req->next;
        }
        /* ... and splice them in ahead of the server's pending queue. */
        if(server->request)
            connection->request_last->next = server->request;
        else
            server->request_last = connection->request_last;
        server->request = request->next;
        request->next = NULL;
        connection->request_last = request;
    }
    connection->pipelined = httpConnectionPipelined(connection);
}

/* Give up on connection: every request queued on it, including the head,
   is moved back to the front of the server queue, and the connection is
   finished with shutdown. */
void
httpServerRestart(HTTPConnectionPtr connection)
{
    HTTPServerPtr server = connection->server;
    HTTPRequestPtr request = connection->request;

    if(request) {
        HTTPRequestPtr req;
        if(request->next)
            do_log(L_WARN,
                   "Restarting pipeline to %s:%d.\n",
                   connection->server->name, connection->server->port);
        req = request;
        while(req) {
            req->connection = NULL;
            req = req->next;
        }
        if(server->request)
            connection->request_last->next = server->request;
        else
            server->request_last = connection->request_last;
        server->request = request;
        connection->request = NULL;
        connection->request_last = NULL;
    }
    /* httpServerFinish asserts pipelined == 0 when there is no request. */
    connection->pipelined = 0;
    httpServerFinish(connection, 1, 0);
}

/* Request bytes [from, to) of object from its origin server on behalf
   of requestor (to < 0 leaves the end open).  closure must be NULL and
   the object must be a non-local OBJECT_HTTP object (asserted).
   Returns 1 when the object is already in progress and on the error
   paths that abort the object; 0 when this requestor has already issued
   a request; -1 when proxyOffline is set. */
int
httpServerRequest(ObjectPtr object, int method, int from, int to,
                  HTTPRequestPtr requestor, void *closure)
{
    int rc;
    /* Holds the host part of the URL plus a terminating NUL. */
    char name[132];
    int port;
    int x, y, z;

    assert(from >= 0 && (to < 0 || to > from));
    assert(closure == NULL);
    assert(!(object->flags & OBJECT_LOCAL));
    assert(object->type == OBJECT_HTTP);

    if(object->flags & OBJECT_INPROGRESS)
        return
1;

    if(requestor->flags & REQUEST_REQUESTED)
        return 0;

    assert(requestor->request == NULL);

    if(proxyOffline)
        return -1;

    /* x and y delimit the host name inside the key; the length check
       keeps it within name[] (131 characters plus the NUL). */
    rc = parseUrl(object->key, object->key_size, &x, &y, &port, &z);

    if(rc < 0 || x < 0 || y < 0 || y - x > 131) {
        do_log(L_ERROR, "Couldn't parse URL: ");
        do_log_n(L_ERROR, object->key, object->key_size);
        do_log(L_ERROR, "\n");
        abortObject(object, 400, internAtom("Couldn't parse URL"));
        notifyObject(object);
        return 1;
    }

    if(!intListMember(port, allowedPorts)) {
        do_log(L_ERROR, "Attempted connection to port %d.\n", port);
        abortObject(object, 403, internAtom("Forbidden port"));
        notifyObject(object);
        return 1;
    }

    memcpy(name, ((char*)object->key) + x, y - x);
    name[y - x] = '\0';

    /* Mark the requestor so that a later call for it returns 0 above. */
    requestor->flags |= REQUEST_REQUESTED;
    rc = httpMakeServerRequest(name, port, object, method, from, to,
                               requestor);

    if(rc < 0) {
        abortObject(object,
                    503, internAtom("Couldn't schedule server request"));
        notifyObject(object);
        return 1;
    }

    return 1;
}

int
httpWriteRequest(HTTPConnectionPtr connection, HTTPRequestPtr request,
                 int bodylen)
{
    ObjectPtr object = request->object;
    int from = request->from, to = request->to, method = request->method;
    char *url = object->key, *m;
    int url_size = object->key_size;
    int x, y, port, z, location_size;
    char *location;
    int l, n, rc, bufsize;

    assert(method != METHOD_NONE);

    if(request->method == METHOD_GET ||
       request->method == METHOD_CONDITIONAL_GET) {
        if(to >= 0) {
            assert(to >= from);
            if(to == from) {
                do_log(L_ERROR, "Requesting empty segment?\n");
                return -1;
            }
        }

        if(object->flags & OBJECT_DYNAMIC) {
            /* Objects flagged OBJECT_DYNAMIC are always requested whole. */
            from = 0;
            to = -1;
        } else {
            objectFillFromDisk(object, from / CHUNK_SIZE * CHUNK_SIZE, 1);
            l = objectHoleSize(request->object, from);
            if(l > 0) {
                if(to <= 0 || to > from + l)
                    to = from + l;
            }

            if(pmmSize && connection->server->pipeline >= 4) {
                if(from == 0)
                    to = to < 0 ? pmmFirstSize : MIN(to, pmmFirstSize);
                else
                    to = to < 0 ?
from + pmmSize : MIN(to, from + pmmSize); } if(from % CHUNK_SIZE != 0) if(objectHoleSize(object, from - 1) != 0) from = from / CHUNK_SIZE * CHUNK_SIZE; } } rc = parseUrl(url, url_size, &x, &y, &port, &z); if(rc < 0 || x < 0 || y < 0) { return -1; } if(connection->reqbuf == NULL) { connection->reqbuf = get_chunk(); if(connection->reqbuf == NULL) return -1; connection->reqlen = 0; } if(method == METHOD_CONDITIONAL_GET && object->last_modified < 0 && object->etag == NULL) method = request->method = METHOD_GET; again: bufsize = (connection->flags & CONN_BIGREQBUF) ? bigBufferSize : CHUNK_SIZE; n = connection->reqlen; switch(method) { case METHOD_GET: case METHOD_CONDITIONAL_GET: m = "GET"; break; case METHOD_HEAD: m = "HEAD"; break; case METHOD_POST: m = "POST"; break; case METHOD_PUT: m = "PUT"; break; default: abort(); } n = snnprintf(connection->reqbuf, n, bufsize, "%s ", m); if(connection->server->isProxy) { n = snnprint_n(connection->reqbuf, n, bufsize, url, url_size); } else { if(url_size - z == 0) { location = "/"; location_size = 1; } else { location = url + z; location_size = url_size - z; } n = snnprint_n(connection->reqbuf, n, bufsize, location, location_size); } do_log(D_SERVER_REQ, "Server request: "); do_log_n(D_SERVER_REQ, url + x, y - x); do_log(D_SERVER_REQ, ": "); do_log_n(D_SERVER_REQ, connection->reqbuf, n); do_log(D_SERVER_REQ, " (method %d from %d to %d, 0x%lx for 0x%lx)\n", method, from, to, (unsigned long)connection, (unsigned long)object); n = snnprintf(connection->reqbuf, n, bufsize, " HTTP/1.1"); n = snnprintf(connection->reqbuf, n, bufsize, "\r\nHost: "); n = snnprint_n(connection->reqbuf, n, bufsize, url + x, y - x); if(port != 80) n = snnprintf(connection->reqbuf, n, bufsize, ":%d", port); if(connection->server->isProxy && parentAuthCredentials) { n = buildServerAuthHeaders(connection->reqbuf, n, bufsize, parentAuthCredentials); } if(bodylen >= 0) n = snnprintf(connection->reqbuf, n, bufsize, "\r\nContent-Length: %d", bodylen); 
if(request->flags & REQUEST_WAIT_CONTINUE) n = snnprintf(connection->reqbuf, n, bufsize, "\r\nExpect: 100-continue"); if(method != METHOD_HEAD && (from > 0 || to >= 0)) { if(to >= 0) { n = snnprintf(connection->reqbuf, n, bufsize, "\r\nRange: bytes=%d-%d", from, to - 1); } else { n = snnprintf(connection->reqbuf, n, bufsize, "\r\nRange: bytes=%d-", from); } } if(method == METHOD_GET && object->etag && (from > 0 || to >= 0)) { if(request->request && request->request->request == request && request->request->from == 0 && request->request->to == -1 && pmmSize == 0 && pmmFirstSize == 0) n = snnprintf(connection->reqbuf, n, bufsize, "\r\nIf-Range: \"%s\"", object->etag); } if(method == METHOD_CONDITIONAL_GET) { if(object->last_modified >= 0) { n = snnprintf(connection->reqbuf, n, bufsize, "\r\nIf-Modified-Since: "); n = format_time(connection->reqbuf, n, bufsize, object->last_modified); } if(object->etag) { n = snnprintf(connection->reqbuf, n, bufsize, "\r\nIf-None-Match: \"%s\"", object->etag); } } n = httpPrintCacheControl(connection->reqbuf, n, bufsize, 0, &request->cache_control); if(n < 0) goto fail; if(request->request && request->request->headers) { n = snnprint_n(connection->reqbuf, n, bufsize, request->request->headers->string, request->request->headers->length); } if(!disableVia) { if(request->request && request->request->via) { n = snnprintf(connection->reqbuf, n, bufsize, "\r\nVia: %s, 1.1 %s", request->request->via->string, proxyName->string); } else { n = snnprintf(connection->reqbuf, n, bufsize, "\r\nVia: 1.1 %s", proxyName->string); } } n = snnprintf(connection->reqbuf, n, bufsize, "\r\nConnection: %s\r\n\r\n", (request->flags & REQUEST_PERSISTENT) ? 
"keep-alive" : "close"); if(n < 0 || n >= bufsize - 1) goto fail; connection->reqlen = n; return n; fail: rc = 0; if(!(connection->flags & CONN_BIGREQBUF)) rc = httpConnectionBigifyReqbuf(connection); if(rc == 1) goto again; return -1; } int httpServerHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { HTTPConnectionPtr connection = srequest->data; AtomPtr message; assert(connection->request->object->flags & OBJECT_INPROGRESS); if(connection->reqlen == 0) { do_log(D_SERVER_REQ, "Writing aborted on 0x%lx\n", (unsigned long)connection); goto fail; } if(status == 0 && !streamRequestDone(srequest)) { httpSetTimeout(connection, serverTimeout); return 0; } httpConnectionDestroyReqbuf(connection); if(status) { if(connection->serviced >= 1) { httpServerRestart(connection); return 1; } if(status >= 0 || status == ECONNRESET) { message = internAtom("Couldn't send request to server: " "short write"); } else { if(status != -EPIPE) do_log_error(L_ERROR, -status, "Couldn't send request to server"); message = internAtomError(-status, "Couldn't send request to server"); } goto fail; } return 1; fail: dispose_chunk(connection->reqbuf); connection->reqbuf = NULL; shutdown(connection->fd, 2); pokeFdEvent(connection->fd, -EDOSHUTDOWN, POLLIN); httpSetTimeout(connection, 60); return 1; } int httpServerSendRequest(HTTPConnectionPtr connection) { assert(connection->server); if(connection->reqlen == 0) { do_log(D_SERVER_REQ, "Writing aborted on 0x%lx\n", (unsigned long)connection); httpConnectionDestroyReqbuf(connection); shutdown(connection->fd, 2); pokeFdEvent(connection->fd, -EDOSHUTDOWN, POLLIN | POLLOUT); return -1; } httpSetTimeout(connection, serverTimeout); do_stream(IO_WRITE, connection->fd, 0, connection->reqbuf, connection->reqlen, httpServerHandler, connection); return 1; } int httpServerReplyHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { HTTPConnectionPtr connection = srequest->data; HTTPRequestPtr request = 
connection->request; int i, body; int bufsize = (connection->flags & CONN_BIGBUF) ? bigBufferSize : CHUNK_SIZE; assert(request->object->flags & OBJECT_INPROGRESS); if(status < 0) { if(connection->serviced >= 1) { httpServerRestart(connection); return 1; } if(status != -ECLIENTRESET) do_log_error(L_ERROR, -status, "Read from server failed"); httpServerAbort(connection, status != -ECLIENTRESET, 502, internAtomError(-status, "Read from server failed")); return 1; } i = findEndOfHeaders(connection->buf, 0, srequest->offset, &body); connection->len = srequest->offset; if(i >= 0) { request->time1 = current_time; return httpServerHandlerHeaders(status, event, srequest, connection); } if(status) { if(connection->serviced >= 1) { httpServerRestart(connection); return 1; } if(status < 0) { do_log(L_ERROR, "Error reading server headers: %d\n", -status); httpServerAbort(connection, status != -ECLIENTRESET, 502, internAtomError(-status, "Error reading server headers")); } else httpServerAbort(connection, 1, 502, internAtom("Server dropped connection")); return 1; } if(connection->len >= bufsize) { int rc = 0; if(!(connection->flags & CONN_BIGBUF)) rc = httpConnectionBigify(connection); if(rc == 0) { do_log(L_ERROR, "Couldn't find end of server's headers.\n"); httpServerAbort(connection, 1, 502, internAtom("Couldn't find end " "of server's headers")); return 1; } else if(rc < 0) { do_log(L_ERROR, "Couldn't allocate big buffer.\n"); httpServerAbort(connection, 1, 500, internAtom("Couldn't allocate big buffer")); return 1; } /* Can't just return 0 -- buf has moved. 
*/ do_stream(IO_READ, connection->fd, connection->len, connection->buf, bigBufferSize, httpServerReplyHandler, connection); return 1; } return 0; } int httpServerHandlerHeaders(int eof, FdEventHandlerPtr event, StreamRequestPtr srequest, HTTPConnectionPtr connection) { HTTPRequestPtr request = connection->request; ObjectPtr object = request->object; int rc; int code, version; int full_len; AtomPtr headers; int len; int te; CacheControlRec cache_control; int age = -1; time_t date, last_modified, expires; struct timeval *init_time; char *etag; AtomPtr via, new_via; int expect_body; HTTPRangeRec content_range; ObjectPtr new_object = NULL, old_object = NULL; int supersede = 0; AtomPtr message = NULL; int suspectDynamic; AtomPtr url = NULL; int waiting = 0; assert(request->object->flags & OBJECT_INPROGRESS); assert(eof >= 0); httpSetTimeout(connection, -1); if(request->flags & REQUEST_WAIT_CONTINUE) { waiting = 1; do_log(D_SERVER_CONN, "W %s:%d.\n", connection->server->name, connection->server->port); request->flags &= ~REQUEST_WAIT_CONTINUE; } rc = httpParseServerFirstLine(connection->buf, &code, &version, &message); if(rc <= 0) { do_log(L_ERROR, "Couldn't parse server status line.\n"); httpServerAbort(connection, 1, 502, internAtom("Couldn't parse server status line")); return 1; } do_log(D_SERVER_REQ, "Server status: "); do_log_n(D_SERVER_REQ, connection->buf, connection->buf[rc - 1] == '\r' ? 
rc - 2 : rc - 2); do_log(D_SERVER_REQ, " (0x%lx for 0x%lx)\n", (unsigned long)connection, (unsigned long)object); if(version != HTTP_10 && version != HTTP_11) { do_log(L_ERROR, "Unknown server HTTP version\n"); httpServerAbort(connection, 1, 502, internAtom("Unknown server HTTP version")); releaseAtom(message); return 1; } connection->version = version; connection->server->version = version; request->flags |= REQUEST_PERSISTENT; url = internAtomN(object->key, object->key_size); rc = httpParseHeaders(0, url, connection->buf, rc, request, &headers, &len, &cache_control, NULL, &te, &date, &last_modified, &expires, NULL, NULL, NULL, &age, &etag, NULL, NULL, &content_range, NULL, &via, NULL); if(rc < 0) { do_log(L_ERROR, "Couldn't parse server headers\n"); releaseAtom(url); releaseAtom(message); httpServerAbort(connection, 1, 502, internAtom("Couldn't parse server headers")); return 1; } if(date < 0) date = current_time.tv_sec; object->code = code; if(code == 100) { if(!REQUEST_SIDE(request)) { httpServerAbort(connection, 1, 502, internAtom("Unexpected continue status")); goto fail; } releaseAtom(url); releaseAtom(message); /* We've already reset wait_continue above, but we must still ensure that the writer notices if it is waiting. The server may send continue status for POST or PUT requests, even when we don't expect it. */ if(waiting) { httpServerDelayedDoSide(connection); notifyObject(object); } connection->len -= rc; if(connection->len > 0) memmove(connection->buf, connection->buf + rc, connection->len); httpServerReply(connection, 1); return 1; } else if(waiting) { /* The server responded with something other than 100 Continue, but the client side is still has its flag set. Tell it to clear it now. 
*/ notifyObject(object); } if(code == 101) { httpServerAbort(connection, 1, 501, internAtom("Upgrade not implemented")); goto fail; } if(via && !checkVia(proxyName, via)) { httpServerAbort(connection, 1, 504, internAtom("Proxy loop detected")); goto fail; } full_len = content_range.full_length; if(code == 206) { if(content_range.from == -1 || content_range.to == -1) { do_log(L_ERROR, "Partial content without range.\n"); httpServerAbort(connection, 1, 502, internAtom("Partial content without range")); goto fail; } if(len >= 0 && len != content_range.to - content_range.from) { do_log(L_ERROR, "Inconsistent partial content.\n"); httpServerAbort(connection, 1, 502, internAtom("Inconsistent partial content")); goto fail; } } else if(code < 400 && (content_range.from >= 0 || content_range.to >= 0 || content_range.full_length >= 0)) { do_log(L_WARN, "Range without partial content.\n"); /* Damn anakata. */ content_range.from = -1; content_range.to = -1; content_range.full_length = -1; } else if(code != 304 && code != 412) { full_len = len; } if(te != TE_IDENTITY && te != TE_CHUNKED) { do_log(L_ERROR, "Unsupported transfer-encoding\n"); httpServerAbort(connection, 1, 502, internAtom("Unsupported transfer-encoding")); goto fail; } if(code == 304) { if(request->method != METHOD_CONDITIONAL_GET) { do_log(L_ERROR, "Unexpected \"not changed\" reply from server\n"); httpServerAbort(connection, 1, 502, internAtom("Unexpected \"not changed\" " "reply from server")); goto fail; } if(object->etag && !etag) { /* RFC 2616 10.3.5. Violated by some front-end proxies. 
*/ do_log(L_WARN, "\"Not changed\" reply with no ETag.\n"); } } if(code == 412) { if(request->method != METHOD_CONDITIONAL_GET || (!object->etag && !object->last_modified)) { do_log(L_ERROR, "Unexpected \"precondition failed\" reply from server.\n"); httpServerAbort(connection, 1, 502, internAtom("Unexpected \"precondition failed\" " "reply from server")); goto fail; } } releaseAtom(url); /* Okay, we're going to accept this reply. */ if((code == 200 || code == 206 || code == 304 || code == 412) && (cache_control.flags & (CACHE_NO | CACHE_NO_STORE) || cache_control.max_age == 0 || (cacheIsShared && cache_control.s_maxage == 0) || (expires >= 0 && expires <= object->age))) { do_log(L_UNCACHEABLE, "Uncacheable object "); do_log_n(L_UNCACHEABLE, object->key, object->key_size); do_log(L_UNCACHEABLE, " (%d)\n", cache_control.flags); } if(request->time0.tv_sec != null_time.tv_sec) init_time = &request->time0; else init_time = ¤t_time; age = MIN(init_time->tv_sec - age, init_time->tv_sec); if(request->method == METHOD_HEAD || code < 200 || code == 204 || code == 304) expect_body = 0; else if(te == TE_IDENTITY) expect_body = (len != 0); else expect_body = 1; connection->chunk_remaining = -1; connection->te = te; old_object = object; connection->server->lies--; if(object->cache_control & CACHE_MISMATCH) supersede = 1; if(code == 304 || code == 412) { if((object->etag && etag && strcmp(object->etag, etag) != 0) || (object->last_modified >= 0 && last_modified >= 0 && object->last_modified != last_modified)) { do_log(L_ERROR, "Inconsistent \"%s\" reply for ", code == 304 ? 
"not changed":"precondition failed"); do_log_n(L_ERROR, object->key, object->key_size); do_log(L_ERROR, "\n"); object->flags |= OBJECT_DYNAMIC; supersede = 1; } } else if(!(object->flags & OBJECT_INITIAL)) { if((object->last_modified < 0 || last_modified < 0) && (!object->etag || !etag)) supersede = 1; else if(object->last_modified != last_modified) supersede = 1; else if(object->etag || etag) { /* We need to be permissive here so as to deal with some front-end proxies that discard ETags on partial replies but not on full replies. */ if(etag && object->etag && strcmp(object->etag, etag) != 0) supersede = 1; else if(!object->etag) supersede = 1; } if(!supersede && (object->cache_control & CACHE_VARY) && dontTrustVaryETag >= 1) { /* Check content-type to work around mod_gzip bugs */ if(!httpHeaderMatch(atomContentType, object->headers, headers) || !httpHeaderMatch(atomContentEncoding, object->headers, headers)) supersede = 1; } if(full_len < 0 && te == TE_IDENTITY) { /* It's an HTTP/1.0 CGI. Be afraid. */ if(expect_body && content_range.from < 0 && content_range.to < 0) supersede = 1; } if(!supersede && object->length >= 0 && full_len >= 0 && object->length != full_len) { do_log(L_WARN, "Inconsistent length.\n"); supersede = 1; } if(!supersede && ((object->last_modified >= 0 && last_modified >= 0) || (object->etag && etag))) { if(request->method == METHOD_CONDITIONAL_GET) { do_log(L_WARN, "Server ignored conditional request.\n"); connection->server->lies += 10; /* Drop the connection? */ } } } else if(code == 416) { do_log(L_ERROR, "Unexpected \"range not satisfiable\" reply\n"); httpServerAbort(connection, 1, 502, internAtom("Unexpected \"range not satisfiable\" " "reply")); /* The object may be superseded. Make sure the next request won't be partial. 
*/ abortObject(object, 502, internAtom("Unexpected \"range not satisfiable\" reply")); return 1; } if(object->flags & OBJECT_INITIAL) supersede = 0; if(supersede) { do_log(L_SUPERSEDED, "Superseding object: "); do_log_n(L_SUPERSEDED, old_object->key, old_object->key_size); do_log(L_SUPERSEDED, " (%d %d %d %s -> %d %d %d %s)\n", object->code, object->length, (int)object->last_modified, object->etag?object->etag: "(none)", code, full_len, (int)last_modified, etag?etag:"(none)"); privatiseObject(old_object, 0); new_object = makeObject(object->type, object->key, object->key_size, 1, 0, object->request, NULL); if(new_object == NULL) { do_log(L_ERROR, "Couldn't allocate object\n"); httpServerAbort(connection, 1, 500, internAtom("Couldn't allocate object")); return 1; } if(urlIsLocal(new_object->key, new_object->key_size)) new_object->flags |= OBJECT_LOCAL; } else { new_object = object; } suspectDynamic = (!etag && last_modified < 0) || (cache_control.flags & (CACHE_NO_HIDDEN | CACHE_NO | CACHE_NO_STORE | (cacheIsShared ? 
CACHE_PRIVATE : 0))) || (cache_control.max_age >= 0 && cache_control.max_age <= 2) || (cacheIsShared && cache_control.s_maxage >= 0 && cache_control.s_maxage <= 5) || (old_object->last_modified >= 0 && old_object->expires >= 0 && (old_object->expires - old_object->last_modified <= 1)) || (supersede && (old_object->date - date <= 5)); if(suspectDynamic) new_object->flags |= OBJECT_DYNAMIC; else if(!supersede) new_object->flags &= ~OBJECT_DYNAMIC; else if(old_object->flags & OBJECT_DYNAMIC) new_object->flags |= OBJECT_DYNAMIC; new_object->age = age; new_object->cache_control |= cache_control.flags; new_object->max_age = cache_control.max_age; new_object->s_maxage = cache_control.s_maxage; new_object->flags &= ~OBJECT_FAILED; if(date >= 0) new_object->date = date; if(last_modified >= 0) new_object->last_modified = last_modified; if(expires >= 0) new_object->expires = expires; if(new_object->etag == NULL) new_object->etag = etag; else free(etag); switch(code) { case 200: case 300: case 301: case 302: case 303: case 307: case 403: case 404: case 405: case 401: if(new_object->message) releaseAtom(new_object->message); new_object->code = code; new_object->message = message; break; case 206: case 304: case 412: if(new_object->code != 200 || !new_object->message) { if(new_object->message) releaseAtom(new_object->message); new_object->code = 200; new_object->message = internAtom("OK"); } releaseAtom(message); break; default: if(new_object->message) releaseAtom(new_object->message); new_object->code = code; new_object->message = retainAtom(message); break; } httpTweakCachability(new_object); if(!via) new_via = internAtomF("%s %s", version == HTTP_11 ? "1.1" : "1.0", proxyName->string); else new_via = internAtomF("%s, %s %s", via->string, version == HTTP_11 ? 
"1.1" : "1.0", proxyName->string); if(new_via == NULL) { do_log(L_ERROR, "Couldn't allocate Via.\n"); } else { if(new_object->via) releaseAtom(new_object->via); new_object->via = new_via; } if(new_object->flags & OBJECT_INITIAL) { objectPartial(new_object, full_len, headers); } else { if(new_object->length < 0) new_object->length = full_len; /* XXX -- RFC 2616 13.5.3 */ releaseAtom(headers); } if(supersede) { assert(new_object != old_object); supersedeObject(old_object); } if(new_object != old_object) { if(new_object->flags & OBJECT_INPROGRESS) { /* Make sure we don't fetch this object two times at the same time. Just drop the connection. */ releaseObject(new_object); httpServerFinish(connection, 1, 0); return 1; } old_object->flags &= ~OBJECT_VALIDATING; new_object->flags |= OBJECT_INPROGRESS; /* Signal the client side to switch to the new object -- see httpClientGetHandler. If it doesn't, we'll give up on this request below. */ new_object->flags |= OBJECT_MUTATING; request->can_mutate = new_object; notifyObject(old_object); request->can_mutate = NULL; new_object->flags &= ~OBJECT_MUTATING; old_object->flags &= ~OBJECT_INPROGRESS; if(request->object == old_object) { if(request->request) request->request->request = NULL; request->request = NULL; request->object = new_object; } else { assert(request->object == new_object); } releaseNotifyObject(old_object); old_object = NULL; object = new_object; } else { objectMetadataChanged(new_object, 0); } if(object->flags & OBJECT_VALIDATING) { object->flags &= ~OBJECT_VALIDATING; notifyObject(object); } if(!expect_body) { httpServerFinish(connection, 0, rc); return 1; } if(request->request == NULL) { httpServerFinish(connection, 1, 0); return 1; } if(code == 412) { /* 412 replies contain a useless body. For now, we drop the connection. 
*/ httpServerFinish(connection, 1, 0); return 1; } if(request->flags & REQUEST_PERSISTENT) { if(request->method != METHOD_HEAD && connection->te == TE_IDENTITY && len < 0) { do_log(L_ERROR, "Persistent reply with no Content-Length\n"); /* That's potentially dangerous, as we could start reading arbitrary data into the object. Unfortunately, some servers do that. */ request->flags &= ~REQUEST_PERSISTENT; } } /* we're getting a body */ if(content_range.from > 0) connection->offset = content_range.from; else connection->offset = 0; if(content_range.to >= 0) request->to = content_range.to; do_log(D_SERVER_OFFSET, "0x%lx(0x%lx): offset = %d\n", (unsigned long)connection, (unsigned long)object, connection->offset); if(connection->len > rc) { rc = connectionAddData(connection, rc); if(rc) { if(rc < 0) { if(rc == -2) { do_log(L_ERROR, "Couldn't parse chunk size.\n"); httpServerAbort(connection, 1, 502, internAtom("Couldn't parse chunk size")); } else { do_log(L_ERROR, "Couldn't add data to connection.\n"); httpServerAbort(connection, 1, 500, internAtom("Couldn't add data " "to connection")); } return 1; } else { if(code != 206) { if(object->length < 0) { object->length = object->size; objectMetadataChanged(object, 0); } else if(object->length != object->size) { httpServerAbort(connection, 1, 500, internAtom("Inconsistent " "object size")); object->length = -1; return 1; } } httpServerFinish(connection, 0, 0); return 1; } } } else { connection->len = 0; } if(eof) { if(connection->te == TE_CHUNKED || (object->length >= 0 && connection->offset < object->length)) { do_log(L_ERROR, "Server closed connection.\n"); httpServerAbort(connection, 1, 502, internAtom("Server closed connection")); return 1; } else { if(code != 206 && eof > 0 && object->length < 0) { object->length = object->size; objectMetadataChanged(object, 0); } httpServerFinish(connection, 1, 0); return 1; } } else { return httpServerReadData(connection, 1); } return 0; fail: releaseAtom(url); releaseAtom(message); 
if(headers) releaseAtom(headers); if(etag) free(etag); if(via) releaseAtom(via); return 1; } int httpServerIndirectHandlerCommon(HTTPConnectionPtr connection, int eof) { HTTPRequestPtr request = connection->request; assert(eof >= 0); assert(request->object->flags & OBJECT_INPROGRESS); if(connection->len > 0) { int rc; rc = connectionAddData(connection, 0); if(rc) { if(rc < 0) { if(rc == -2) { do_log(L_ERROR, "Couldn't parse chunk size.\n"); httpServerAbort(connection, 1, 502, internAtom("Couldn't parse chunk size")); } else { do_log(L_ERROR, "Couldn't add data to connection.\n"); httpServerAbort(connection, 1, 500, internAtom("Couldn't add data " "to connection")); } return 1; } else { if(request->to < 0) { if(request->object->length < 0) { request->object->length = request->object->size; objectMetadataChanged(request->object, 0); } else if(request->object->length != request->object->size) { request->object->length = -1; httpServerAbort(connection, 1, 502, internAtom("Inconsistent " "object size")); return 1; } } httpServerFinish(connection, 0, 0); } return 1; } } if(eof && connection->len == 0) { if(connection->te == TE_CHUNKED || (request->to >= 0 && connection->offset < request->to)) { do_log(L_ERROR, "Server dropped connection.\n"); httpServerAbort(connection, 1, 502, internAtom("Server dropped connection")); return 1; } else { if(request->object->length < 0 && eof > 0 && (request->to < 0 || request->to > request->object->size)) { request->object->length = request->object->size; objectMetadataChanged(request->object, 0); } httpServerFinish(connection, 1, 0); return 1; } } else { return httpServerReadData(connection, 0); } } int httpServerIndirectHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { HTTPConnectionPtr connection = srequest->data; assert(connection->request->object->flags & OBJECT_INPROGRESS); httpSetTimeout(connection, -1); if(status < 0) { if(status != -ECLIENTRESET) do_log_error(L_ERROR, -status, "Read from server failed"); 
httpServerAbort(connection, status != -ECLIENTRESET, 502, internAtomError(-status, "Read from server failed")); return 1; } connection->len = srequest->offset; return httpServerIndirectHandlerCommon(connection, status); } int httpServerReadData(HTTPConnectionPtr connection, int immediate) { HTTPRequestPtr request = connection->request; ObjectPtr object = request->object; int to = -1; assert(object->flags & OBJECT_INPROGRESS); if(request->request == NULL) { httpServerFinish(connection, 1, 0); return 1; } if(request->to >= 0) to = request->to; else to = object->length; if(to >= 0 && to == connection->offset) { httpServerFinish(connection, 0, 0); return 1; } if(connection->len == 0 && ((connection->te == TE_IDENTITY && to > connection->offset) || (connection->te == TE_CHUNKED && connection->chunk_remaining > 0))) { /* Read directly into the object */ int i = connection->offset / CHUNK_SIZE; int j = connection->offset % CHUNK_SIZE; int end, len, more; /* See httpServerDirectHandlerCommon if you change this */ if(connection->te == TE_CHUNKED) { len = connection->chunk_remaining; /* The logic here is that we want more to just fit the chunk header if we're doing a large read, but do a large read if we would otherwise do a small one. The magic constant 2000 comes from the assumption that the server uses chunks that have a size that are a power of two (possibly including the chunk header), and that we want a full ethernet packet to fit into our read. */ more = (len >= 2000 ? 20 : MIN(2048 - len, CHUNK_SIZE)); } else { len = to - connection->offset; /* We read more data only when there is a reasonable chance of there being another reply coming. */ more = (connection->pipelined > 1) ? CHUNK_SIZE : 0; } end = len + connection->offset; httpConnectionDestroyBuf(connection); /* The order of allocation is important in case we run out of memory. 
*/ lockChunk(object, i); if(object->chunks[i].data == NULL) object->chunks[i].data = get_chunk(); if(object->chunks[i].data && object->chunks[i].size >= j) { if(len + j > CHUNK_SIZE) { lockChunk(object, i + 1); if(object->chunks[i + 1].data == NULL) object->chunks[i + 1].data = get_chunk(); /* Unless we're grabbing all len of data, we do not want to do an indirect read immediately afterwards. */ if(more && len + j <= 2 * CHUNK_SIZE) { if(!connection->buf) connection->buf = get_chunk(); /* checked below */ } if(object->chunks[i + 1].data) { do_stream_3(IO_READ | IO_NOTNOW, connection->fd, j, object->chunks[i].data, CHUNK_SIZE, object->chunks[i + 1].data, MIN(CHUNK_SIZE, end - (i + 1) * CHUNK_SIZE), connection->buf, connection->buf ? more : 0, httpServerDirectHandler2, connection); return 1; } unlockChunk(object, i + 1); } if(more && len + j <= CHUNK_SIZE) { if(!connection->buf) connection->buf = get_chunk(); } do_stream_2(IO_READ | IO_NOTNOW, connection->fd, j, object->chunks[i].data, MIN(CHUNK_SIZE, end - i * CHUNK_SIZE), connection->buf, connection->buf ? more : 0, httpServerDirectHandler, connection); return 1; } else { unlockChunk(object, i); } } if(connection->len == 0) httpConnectionDestroyBuf(connection); httpSetTimeout(connection, serverTimeout); do_stream_buf(IO_READ | IO_NOTNOW | ((immediate && connection->len) ? IO_IMMEDIATE : 0), connection->fd, connection->len, &connection->buf, (connection->te == TE_CHUNKED ? 
MIN(2048, CHUNK_SIZE) : CHUNK_SIZE), httpServerIndirectHandler, connection); return 1; } int httpServerDirectHandlerCommon(int kind, int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { HTTPConnectionPtr connection = srequest->data; HTTPRequestPtr request = connection->request; ObjectPtr object = request->object; int i = connection->offset / CHUNK_SIZE; int to, end, end1; assert(request->object->flags & OBJECT_INPROGRESS); httpSetTimeout(connection, -1); if(status < 0) { unlockChunk(object, i); if(kind == 2) unlockChunk(object, i + 1); if(status != -ECLIENTRESET) do_log_error(L_ERROR, -status, "Read from server failed"); httpServerAbort(connection, status != -ECLIENTRESET, 502, internAtomError(-status, "Read from server failed")); return 1; } /* We have incestuous knowledge of the decisions made in httpServerReadData */ if(request->to >= 0) to = request->to; else to = object->length; if(connection->te == TE_CHUNKED) end = connection->offset + connection->chunk_remaining; else end = to; /* The amount of data actually read into the object */ end1 = MIN(end, i * CHUNK_SIZE + MIN(kind * CHUNK_SIZE, srequest->offset)); assert(end >= 0 && end1 >= i * CHUNK_SIZE && end1 <= (i + 2) * CHUNK_SIZE); object->chunks[i].size = MAX(object->chunks[i].size, MIN(end1 - i * CHUNK_SIZE, CHUNK_SIZE)); if(kind == 2 && end1 > (i + 1) * CHUNK_SIZE) { object->chunks[i + 1].size = MAX(object->chunks[i + 1].size, end1 - (i + 1) * CHUNK_SIZE); } if(connection->te == TE_CHUNKED) { connection->chunk_remaining -= (end1 - connection->offset); assert(connection->chunk_remaining >= 0); } connection->offset = end1; object->size = MAX(object->size, end1); unlockChunk(object, i); if(kind == 2) unlockChunk(object, i + 1); if(i * CHUNK_SIZE + srequest->offset > end1) { connection->len = i * CHUNK_SIZE + srequest->offset - end1; return httpServerIndirectHandlerCommon(connection, status); } else { notifyObject(object); if(status) { httpServerFinish(connection, 1, 0); return 1; } else { return 
httpServerReadData(connection, 0); } } } int httpServerDirectHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { return httpServerDirectHandlerCommon(1, status, event, srequest); } int httpServerDirectHandler2(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { return httpServerDirectHandlerCommon(2, status, event, srequest); } /* Add the data accumulated in connection->buf into the object in connection->request. Returns 0 in the normal case, 1 if the TE is self-terminating and we're done, -1 if there was a problem with objectAddData, -2 if there was a problem with the data. */ int connectionAddData(HTTPConnectionPtr connection, int skip) { HTTPRequestPtr request = connection->request; ObjectPtr object = request->object; int rc; if(connection->te == TE_IDENTITY) { int len; len = connection->len - skip; if(object->length >= 0) { len = MIN(object->length - connection->offset, len); } if(request->to >= 0) len = MIN(request->to - connection->offset, len); if(len > 0) { rc = objectAddData(object, connection->buf + skip, connection->offset, len); if(rc < 0) return -1; connection->offset += len; connection->len -= (len + skip); do_log(D_SERVER_OFFSET, "0x%lx(0x%lx): offset = %d\n", (unsigned long)connection, (unsigned long)object, connection->offset); } if(connection->len > 0 && skip + len > 0) { memmove(connection->buf, connection->buf + skip + len, connection->len); } if((object->length >= 0 && object->length <= connection->offset) || (request->to >= 0 && request->to <= connection->offset)) { notifyObject(object); return 1; } else { if(len > 0) notifyObject(object); return 0; } } else if(connection->te == TE_CHUNKED) { int i = skip, j, size; /* connection->chunk_remaining is 0 at the end of a chunk, -1 after the CR/LF pair ending a chunk, and -2 after we've seen a chunk of length 0. 
*/ if(connection->chunk_remaining > -2) { while(1) { if(connection->chunk_remaining <= 0) { if(connection->chunk_remaining == 0) { if(connection->len < i + 2) break; if(connection->buf[i] != '\r' || connection->buf[i + 1] != '\n') return -1; i += 2; connection->chunk_remaining = -1; } if(connection->len < i + 2) break; j = parseChunkSize(connection->buf, i, connection->len, &size); if(j < 0) return -2; if(j == 0) break; else i = j; if(size == 0) { connection->chunk_remaining = -2; break; } else { connection->chunk_remaining = size; } } else { /* connection->chunk_remaining > 0 */ size = MIN(connection->chunk_remaining, connection->len - i); if(size <= 0) break; rc = objectAddData(object, connection->buf + i, connection->offset, size); connection->offset += size; if(rc < 0) return -1; i += size; connection->chunk_remaining -= size; do_log(D_SERVER_OFFSET, "0x%lx(0x%lx): offset = %d\n", (unsigned long)connection, (unsigned long)object, connection->offset); } } } connection->len -= i; if(connection->len > 0) memmove(connection->buf, connection->buf + i, connection->len); if(i > 0 || connection->chunk_remaining == -2) notifyObject(object); if(connection->chunk_remaining == -2) return 1; else return 0; } else { abort(); } } void listServers(FILE *out) { HTTPServerPtr server; int i, n, m, entry; fprintf(out, "\n" "\n" "\r\nKnown servers\n" "\n" "

Known servers

\n"); alternatingHttpStyle(out, "servers"); fprintf(out, "\n"); fprintf(out, "" "" "" "" "" "" "" "" "\n"); fprintf(out, "\n"); server = servers; entry = 0; while(server) { fprintf(out, "", entry % 2 == 0 ? "even" : "odd"); if(server->port == 80) fprintf(out, "", server->name); else fprintf(out, "", server->name, server->port); if(server->version == HTTP_11) fprintf(out, ""); else if(server->version == HTTP_10) fprintf(out, ""); else fprintf(out, ""); if(server->persistent < 0) fprintf(out, ""); else if(server->persistent > 0) fprintf(out, ""); else fprintf(out, ""); if(server->version != HTTP_11 || server->persistent <= 0) fprintf(out, ""); else if(server->pipeline < 0) fprintf(out, ""); else if(server->pipeline >= 0 && server->pipeline <= 1) fprintf(out, ""); else if(server->pipeline == 2 || server->pipeline == 3) fprintf(out, ""); else fprintf(out, ""); n = 0; m = 0; for(i = 0; i < server->maxslots; i++) if(server->connection[i] && !server->connection[i]->connecting) { if(i < server->numslots) n++; else m++; } fprintf(out, "", m); else fprintf(out, ""); if(server->lies > 0) fprintf(out, "", (server->lies + 9) / 10); else fprintf(out, ""); if(server->rtt > 0) fprintf(out, "", (double)server->rtt / 1000000.0); else fprintf(out, ""); if(server->rate > 0) fprintf(out, "", server->rate); else fprintf(out, ""); fprintf(out, "\n"); server = server->next; entry++; } fprintf(out, "\n"); fprintf(out, "
ServerVersionPersistentPipelineConnectionsrttrate
%s%s:%d1.11.0unknownnoyesunknownnounknownprobingyes%d/%d", n, server->numslots); if(m) fprintf(out, " + %d(%d lies)%.3f%d
\n"); fprintf(out, "

back

"); fprintf(out, "\n"); } polipo-1.0.4.1/polipo.texi0000644000175000017500000024535311331407220014671 0ustar chrisdchrisd\input texinfo @c -*-texinfo-*- @c %**start of header @setfilename polipo.info @settitle The Polipo Manual @afourpaper @c %**end of header @dircategory Network Applications @direntry * Polipo: (polipo). The Polipo caching web proxy. @end direntry @copying Copyright @copyright{} 2003 -- 2006 by Juliusz Chroboczek. @end copying @titlepage @title The Polipo Manual @author Juliusz Chroboczek @page @vskip 0pt plus 1fill Polipo is a caching web proxy designed to be used as a personal cache or a cache shared among a few users. @vskip 0pt plus 1fill @insertcopying @end titlepage @contents @ifnottex @node Top, Background, (dir), (dir) @top Polipo Polipo is a caching web proxy designed to be used as a personal cache or a cache shared among a few users. @ifhtml The latest version of Polipo can be found on @uref{http://www.pps.jussieu.fr/~jch/software/polipo/,the Polipo web page}. @end ifhtml This manual was written by @uref{http://www.pps.jussieu.fr/~jch/,,Juliusz Chroboczek}. @end ifnottex @menu * Background:: Background information. * Running:: Running Polipo * Network:: Polipo and the network. * Caching:: Caching. * Memory usage:: Limiting Polipo's memory usage. * Copying:: Your rights and mine. * Variable index:: Variable index. * Concept index:: Concept index. @end menu @node Background, Running, Top, Top @chapter Background @menu * The web:: The web and HTTP. * Proxies and caches:: Proxies and caches. * Latency and throughput:: Optimise latency, not throughput. * Network traffic:: Be nice to the net. * Partial instances:: Don't discard data. * POST and PUT:: Other requests * Other HTTP proxies:: Why did I write Polipo from scratch? 
@end menu @node The web, Proxies and caches, Background, Background @section The web and HTTP @cindex URL @cindex resource @cindex instance @cindex entity @cindex HTTP The web is a wide-scale decentralised distributed hypertext system, something that's obviously impossible to achieve reliably. The web is a collection of @dfn{resources} which are identified by @dfn{URLs}, strings starting with @code{http://}. At any point in time, a resource has a certain value, which is called an @dfn{instance} of the resource. The fundamental protocol of the web is HTTP, a simple request/response protocol. With HTTP, a client can make a request for a resource to a server, and the server replies with an @dfn{entity}, which is an on-the-wire representation of an instance or of a fragment thereof. @node Proxies and caches, Latency and throughput, The web, Background @section Proxies and caches @cindex proxy @cindex caching A proxy is a program that acts as both a client and a server. It listens for client requests and forwards them to servers, and forwards the servers' replies to clients. An HTTP proxy can optimise web traffic away by @dfn{caching} server replies, storing them in memory in case they are needed again. If a reply has been cached, a later client request may, under some conditions, be satisfied without going to the source again. In addition to taking the shortcuts made possible by caching, proxies can improve performance by generating better network traffic than the client applications would do. Proxies are also useful in ways unrelated to raw performance. A proxy can be used to contact a server that is not visible to the browser, for example because there is a firewall in the way (@pxref{Parent proxies}), or because the client and the server use different lower layer protocols (for example IPv4 and IPv6).
Another common application of proxies is to modify the data sent to servers and returned to clients, for example by censoring headers that expose too much about the client's identity (@pxref{Censoring headers}) or removing advertisements from the data returned by the server (@pxref{Forbidden}). Polipo is a caching HTTP proxy that was originally designed as a @dfn{personal} proxy, i.e.@: a proxy that is used by a single user or a small group of users. @node Latency and throughput, Network traffic, Proxies and caches, Background @section Latency and throughput @cindex throughput @cindex latency Most network benchmarks consider @dfn{throughput}, or the average amount of data being pushed around per unit of time. While important for batch applications (for example benchmarks), average throughput is mostly irrelevant when it comes to interactive web usage. What is more important is a transaction's median @dfn{latency}, or whether the data starts to trickle down before the user gets annoyed. Typical web caches optimise for throughput --- for example, by consulting sibling caches before accessing a remote resource. By doing so, they significantly add to the median latency, and therefore to the average user frustration. Polipo was designed to minimise latency. @node Network traffic, Partial instances, Latency and throughput, Background @section Network traffic The web was developed by people who were interested in text processing rather than in networking and, unsurprisingly enough, the first versions of the HTTP protocol did not make very good use of network resources. The main problem in HTTP/0.9 and early versions of HTTP/1.0 was that a separate TCP connection (``virtual circuit'' for them telecom people) was created for every entity transferred. Opening multiple TCP connections has significant performance implications. Obviously, connection setup and teardown require additional packet exchanges which increase network usage and, more importantly, latency. 
Less obviously, TCP is not optimised for that sort of usage. TCP aims to avoid network @dfn{congestion}, a situation in which the network becomes unusable due to overly aggressive traffic patterns. A correct TCP implementation will very carefully probe the network at the beginning of every connection, which means that a TCP connection is very slow during the first couple of kilobytes transferred, and only gets up to speed later. Because most HTTP entities are small (in the 1 to 10 kilobytes range), HTTP/0.9 uses TCP where it is most inefficient. @menu * Persistent connections:: Don't shut connections down. * Pipelining:: Send a bunch of requests at once. * Poor Mans Multiplexing:: Split requests. @end menu @node Persistent connections, Pipelining, Network traffic, Network traffic @subsection Persistent connections @cindex persistent connection @cindex keep-alive connection Later HTTP versions allow the transfer of multiple entities on a single connection. A connection that carries multiple entities is said to be @dfn{persistent} (or sometimes @dfn{keep-alive}). Unfortunately, persistent connections are an optional feature of HTTP, even in version 1.1. Polipo will attempt to use persistent connections on the server side, and will honour persistent connection requests from clients. @node Pipelining, Poor Mans Multiplexing, Persistent connections, Network traffic @subsection Pipelining @cindex Pipelining With persistent connections it becomes possible to @dfn{pipeline} or @dfn{stream} requests, i.e. to send multiple requests on a single connection without waiting for the replies to come back. Because this technique gets the requests to the server faster, it reduces latency. Additionally, because multiple requests can often be sent in a single packet, pipelining reduces network traffic. Pipelining is a fairly common technique@footnote{The X11 protocol fundamentally relies on pipelining. NNTP does support pipelining. SMTP doesn't, while ESMTP makes it an option. 
FTP does support pipelining on the control connection.}, but it is not supported by HTTP/1.0. HTTP/1.1 makes pipelining support compulsory in every server implementation that can use persistent connections, but there are a number of buggy servers that claim to implement HTTP/1.1 but don't support pipelining. Polipo carefully probes for pipelining support in a server and uses pipelining if it believes that it is reliable. Polipo also deeply enjoys being pipelined at by a client@footnote{Other client-side implementations of HTTP that make use of pipelining include @uref{http://www.opera.com/,,Opera}, recent versions of @uref{http://www.mozilla.org,,Mozilla}, APT (the package downloader used by @uref{http://www.debian.org,,Debian} GNU/Linux) and LFTP.}. @node Poor Mans Multiplexing, , Pipelining, Network traffic @subsection Poor Man's Multiplexing @cindex Poor Man's Multiplexing @cindex multiplexing A major weakness of the HTTP protocol is its inability to share a single connection between multiple simultaneous transactions --- to @dfn{multiplex} a number of transactions over a single connection. In HTTP, a client can either request all instances sequentially, which significantly increases latency, or else open multiple concurrent connections, with all the problems that this implies (@pxref{Persistent connections}). Poor Man's Multiplexing (PMM) is a technique that simulates multiplexing by requesting an instance in multiple segments; because the segments are fetched in independent transactions, they can be interleaved with requests for other resources. Obviously, PMM only makes sense in the presence of persistent connections; additionally, it is only effective in the presence of pipelining (@pxref{Pipelining}). PMM poses a number of reliability issues. 
If the resource being fetched is dynamic, it is quite possible that it will change between segments; thus, an implementation making use of PMM needs to be able to switch to full-resource retrieval when it detects a dynamic resource. Polipo supports PMM, but it is disabled by default (@pxref{PMM}). @node Partial instances, POST and PUT, Network traffic, Background @section Caching partial instances @cindex partial instance @cindex range request A partial instance is an instance that is being cached but only part of which is available in the local cache. There are three ways in which partial instances can arise: client applications requesting only part of an instance (Adobe's Acrobat Reader plugin is famous for that), a server dropping a connection mid-transfer (because it is short on resources, or, surprisingly often, because it is buggy), a client dropping a connection (usually because the user pressed @emph{stop}). When an instance is requested that is only partially cached, it is possible to request just the missing data by using a feature of HTTP known as a @dfn{range} request. While support for range requests is optional, most servers honour them in case of static data (data that are stored on disk, rather than being generated on the fly e.g.@: by a CGI script). Caching partial instances has a number of positive effects. Obviously, it reduces the amount of data transmitted as the available data needn't be fetched again. Because it prevents partial data from being discarded, it makes it reasonable for a proxy to unconditionally abort a download when requested by the user, and therefore reduces network traffic. Polipo caches arbitrary partial instances in its in-memory cache. It will only store the initial segment of a partial instance (from its beginning up to its first hole) in its on-disk cache, though. In either case, it will attempt to use range requests to fetch the missing data.
@node POST and PUT, Other HTTP proxies, Partial instances, Background @section Other requests @cindex GET request @cindex HEAD request @cindex PUT request @cindex POST request @cindex OPTIONS request @cindex PROPFIND request The previous sections pretend that there is only one kind of request in HTTP --- the @samp{GET} request. In fact, there are some others. The @samp{HEAD} request method retrieves data about a resource. Polipo does not normally use @samp{HEAD}, but will fall back to using it for validation if it finds that a given server fails to cooperate with its standard validation methods (@pxref{Cache transparency}). Polipo will correctly reply to a client's @samp{HEAD} request. The @samp{POST} method is used to request that the server should do something rather than merely sending an entity; it is usually used with HTML forms that have an effect@footnote{HTML forms should use the @samp{GET} method when the form has no side-effect as this makes the results cacheable.}. The @samp{PUT} method is used to replace a resource with a different instance; it is typically used by web publishing applications. @samp{POST} and @samp{PUT} requests are handled by Polipo pretty much like @samp{GET} and @samp{HEAD}; however, for various reasons, some precautions must be taken. In particular, any cached data for the resource they refer to must be discarded, and they can never be pipelined. Finally, HTTP/1.1 includes a convenient backdoor with the @samp{CONNECT} method. For more information, please see @ref{Tunnelling connections}. Polipo does not currently handle the more exotic methods such as @samp{OPTIONS} and @samp{PROPFIND}. @node Other HTTP proxies, , POST and PUT, Background @section Other HTTP proxies @cindex proxy I started writing Polipo because the weather was bad. But also because I wanted to implement some features that other web proxies don't have. @menu * Harvest and Squid:: Historic proxies. * Apache:: The web server has a proxy.
* WWWOFFLE:: A personal proxy. * Junkbuster:: Get rid of ads. * Privoxy:: Junkbuster on speed. * Oops:: A multithreaded cache. @end menu @node Harvest and Squid, Apache, Other HTTP proxies, Other HTTP proxies @subsection Harvest and Squid @cindex Harvest @cindex Squid Harvest, the grandfather of all web caches, has since evolved into @uref{http://www.squid-cache.org/,,Squid}. Squid sports an elegant single-threaded non-blocking architecture and multiplexes multiple clients in a single process. It also features almost complete support for HTTP/1.1, although for some reason it doesn't currently advertise it. Squid is designed as a large-scale shared proxy running on a dedicated machine, and therefore carries certain design decisions which make it difficult to use as a personal proxy. Because Squid keeps all resource meta-data in memory, it requires a fair amount of RAM in order to manipulate a reasonably sized cache. Squid doesn't cache partial instances, and has trouble with instances larger than available memory@footnote{Recent versions of Squid support instances larger than available memory by using a hack that the authors call a ``sliding window algorithm''.}. If a client connection is interrupted, Squid has to decide whether to continue fetching the resource (and possibly waste bandwidth) or discard what it already has (and possibly waste bandwidth). Some versions of squid would, under some circumstances, pipeline up to two outgoing requests on a single connection. At the time of writing, this feature appears to have been disabled in the latest version. Squid's developers have decided to re-write it in C++. @node Apache, WWWOFFLE, Harvest and Squid, Other HTTP proxies @subsection The Apache proxy @cindex Apache The @uref{http://www.apache.org/,,Apache web server} includes a complete HTTP/1.1 proxy. The Apache web server was designed to maximise ease of programming --- a decision which makes Apache immensely popular for deploying web-based applications. 
Of course, this ease of programming comes at a cost, and Apache is not the most lightweight proxy available. As cheaper caching proxies are available, Apache is not useful as a standalone proxy. The main application of Apache's proxy is to join multiple web servers' trees into a single hierarchy. The Apache proxy doesn't cache partial instances and doesn't pipeline multiple outgoing requests. @node WWWOFFLE, Junkbuster, Apache, Other HTTP proxies @subsection WWWOFFLE @cindex WWWOFFLE @uref{http://www.gedanken.demon.co.uk/wwwoffle/,,WWWOFFLE}, an elegant personal proxy, is the primary model for Polipo. WWWOFFLE has more features than can be described here. It will censor banner ads, clean your HTML, decorate it with random colours, schedule fetches for off-peak hours. Unfortunately, the HTTP traffic that WWWOFFLE generates is disgusting. It will open a connection for every fetch, and forces the client to do the same. WWWOFFLE only caches complete instances. I used WWWOFFLE for many years, and frustration with WWWOFFLE's limitations is the main reason why I started Polipo in the first place. @node Junkbuster, Privoxy, WWWOFFLE, Other HTTP proxies @subsection Junkbuster @cindex Junkbuster @uref{http://internet.junkbuster.com/,,Junkbuster} is a simple non-caching web proxy designed to remove banner ads and cookies. It was the main model for WWWOFFLE's (and therefore Polipo's) header and ad-removing features. Junkbuster's HTTP support is very simple (some would say broken): it doesn't do persistent connections, and it breaks horribly if the client tries pipelining. Junkbuster is no longer being maintained, and has evolved into Privoxy. @node Privoxy, Oops, Junkbuster, Other HTTP proxies @subsection Privoxy @cindex Privoxy @uref{http://www.privoxy.org/,,Privoxy} is the current incarnation of Junkbuster. Privoxy has the ability to randomly modify web pages before sending them to the browser --- for example, remove @samp{} or @samp{} tags. 
Just like its parent, Privoxy cannot do persistent connections. Under some circumstances, it will also buffer whole pages before sending them to the client, which significantly adds to its latency. However, this is difficult to avoid given the kinds of rewriting it attempts to perform. @node Oops, , Privoxy, Other HTTP proxies @subsection Oops @cindex Oops @uref{http://zipper.paco.net/~igor/oops.eng/,,Oops} is a caching web proxy that uses one thread (lightweight process) for every connection. This technique does cost additional memory, but allows good concurrency of requests while avoiding the need for complex non-blocking programming. Oops was apparently designed as a wide-scale shared proxy. Although Oops' programming model makes it easy to implement persistent connections, Oops insists on opening a separate connection to the server for every single resource fetch, which disqualifies it from production usage. @node Running, Network, Background, Top @chapter Running Polipo @menu * Polipo Invocation:: Starting Polipo. * Browser configuration:: Configuring your browser. * Stopping:: Stopping and refreshing Polipo. * Local server:: The local web server and web interface. @end menu @node Polipo Invocation, Browser configuration, Running, Running @section Starting Polipo @cindex invocation By default, Polipo runs as a normal foreground job in a terminal in which it can log random ``How do you do?'' messages. With the right configuration options, Polipo can run as a daemon. Polipo is run with the following command line: @example $ polipo [ -h ] [ -v ] [ -x ] [ -c @var{config} ] [ @var{var}=@var{val}... ] @end example All flags are optional. The flag @option{-h} causes Polipo to print a short help message and to quit. The flag @option{-v} causes Polipo to list all of its configuration variables and quit. The flag @option{-x} causes Polipo to purge its on-disk cache and then quit (@pxref{Purging}). 
The flag @option{-c} specifies the configuration file to use (by default @file{~/.polipo} or @file{/etc/polipo/config}). Finally, Polipo's configuration can be changed on the command line by assigning values to given configuration variables. @menu * Configuring Polipo:: Plenty of options. * Daemon:: Running in the background. * Logging:: Funnelling status messages. @end menu @node Configuring Polipo, Daemon, Polipo Invocation, Polipo Invocation @subsection Configuration @cindex runtime configuration @cindex variable @cindex configuration variable @cindex configuration file There is a number of variables that you can tweak in order to configure Polipo, and they should all be described in this manual (@pxref{Variable index}). You can display the complete, most up-to-date list of configuration variables by using the @option{-v} command line flag or by accessing the ``current configuration'' page of Polipo's web interface (@pxref{Web interface}). Configuration variables can be set either on the command line or else in the configuration file given by the @option{-c} command-line flag. Configuration variables are typed, and @option{-v} will display their types. The type can be of one of the following: @itemize @bullet @item @samp{integer} or @samp{float}: a numeric value; @item @samp{boolean}: a truth value, one of @samp{true} or @samp{false}; @item @samp{tristate}: one of @samp{false}, @samp{maybe} or @samp{true}; @item @samp{4-state}, one of @samp{false}, @samp{reluctantly}, @samp{happily} or @samp{true}; @item @samp{5-state}, one of @samp{false}, @samp{reluctantly}, @samp{maybe}, @samp{happily} or @samp{true}; @item @samp{atom}, a string written within double quotes @samp{"}); @item @samp{list}, a comma-separated list of strings; @item @samp{intlist}, a comma-separated list of integers and ranges of integers (of the form `@var{n}--@var{m}'). @end itemize The configuration file has a very simple syntax. 
All blank lines are ignored, as are lines starting with a hash sign @samp{#}. Other lines must be of the form @example @var{var} = @var{val} @end example where @var{var} is a variable to set and @var{val} is the value to set it to. It is possible to change the configuration of a running polipo by using the local configuration interface (@pxref{Web interface}). @node Daemon, Logging, Configuring Polipo, Polipo Invocation @subsection Running as a daemon @cindex daemon @cindex terminal @cindex pid @vindex daemonise @vindex pidFile If the configuration variable @code{daemonise} is set to true, Polipo will run as a daemon: it will fork and detach from its controlling terminal (if any). The variable @code{daemonise} defaults to false. When Polipo is run as a daemon, it can be useful to get it to atomically write its @emph{pid} to a file. If the variable @code{pidFile} is defined, it should be the name of a file where Polipo will write its @emph{pid}. If the file already exists when it is started, Polipo will refuse to run. @node Logging, , Daemon, Polipo Invocation @subsection Logging @cindex logging @vindex logLevel @vindex logFile @vindex logSyslog @vindex logFacility When it encounters a difficulty, Polipo will print a friendly message. The location where these messages go is controlled by the configuration variables @code{logFile} and @code{logSyslog}. If @code{logSyslog} is @code{true}, error messages go to the system log facility given by @code{logFacility}. If @code{logFile} is set, it is the name of a file where all output will accumulate. If @code{logSyslog} is @code{false} and @code{logFile} is empty, messages go to the error output of the process (normally the terminal). The variable @code{logFile} defaults to empty if @code{daemonise} is false, and to @samp{/var/log/polipo} otherwise. The variable @code{logSyslog} defaults to @code{false}, and @code{logFacility} defaults to @samp{user}. The amount of logging is controlled by the variable @code{logLevel}. 
Please see the file @samp{log.h} in the Polipo sources for the possible values of @code{logLevel}. (Note that if you are using a shared cache, or logging to syslog, an overly low @code{logLevel} could be a privacy violation.) @node Browser configuration, Stopping, Polipo Invocation, Running @section Configuring your browser @cindex browser configuration @cindex user-agent configuration Telling your user-agent (web browser) to use Polipo is an operation that depends on the browser. Many user-agents will transparently use Polipo if the environment variable @samp{http_proxy} points at it; e.g.@: @example $ export http_proxy=http://localhost:8123/ @end example Netscape Navigator, Mozilla, Mozilla Firefox, KDE's Konqueror and probably other browsers require that you configure them manually through their @emph{Preferences} or @emph{Configure} menu. If your user-agent sports such options, tell it to use persistent connections when speaking to proxies, to speak HTTP/1.1 and to use HTTP/1.1 pipelining. @node Stopping, Local server, Browser configuration, Running @section Stopping Polipo and getting it to reload @cindex signals @cindex shutting down @cindex stopping Polipo will shut down cleanly if it receives @code{SIGHUP}, @code{SIGTERM} or @code{SIGINT} signals; this will normally happen when a Polipo in the foreground receives a @code{^C} key press, when your system shuts down, or when you use the @code{kill} command with no flags. Polipo will then write-out all its in-memory data to disk and quit. If Polipo receives the @code{SIGUSR1} signal, it will write out all the in-memory data to disk (but won't discard them), reopen the log file, and then reload the forbidden URLs file (@pxref{Forbidden}). Finally, if Polipo receives the @code{SIGUSR2} signal, it will write out all the in-memory data to disk and discard as much of the memory cache as possible. It will then reopen the log file and reload the forbidden URLs file. 
@node Local server, , Stopping, Running @section The local web server @vindex localDocumentRoot @vindex disableProxy @cindex web server @cindex local server Polipo includes a local web server, which is accessible on the same port as the one the proxy listens to. Therefore, by default you can access Polipo's local web server as @samp{http://localhost:8123/}. The data for the local web server can be configured by setting @code{localDocumentRoot}, which defaults to @file{/usr/share/polipo/www/}. Setting this variable to @samp{""} will disable the local server. Polipo assumes that the local web tree doesn't change behind its back. If you change any of the local files, you will need to notify Polipo by sending it a @code{SIGUSR2} signal (@pxref{Stopping}). If you use polipo as a publicly accessible web server, you might want to set the variable @code{disableProxy}, which will prevent it from acting as a web proxy. (You will also want to set @code{disableLocalInterface} (@pxref{Web interface}), and perhaps run Polipo in a @emph{chroot} jail.) @menu * Web interface:: The web interface. @end menu @node Web interface, , Local server, Local server @subsection The web interface @cindex runtime configuration @cindex web interface @vindex disableLocalInterface @vindex disableConfiguration @vindex disableServersList The subtree of the local web space rooted at @samp{http://localhost:8123/polipo/} is treated specially: URLs under this root do not correspond to on-disk files, but are generated by Polipo on-the-fly. We call this subtree Polipo's @dfn{local web interface}. The page @samp{http://localhost:8123/polipo/config?} contains the values of all configuration variables, and allows setting most of them. The page @samp{http://localhost:8123/polipo/status?} provides a summary status report about the running Polipo, and allows performing a number of actions on the proxy, notably flushing the in-memory cache. 
The page @samp{http://localhost:8123/polipo/servers?} contains the list of known servers, and the statistics maintained about them (@pxref{Server statistics}). The pages starting with @samp{http://localhost:8123/polipo/index?} contain indices of the disk cache. For example, the following page contains the index of the cached pages from the server of some random company: @example http://localhost:8123/polipo/index?http://www.microsoft.com/ @end example The pages starting with @samp{http://localhost:8123/polipo/recursive-index?} contain recursive indices of various servers. This functionality is disabled by default, and can be enabled by setting the variable @code{disableIndexing} to @code{false}. If you have multiple users, you will probably want to disable the local interface by setting the variable @code{disableLocalInterface}. You may also selectively control setting of variables, indexing and listing known servers by setting the variables @code{disableConfiguration}, @code{disableIndexing} and @code{disableServersList}. @node Network, Caching, Running, Top @chapter Polipo and the network @menu * Client connections:: Speaking to clients * Contacting servers:: Contacting servers. * HTTP tuning:: Tuning at the HTTP level. * Offline browsing:: Browsing with poor connectivity. * Server statistics:: Polipo keeps statistics about servers. * Server-side behaviour:: Tuning the server-side behaviour. * PMM:: Poor Man's Multiplexing. * Forbidden:: You can forbid some URLs. * DNS:: How Polipo finds hosts. * Parent proxies:: Fetching data from other proxies. * Tuning POST and PUT:: Tuning POST and PUT requests. * Tunnelling connections:: Tunnelling foreign protocols and https.
@end menu @node Client connections, Contacting servers, Network, Network @section Client connections @vindex proxyAddress @vindex proxyPort @vindex proxyName @cindex address @cindex port @cindex IPv6 @cindex proxy loop @cindex loop @cindex proxy name @cindex via @cindex loopback address @cindex security There are three fundamental values that control how Polipo speaks to clients. The variable @code{proxyAddress}, defines the IP address on which Polipo will listen; by default, its value is the @dfn{loopback address} @code{"127.0.0.1"}, meaning that Polipo will listen on the IPv4 loopback interface (the local host) only. By setting this variable to a global IP address or to one of the special values @code{"::"} or @code{"0.0.0.0"}, it is possible to allow Polipo to serve remote clients. This is likely to be a security hole unless you set @code{allowedClients} to a reasonable value (@pxref{Access control}). Note that the type of address that you specify for @code{proxyAddress} will determine whether Polipo listens to IPv4 or IPv6. Currently, the only way to have Polipo listen to both protocols is to specify the IPv6 unspecified address (@code{"::"}) for @code{proxyAddress}. The variable @code{proxyPort}, by default 8123, defines the TCP port on which Polipo will listen. The variable @code{proxyName}, which defaults to the host name of the machine on which Polipo is running, defines the @dfn{name} of the proxy. This can be an arbitrary string that should be unique among all instances of Polipo that you are running. Polipo uses it in error messages and optionally for detecting proxy loops (by using the @samp{Via} HTTP header, @pxref{Censoring headers}). @menu * Access control:: Deciding who can connect. 
@end menu @node Access control, , Client connections, Client connections @subsection Access control @vindex proxyAddress @vindex authCredentials @vindex authRealm @vindex allowedClients @cindex access control @cindex authentication @cindex loopback address @cindex security @cindex username @cindex password By making it possible to have Polipo listen on a non-routable address (for example the loopback address @samp{127.0.0.1}), the variable @code{proxyAddress} provides a very crude form of @dfn{access control}: the ability to decide which hosts are allowed to connect. A finer form of access control can be implemented by specifying explicitly a number of client addresses or ranges of addresses (networks) that a client is allowed to connect from. This is done by setting the variable @code{allowedClients}. Every entry in @code{allowedClients} can be an IP address, for example @samp{134.157.168.57} or @samp{::1}. It can also be a network address, i.e.@: an IP address and the number of bits in the network prefix, for example @samp{134.157.168.0/24} or @samp{2001:660:116::/48}. Typical uses of the @samp{allowedClients} variable include @example allowedClients = 127.0.0.1, ::1, 134.157.168.0/24, 2001:660:116::/48 @end example or, for an IPv4-only version of Polipo, @example allowedClients = 127.0.0.1, 134.157.168.0/24 @end example A different form of access control can be implemented by requiring each client to @dfn{authenticate}, i.e.@: to prove its identity before connecting. Polipo currently only implements the most insecure form of authentication, @dfn{HTTP basic authentication}, which sends usernames and passwords in clear over the network. HTTP basic authentication is required when the variable @code{authCredentials} is not null; its value should be of the form @samp{username:password}. Note that both IP-based authentication and HTTP basic authentication are insecure: the former is vulnerable to IP address spoofing, the latter to replay attacks. 
If you need to access Polipo over the public Internet, the only secure option is to have it listen over the loopback interface only and use an ssh tunnel (@pxref{Parent proxies})@footnote{It is not quite clear to me whether HTTP digest authentication is worth implementing. On the one hand, if implemented correctly, it appears to provide secure authentication; on the other hand, and unlike ssh or SSL, it doesn't make any attempt at ensuring privacy, and its optional integrity guarantees are impossible to implement without significantly impairing latency.}. @node Contacting servers, HTTP tuning, Client connections, Network @section Contacting servers @cindex multiple addresses @cindex IPv6 @vindex useTemporarySourceAddress A server can have multiple addresses, for example if it is @dfn{multihomed} (connected to multiple networks) or if it can speak both IPv4 and IPv6. Polipo will try all of a host's addresses in turn; once it has found one that works, it will stick to that address until it fails again. If connecting via IPv6 there is the possibility to use temporary source addresses to increase privacy (RFC@tie{}3041). The variable @code{useTemporarySourceAddress} controls the use of temporary addresses for outgoing connections; if set to @code{true} temporary addresses are preferred, if set to @code{false} static addresses are used and if set to @code{maybe} (the default) the operating system default is in effect. This setting is not available on all operating systems. @menu * Allowed ports:: Where the proxy is allowed to connect. @end menu @node Allowed ports, , Contacting servers, Contacting servers @subsection Allowed ports @cindex Allowed ports @cindex Forbidden ports @cindex ports @vindex allowedPorts A TCP service is identified not only by the IP address of the machine it is running on, but also by a small integer, the TCP @dfn{port} it is @dfn{listening} on. 
Normally, web servers listen on port 80, but it is not uncommon to have them listen on different ports; Polipo's internal web server, for example, listens on port 8123 by default. The variable @code{allowedPorts} contains the list of ports that Polipo will accept to connect to on behalf of clients; it defaults to @samp{80-100, 1024-65535}. Set this variable to @samp{1-65535} if your clients (and the web pages they consult!) are fully trusted. (The variable @code{allowedPorts} is not considered for tunnelled connections; @pxref{Tunnelling connections}). @node HTTP tuning, Offline browsing, Contacting servers, Network @section Tuning at the HTTP level @cindex HTTP @cindex headers @menu * Tuning the HTTP parser:: Tuning parsing of HTTP headers. * Censoring headers:: Censoring HTTP headers. @end menu @node Tuning the HTTP parser, Censoring headers, HTTP tuning, HTTP tuning @subsection Tuning the HTTP parser @vindex laxHttpParser @vindex bigBufferSize As a number of HTTP servers and CGI scripts serve incorrect HTTP headers, Polipo uses a @emph{lax} parser, meaning that incorrect HTTP headers will be ignored (a warning will be logged by default). If the variable @code{laxHttpParser} is not set (it is set by default), Polipo will use a @emph{strict} parser, and refuse to serve an instance unless it could parse all the headers. When the amount of headers exceeds one chunk's worth (@pxref{Chunk memory}), Polipo will allocate a @dfn{big buffer} in order to store the headers. The size of big buffers, and therefore the maximum amount of headers Polipo can parse, is specified by the variable @code{bigBufferSize} (32@dmn{kB} by default). @node Censoring headers, , Tuning the HTTP parser, HTTP tuning @subsection Censoring headers @cindex privacy @cindex anonymity @cindex Referer @cindex cookies @vindex censorReferer @vindex censoredHeaders @vindex proxyName @vindex disableVia Polipo offers the option to censor given HTTP headers in both client requests and server replies. 
The main application of this feature is to very slightly improve the user's privacy by eliminating cookies and some content-negotiation headers. It is important to understand that these features merely make it slightly more difficult to gather statistics about the user's behaviour. While they do not actually prevent such statistics from being collected, they might make it less cost-effective to do so. The general mechanism is controlled by the variable @code{censoredHeaders}, the value of which is a case-insensitive list of headers to unconditionally censor. By default, it is empty, but I recommend that you set it to @samp{From, Accept-Language}. Adding headers such as @samp{Set-Cookie}, @samp{Set-Cookie2}, @samp{Cookie}, @samp{Cookie2} or @samp{User-Agent} to this list will probably break many web sites. The case of the @samp{Referer}@footnote{HTTP contains many mistakes and even one spelling error.} header is treated specially because many sites will refuse to serve pages when it is not provided. If @code{censorReferer} is @code{false} (the default), @samp{Referer} headers are passed unchanged to the server. If @code{censorReferer} is @code{maybe}, @samp{Referer} headers are passed to the server only when they refer to the same host as the resource being fetched. If @code{censorReferer} is @code{true}, all @samp{Referer} headers are censored. I recommend setting @code{censorReferer} to @code{maybe}. Another header that can have privacy implications is the @samp{Via} header, which is used to specify the chain of proxies through which a given request has passed. Polipo will generate @samp{Via} headers if the variable @code{disableVia} is @code{false} (it is true by default). If you choose to generate @samp{Via} headers, you may want to set the @code{proxyName} variable to some innocuous string (@pxref{Client connections}). @menu * Censor Accept-Language:: Why Accept-Language is evil. 
@end menu @node Censor Accept-Language, , Censoring headers, Censoring headers @subsubsection Why censor Accept-Language @cindex negotiation @cindex content negotiation @cindex Accept-Language Recent versions of HTTP include a mechanism known as @dfn{content negotiation} which allows a user-agent and a server to negotiate the best representation (instance) for a given resource. For example, a server that provides both PNG and GIF versions of an image will serve the PNG version to user-agents that support PNG, and the GIF version to Internet Explorer. Content negotiation requires that a client should send with every single request a number of headers specifying the user's cultural and technical preferences. Most of these headers do not expose sensitive information (who cares whether your browser supports PNG?). The @samp{Accept-Language} header, however, is meant to convey the user's linguistic preferences. In some cases, this information is sufficient to pinpoint with great precision the user's origins and even his political or religious opinions; think, for example, of the implications of sending @samp{Accept-Language: yi} or @samp{ar_PS}. At any rate, @samp{Accept-Language} is not useful. Its design is based on the assumption that language is merely another representation for the same information, and @samp{Accept-Language} simply carries a prioritised list of languages, which is not enough to usefully describe a literate user's preferences. A typical French user, for example, will prefer an English-language original to a French (mis-)translation, while still wanting to see French language texts when they are original. Such a situation cannot be described by the simple-minded @samp{Accept-Language} header. 
@node Offline browsing, Server statistics, HTTP tuning, Network @section Offline browsing @vindex proxyOffline @cindex offline browsing @cindex browsing offline @cindex connectivity @cindex warning @cindex shift-click In an ideal world, all machines would have perfect connectivity to the network at all times and servers would never crash. In the real world, it may be necessary to avoid hitting the network and have Polipo serve stale objects from its cache. Setting @code{proxyOffline} to @code{true} prevents Polipo from contacting remote servers, no matter what. This setting is suitable when you have no network connection whatsoever. If @code{proxyOffline} is false, Polipo's caching behaviour is controlled by a number of variables documented in @ref{Tweaking validation}. @node Server statistics, Server-side behaviour, Offline browsing, Network @section Server statistics @vindex serverExpireTime @cindex server statistics @cindex round-trip time @cindex transfer rate In order to decide when to pipeline requests (@pxref{Pipelining}) and whether to perform Poor Man's Multiplexing (@pxref{Poor Mans Multiplexing}), Polipo needs to keep statistics about servers. These include the server's ability to handle persistent connections, the server's ability to handle pipelined requests, the round-trip time to the server, and the server's transfer rate. The statistics are accessible from Polipo's web interface (@pxref{Web interface}). The variable @samp{serverExpireTime} (default 1 day) specifies how long such information remains valid. If a server has not been accessed for a time interval of at least @code{serverExpireTime}, information about it will be discarded. As Polipo will eventually recover from incorrect information about a server, this value can be made fairly large. The reason why it exists at all is to limit the amount of memory used up by information about servers. 
@node Server-side behaviour, PMM, Server statistics, Network @section Tweaking server-side behaviour @vindex serverSlots @vindex serverSlots1 @vindex serverMaxSlots @vindex smallRequestTime @vindex replyUnpipelineTime @vindex replyUnpipelineSize @vindex maxPipelineTrain @vindex pipelineAdditionalRequests @vindex maxSideBuffering @cindex small request @cindex large request @cindex breaking pipelines The most important piece of information about a server is whether it supports persistent connections. If this is the case, Polipo will open at most @code{serverSlots} connections to that server (@code{serverSlots1} if the server only implements HTTP/1.0), and attempt to pipeline; if not, Polipo will hit the server harder, opening up to @code{serverMaxSlots} connections. Another use of server information is to decide whether to pipeline additional requests on a connection that already has in-flight requests. This is controlled by the variable @code{pipelineAdditionalRequests}; if it is @code{false}, no additional requests will be pipelined. If it is @code{true}, additional requests will be pipelined whenever possible. If it is @code{maybe} (the default), additional requests will only be pipelined following @dfn{small} requests, where a small request is one whose download is estimated to take no more than @code{smallRequestTime} (default 5@dmn{s}). Sometimes, a request has been pipelined after a request that prompts a very large reply from the server; when that happens, the pipeline needs to be broken in order to reduce latency. A reply is @dfn{large} and will cause a pipeline to be broken if either its size is at least @code{replyUnpipelineSize} (default one megabyte) or else the server's transfer rate is known and the body is expected to take at least @code{replyUnpipelineTime} to download (default 15@dmn{s}). The variable @code{maxPipelineTrain} defines the maximum number of requests that will be pipelined in a single write (default 10). 
Setting this variable to a very low value might (or might not) fix interaction with some unreliable servers that the normal heuristics are unable to detect. The variable @code{maxSideBuffering} specifies how much data will be buffered in a PUT or POST request; it defaults to 1500 bytes. Setting this variable to 0 may cause some media players that abuse the HTTP protocol to work. @node PMM, Forbidden, Server-side behaviour, Network @section Poor Man's Multiplexing @cindex Poor Man's Multiplexing @cindex multiplexing @vindex pmmSize @vindex pmmFirstSize By default, Polipo does not use Poor Man's Multiplexing (@pxref{Poor Mans Multiplexing}). If the variable @code{pmmSize} is set to a positive value, Polipo will use PMM when speaking to servers that are known to support pipelining. It will request resources by segments of @code{pmmSize} bytes. The first segment requested has a size of @code{pmmFirstSize}, which defaults to twice @code{pmmSize}. PMM is an intrinsically unreliable technique. Polipo makes heroic efforts to make it at least usable, requesting that the server disable PMM when not useful (by using the @samp{If-Range} header) and disabling it on its own if a resource turns out to be dynamic. Notwithstanding these precautions, unless the server cooperates@footnote{More precisely, unless CGI scripts cooperate.}, you will see failures when using PMM, which will usually result in blank pages and broken image icons; hitting @emph{Reload} on your browser will usually cause Polipo to notice that something went wrong and correct the problem. @node Forbidden, DNS, PMM, Network @section Forbidden and redirected URLs @cindex forbidden @cindex redirect @cindex web counter @cindex counter @cindex web bug @cindex bug @cindex advertisement @cindex web ad @cindex banner ad The web contains advertisements that a user-agent is supposed to download together with the requested pages. 
Not only do advertisements pollute the user's brain, pushing them around takes time and uses up network bandwidth. Many so-called content providers also track user activities by using @dfn{web bugs}, tiny embedded images that cause a server to log where they are requested from. Such images can be detected because they are usually uncacheable (@pxref{Cache transparency}) and therefore logged by Polipo by default. Polipo can be configured to prevent certain URLs from reaching the browser, either by returning a @emph{forbidden} error message to the user, or by @emph{redirecting} such URLs to some other URL. @menu * Internal forbidden list:: Specifying forbidden URLs. * External redirectors:: Using an external redirector. @end menu @node Internal forbidden list, External redirectors, Forbidden, Forbidden @subsection Internal forbidden list @cindex forbidden @cindex redirect @vindex forbiddenFile @vindex forbiddenUrl @vindex forbiddenRedirectCode The file pointed at by the variable @code{forbiddenFile} (defaults to @file{~/.polipo-forbidden} or @file{/etc/polipo/forbidden}, whichever exists) specifies the set of URLs that should never be fetched. If @code{forbiddenFile} is a directory, it will be recursively searched for files with forbidden URLs. Every line in a file listing forbidden URLs can either be a domain name --- a string that doesn't contain any of @samp{/}, @samp{*} or @samp{\} ---, or a POSIX extended regular expression. Blank lines are ignored, as are those that start with a hash sign @samp{#}. By default, whenever it attempts to fetch a forbidden URL, the browser will receive a @emph{403 forbidden} error from Polipo. Some users prefer to have the browser display a different page or an image. If @code{forbiddenUrl} is not null, it should represent a URL to which all forbidden URLs will be redirected. 
The kind of redirection used is specified by @code{forbiddenRedirectCode}; if this is 302 (the default) the redirection will be marked as temporary, if 301 it will be a permanent one. @node External redirectors, , Internal forbidden list, Forbidden @subsection External redirectors @cindex forbidden @cindex redirect @cindex redirector @cindex Squid-style redirector @cindex Adzapper @vindex redirector @vindex redirectorRedirectCode Polipo can also use an external process (a @dfn{Squid-style redirector}) to determine which URLs should be redirected. The name of the redirector binary is determined from the variable @code{redirector}, and the kind of redirection generated is specified by @code{redirectorRedirectCode}, which should be 302 (the default) or 301. For example, to use Adzapper to redirect ads to an innocuous image, just set @example redirector = /usr/bin/adzapper @end example @node DNS, Parent proxies, Forbidden, Network @section The domain name service @cindex DNS @cindex name server @cindex gethostbyname @cindex resolver @cindex IPv6 @vindex dnsMaxTimeout @vindex dnsUseGethostbyname @vindex dnsNameServer @vindex dnsNegativeTtl @vindex dnsGethostbynameTtl @vindex dnsQueryIPv6 The low-level protocols beneath HTTP identify machines by IP addresses, sequences of four 8-bit integers such as @samp{199.232.41.10}@footnote{Or sequences of eight 16-bit integers if you are running IPv6.}. HTTP, on the other hand, and most application protocols, manipulate host names, strings such as @samp{www.polipo.org}. The @dfn{domain name service} (DNS) is a distributed database that maps host names to IP addresses. When an application wants to make use of the DNS, it invokes a @dfn{resolver}, a local library or process that contacts remote name servers. 
Polipo usually tries to speak the DNS protocol itself rather than using the system resolver@footnote{The Unix interface to the resolver is provided by the @code{gethostbyname}(3) library call (@code{getaddrinfo}(3) on recent systems), which was designed at a time when a host lookup consisted in searching for one of five hosts in a @samp{HOSTS.TXT} file. The @code{gethostbyname} call is @dfn{blocking}, meaning that all activity must cease while a host lookup is in progress. When the call eventually returns, it doesn't provide a @dfn{time to live} (TTL) value to indicate how long the address may be cached. For these reasons, @code{gethostbyname} is hardly useful for programs that need to contact more than a few hosts. (Recent systems replace @code{gethostbyname}(3) by @code{getaddrinfo}(3), which is reentrant. While this removes one important problem that multi-threaded programs encounter, it doesn't solve any of the other issues with @code{gethostbyname}.)}. Its precise behaviour is controlled by the value of @code{dnsUseGethostbyname}. If @code{dnsUseGethostbyname} is @code{false}, Polipo never uses the system resolver. If it is @code{reluctantly} (the default), Polipo tries to speak DNS and falls back to the system resolver if a name server could not be contacted. If it is @code{happily}, Polipo tries to speak DNS, and falls back to the system resolver if the host couldn't be found for any reason (this is not a good idea for shared proxies). Finally, if @code{dnsUseGethostbyname} is @code{true}, Polipo never tries to speak DNS itself and uses the system resolver straight away (this is not recommended). If the internal DNS support is used, Polipo must be given a recursive name server to speak to. 
By default, this information is taken from the @samp{/etc/resolv.conf} file; however, if you wish to use a different name server, you may set the variable @code{dnsNameServer} to an IP address@footnote{While Polipo does its own caching of DNS data, I recommend that you run a local caching name server. I am very happy with @uref{http://home.t-online.de/home/Moestl/,,@code{pdnsd}}, notwithstanding its somewhat bizarre handling of TCP connections.}. When the reply to a DNS request is late to come, Polipo will retry multiple times using an exponentially increasing timeout. The maximum timeout used before Polipo gives up is defined by @code{dnsMaxTimeout} (default 60@dmn{s}); the total time before Polipo gives up on a DNS query will be roughly twice @code{dnsMaxTimeout}. The variable @code{dnsNegativeTtl} specifies the time during which negative DNS information (information that a host @emph{doesn't} exist) will be cached; this defaults to 120@dmn{s}. Increasing this value reduces both latency and network traffic but may cause a failed host not to be noticed when it comes back up. The variable @code{dnsQueryIPv6} specifies whether to query for IPv4 or IPv6 addresses. If @code{dnsQueryIPv6} is @code{false}, only IPv4 addresses are queried. If @code{dnsQueryIPv6} is @code{reluctantly}, both types of addresses are queried, but IPv4 addresses are preferred. If @code{dnsQueryIPv6} is @code{happily} (the default), IPv6 addresses are preferred. Finally, if @code{dnsQueryIPv6} is @code{true}, only IPv6 addresses are queried. If the system resolver is used, the value @code{dnsGethostbynameTtl} specifies the time during which a @code{gethostbyname} reply will be cached (default 5 minutes). @node Parent proxies, Tuning POST and PUT, DNS, Network @section Parent proxies Polipo will usually fetch instances directly from source servers as this configuration minimises latency. In some cases, however, it may be useful to have Polipo fetch instances from a @dfn{parent} proxy. 
Polipo can use two protocols to speak to a parent proxy: HTTP and SOCKS. When configured to use both HTTP and SOCKS proxying, Polipo will contact an HTTP proxy over SOCKS --- in other words, SOCKS is considered as being at a lower (sub)layer than HTTP. @menu * HTTP parent proxies:: Using an HTTP parent proxy. * SOCKS parent proxies:: Using a SOCKS4a parent proxy. @end menu @node HTTP parent proxies, SOCKS parent proxies, Parent proxies, Parent proxies @subsection HTTP parent proxies @vindex parentProxy @vindex parentAuthCredentials @cindex parent proxy @cindex upstream proxy @cindex firewall @cindex authentication The variable @code{parentProxy} specifies the hostname and port number of an HTTP parent proxy; it should have the form @samp{host:port}. If the parent proxy requires authorisation, the username and password should be specified in the variable @code{parentAuthCredentials} in the form @samp{username:password}. Only @emph{Basic} authentication is supported, which is vulnerable to replay attacks. The main application of the parent proxy support is to cross firewalls. Given a machine, say @code{trurl}, with unrestricted access to the web, the following evades a firewall by using an encrypted compressed @code{ssh} link: @example $ ssh -f -C -L 8124:localhost:8123 trurl polipo $ polipo parentProxy=localhost:8124 @end example @node SOCKS parent proxies, , HTTP parent proxies, Parent proxies @subsection SOCKS parent proxies @cindex SOCKS @vindex socksParentProxy @vindex socksUserName @vindex socksProxyType The variable @code{socksParentProxy} specifies the hostname and port number of a SOCKS parent proxy; it should have the form @samp{host:port}. The variant of the SOCKS protocol being used is defined by @code{socksProxyType}, which can be either @samp{socks4a} or @samp{socks5}; the latter value specifies ``SOCKS5 with hostnames'', and is the default. The user name passed to the SOCKS4a proxy is defined by the variable @code{socksUserName}. 
This value is currently ignored with a SOCKS5 proxy. The main application of the SOCKS support is to use @uref{http://tor.eff.org,,Tor} to evade overly restrictive or misconfigured firewalls. Assuming you have a Tor client running on the local host listening on the default port (9050), the following uses Tor for all outgoing HTTP traffic: @example $ polipo socksParentProxy=localhost:9050 @end example @node Tuning POST and PUT, Tunnelling connections, Parent proxies, Network @section Tuning POST and PUT requests @cindex POST request @cindex PUT request @vindex expectContinue The main assumption behind the design of the HTTP protocol is that requests are idempotent: since a request can be repeated by a client, a server is allowed to drop a connection at any time. This fact, more than anything else, explains the amazing scalability of the protocol. This assumption breaks down in the case of POST requests. Indeed, a POST request usually causes some action to be performed (a page to be printed, a significant amount of money to be transferred from your bank account, or, in Florida, a vote to be registered), and such a request should not be repeated. The only solution to this problem is to reserve HTTP to idempotent activities, and use reliable protocols for action-effecting ones. Notwithstanding that, HTTP/1.1 makes a weak attempt at making POST requests slightly more reliable and efficient than they are in HTTP/1.0. When speaking to an HTTP/1.1 server, an HTTP client is allowed to request that the server check @emph{a priori} whether it intends to honour a POST request. This is done by sending @dfn{an expectation}, a specific header with the request, @samp{Expect: 100-continue}, and waiting for either an error message or a @samp{100 Continue} reply from the server. If the latter arrives, the client is welcome to send the rest of the POST request@footnote{This, of course, is only part of the story. 
Additionally, the server is not required to reply with @samp{100 Continue}, hence the client must implement a timeout. Furthermore, according to the obsolete RFC2068, the server is allowed to spontaneously send @samp{100 Continue}, so the client must be prepared to ignore such a reply at any time.}. Polipo's behaviour w.r.t.@: client expectations is controlled by the variable @code{expectContinue}. If this variable is false, Polipo will never send an expectation to the server; if a client sends an expectation, Polipo will fail the expectation straight away, causing the client (if correctly implemented) to retry with no expectation. If @code{expectContinue} is @code{maybe} (the default), Polipo will behave in a standards-compliant manner: it will forward expectations to the server when allowed to do so, and fail client expectations otherwise. Finally, if @code{expectContinue} is @code{true}, Polipo will always send expectations when it is reasonable to do so; this violates the relevant standards and will break some websites, but might decrease network traffic under some circumstances. @node Tunnelling connections, , Tuning POST and PUT, Network @section Tunnelling connections @cindex tunnel @cindex tunnelling proxy @cindex https @cindex HTTP/SSL @cindex rsync @cindex CONNECT @vindex tunnelAllowedPorts Polipo is an HTTP proxy; it proxies HTTP traffic, and clients using other protocols should either establish a direct connection to the server or use an @emph{ad hoc} proxy. In many circumstances, however, it is not possible to establish a direct connection to the server, for example due to mis-configured firewalls or when trying to access the IPv4 Internet from an IPv6-only host. In such situations, it is possible to have Polipo behave as a @emph{tunnelling} proxy --- a proxy that merely forwards traffic between the client and the server without understanding it. Polipo enters tunnel mode when the client requests it by using the HTTP @samp{CONNECT} method. 
Most web browsers will use this technique for HTTP over SSL if configured to use Polipo as their `https proxy'. More generally, the author has successfully used it to cross mis-configured firewalls using OpenSSH, rsync, Jabber, IRC, etc. The variable @code{tunnelAllowedPorts} specifies the set of ports that Polipo will accept to tunnel traffic to. It defaults to allowing ssh, HTTP, https, rsync, IMAP, imaps, POP, pops, Jabber, CVS and Git traffic. @node Caching, Memory usage, Network, Top @chapter Caching @menu * Cache transparency:: Fresh and stale data. * Memory cache:: The in-memory cache. * Disk cache:: The on-disk cache. @end menu @node Cache transparency, Memory cache, Caching, Caching @section Cache transparency and validation @cindex transparent cache @cindex cache transparency @cindex out-of-date instances @cindex validation @cindex revalidation @cindex expire @cindex stale @cindex fresh If resources on a server change, it is possible for a cached instance to become out-of-date. Ideally, a cache would be perfectly @dfn{transparent}, meaning that it never serves an out-of-date instance; in a universe with a finite speed of signal propagation, however, this ideal is impossible to achieve. If a caching proxy decides that a cached instance is new enough to likely still be valid, it will directly serve the instance to the client; we then say that the cache decided that the instance is @dfn{fresh}. When an instance is @dfn{stale} (not fresh), the cache will check with the upstream server whether the resource has changed; we say that the cached instance is being @dfn{revalidated}. In HTTP/1.1, responsibility for revalidation is shared between the client, the server and the proxy itself. The client can override revalidation policy by using the @samp{Cache-Control} header@footnote{Or the obsolete @samp{Pragma} header.}; for example, some user-agents will request end-to-end revalidation in this way when the user shift-clicks on @emph{reload}. 
The server may choose to specify revalidation policy by using the @samp{Expires} and @samp{Cache-Control} headers. As to the proxy, it needs to choose a revalidation policy for instances with neither server- nor client-side cache control information. Of course, nothing (except the HTTP/1.1 spec, but that is easily ignored) prevents a proxy from overriding the client's and server's cache control directives. @menu * Tuning validation:: Tuning Polipo's validation behaviour. * Tweaking validation:: Further tweaking of validation. @end menu @node Tuning validation, Tweaking validation, Cache transparency, Cache transparency @subsection Tuning validation behaviour @cindex age @vindex maxAge @vindex maxAgeFraction @vindex maxExpiresAge @vindex maxNoModifiedAge Polipo's revalidation behaviour is controlled by a number of variables. In the following, a resource's @dfn{age} is the time since it was last validated, either because it was fetched from the server or because it was revalidated. The policy defining when cached instances become stale in the absence of server-provided information is controlled by the variables @code{maxAge}, @code{maxAgeFraction}, @code{maxExpiresAge} and @code{maxNoModifiedAge}. If an instance has an @samp{Expires} header, it becomes stale at the date given by that header, or when its age becomes larger than @code{maxExpiresAge}, whichever happens first. If an instance has no @samp{Expires} header but has a @samp{Last-Modified} header, it becomes stale when its age reaches either @code{maxAgeFraction} of the time since it was last modified or else the absolute value @code{maxAge}, whichever happens first. Finally, if an instance has neither @samp{Expires} nor @samp{Last-Modified}, it will become stale when its age reaches @code{maxNoModifiedAge}. 
@node Tweaking validation, , Tuning validation, Cache transparency @subsection Further tweaking of validation behaviour @cindex uncachable @cindex vary @vindex cacheIsShared @vindex mindlesslyCacheVary @vindex uncachableFile @vindex dontCacheCookies @vindex dontCacheRedirects @vindex dontTrustVaryETag If @code{cacheIsShared} is false (it is true by default), Polipo will ignore the server-side @samp{Cache-Control} directives @samp{private}, @samp{s-maxage} and @samp{proxy-must-revalidate}. This is highly desirable behaviour when the proxy is used by just one user, but might break some sites if the proxy is shared. When connectivity is very poor, the variable @code{relaxTransparency} can be used to cause Polipo to serve stale instances under some circumstances. If @code{relaxTransparency} is @code{false} (the default), all stale instances are validated (@pxref{Cache transparency}), and failures to connect are reported to the client. This is the default mode of operation of most other proxies, and the least likely to surprise the user. If @code{relaxTransparency} is @code{maybe}, all stale instances are still validated, but a failure to connect is only reported as an error if no data is available in the cache. If a connection fails and stale data is available, it is served to the client with a suitable HTTP/1.1 @samp{Warning} header. Current user-agents do not provide visible indication of such warnings, however, and this setting will typically cause the browser to display stale data with no indication that anything went wrong. It is useful when you are consulting a live web site but don't want to be bothered with failed revalidations. If @code{relaxTransparency} is @code{true}, missing data is fetched from remote servers, but stale data are unconditionally served with no validation. 
Client-side @samp{Cache-Control} directives are still honoured, which means that you can force an end-to-end revalidation from the browser's interface (typically by shift-clicking on ``reload''). This setting is only useful if you have very bad network connectivity or are consulting a very slow web site or one that provides incorrect cache control information@footnote{This is for example the case of @code{www.microsoft.com}, and also of websites generated by a popular Free content management system written in Python.} and are willing to manually revalidate pages that you suspect are stale. If @code{mindlesslyCacheVary} is true, the presence of a @samp{Vary} header (which indicates that content-negotiation occurred, @pxref{Censor Accept-Language}) is ignored, and cached negotiated instances are mindlessly returned to the client. If it is false (the default), negotiated instances are revalidated on every client request. Unfortunately, a number of servers (most notably some versions of Apache's @code{mod_deflate} module) send objects with an @samp{ETag} header that will confuse Polipo in the presence of a @samp{Vary} header. Polipo will make a reasonable check for consistency if @samp{dontTrustVaryETag} is set to @samp{maybe} (the default); it will systematically ignore @samp{ETag} headers on objects with @samp{Vary} headers if it is set to @samp{true}. A number of websites incorrectly mark variable resources as cachable; such issues can be worked around in polipo by manually marking given categories of objects as uncachable. If @code{dontCacheCookies} is true, all pages carrying HTTP cookies will be treated as uncachable. If @code{dontCacheRedirects} is true, all redirects (301 and 302) will be treated as uncachable. Finally, if everything else fails, a list of uncachable URLs can be given in the file specified by @code{uncachableFile}, which has the same format as the @code{forbiddenFile} (@pxref{Internal forbidden list}). 
If not specified, its location defaults to @samp{~/.polipo-uncachable} or @samp{/etc/polipo/uncachable}, whichever exists. @node Memory cache, Disk cache, Cache transparency, Caching @section The in-memory cache The in-memory cache consists of a list of HTTP and DNS objects maintained in least-recently used order. An index to the in-memory cache is maintained as a (closed) hash table. When the in-memory cache grows beyond a certain size (controlled by a number of variables, @pxref{Memory usage}), or when a hash table collision occurs, resources are written out to disk. @node Disk cache, , Memory cache, Caching @section The on-disk cache @cindex filesystem @cindex NFS @vindex diskCacheRoot @vindex maxDiskEntries @vindex diskCacheWriteoutOnClose @vindex diskCacheFilePermissions @vindex diskCacheDirectoryPermissions @vindex maxDiskCacheEntrySize The on-disk cache consists in a filesystem subtree rooted at a location defined by the variable @code{diskCacheRoot}, by default @code{"/var/cache/polipo/"}. This directory should normally be writeable, readable and seekable by the user running Polipo. While it is best to use a local filesystem for the on-disk cache, an NFSv3- or AFS-mounted filesystem should be safe in most implementations. Do not use NFSv2, as it will cause cache corruption@footnote{Polipo assumes that @samp{open(O_CREAT | O_EXCL)} works reliably.}. If @code{diskCacheRoot} is an empty string, no disk cache is used. The value @code{maxDiskEntries} (32 by default) is the absolute maximum of file descriptors held open for on-disk objects. When this limit is reached, Polipo will close descriptors on a least-recently-used basis. This value should be set to be slightly larger than the number of resources that you expect to be live at a single time; defining the right notion of liveness is left as an exercise for the interested reader. 
The value @code{diskCacheWriteoutOnClose} (32@dmn{kB} by default) is the amount of data that Polipo will write out when closing a disk file. Writing out data when closing a file can avoid subsequently reopening it, but causes unnecessary work if the instance is later superseded. The integers @code{diskCacheDirectoryPermissions} and @code{diskCacheFilePermissions} are the Unix filesystem permissions with which files and directories are created in the on-disk cache; they default to @samp{0700} and @samp{0600} respectively. The variable @code{maxDiskCacheEntrySize} specifies the maximum size, in bytes, of an instance that is stored in the on-disk cache. If set to -1 (the default), all objects are stored in the on-disk cache. @menu * Asynchronous writing:: Writing out data when idle. * Purging:: Purging the on-disk cache. * Disk format:: Format of the on-disk cache. * Modifying the on-disk cache:: @end menu @node Asynchronous writing, Purging, Disk cache, Disk cache @subsection Asynchronous writing @vindex idleTime @vindex maxObjectsWhenIdle @vindex maxWriteoutWhenIdle When Polipo runs out of memory (@pxref{Limiting memory usage}), it will start discarding instances from its memory cache. If a disk cache has been configured, it will write out any instance that it discards. Any memory allocation that prompted the purge must then wait for the write to complete. In order to avoid the latency hit that this causes, Polipo will preemptively write out instances to the disk cache whenever it is idle. The integer @code{idleTime} specifies the time during which Polipo will remain idle before it starts writing out random objects to the on-disk cache; this value defaults to 20@dmn{s}. You may want to decrease this value for a busy cache with little memory, or increase it if your cache is often idle and has a lot of memory. 
The value @code{maxObjectsWhenIdle} (default 32) specifies the maximum number of instances that an idle Polipo will write out without checking whether there's any new work to do. The value @code{maxWriteoutWhenIdle} specifies the maximum amount of data (default 64@dmn{kB}) that Polipo will write out without checking for new activity. Increasing these values will make asynchronous write-out slightly faster, at the cost of possibly increasing Polipo's latency in some rare circumstances. @node Purging, Disk format, Asynchronous writing, Disk cache @subsection Purging the on-disk cache @cindex purging @vindex diskCacheUnlinkTime @vindex diskCacheTruncateTime @vindex diskCacheTruncateSize @vindex preciseExpiry Polipo never removes a file in its on-disk cache, except when it finds that the instance that it represents has been superseded by a newer version. In order to keep the on-disk cache from growing without bound, it is necessary to @dfn{purge} it once in a while. Purging the cache typically consists in removing some files, truncating large files (@pxref{Partial instances}) or moving them to off-line storage. Polipo itself can be used to purge its on-disk cache; this is done by invoking Polipo with the @option{-x} flag. This can safely be done when Polipo is running (@pxref{Modifying the on-disk cache}). For a purge to be effective, it is necessary to cause Polipo to write-out its in-memory cache to disk (@pxref{Stopping}). Additionally, Polipo will not necessarily notice the changed files until it attempts to access them; thus, you will want it to discard its in-memory cache after performing the purge. The safe way to perform a purge is therefore: @example $ kill -USR1 @var{polipo-pid} $ sleep 1 $ polipo -x $ kill -USR2 @var{polipo-pid} @end example The behaviour of the @option{-x} flag is controlled by three configuration variables. 
The variable @code{diskCacheUnlinkTime} specifies the time during which an on-disk entry should remain unused before it is eligible for removal; it defaults to 32 days. The variable @code{diskCacheTruncateTime} specifies the time for which an on-disk entry should remain unused before it is eligible for truncation; it defaults to 4 days and a half. The variable @code{diskCacheTruncateSize} specifies the size at which files are truncated after they have not been accessed for @code{diskCacheTruncateTime}; it defaults to 1@dmn{MB}. Usually, Polipo uses a file's modification time in order to determine whether it is old enough to be expirable. This heuristic can be disabled by setting the variable @code{preciseExpiry} to true. @node Disk format, Modifying the on-disk cache, Purging, Disk cache @subsection Format of the on-disk cache @vindex DISK_CACHE_BODY_OFFSET @cindex on-disk file @cindex on-disk cache The on-disk cache consists of a collection of files, one per instance. The format of an on-disk resource is similar to that of an HTTP message: it starts with an HTTP status line, followed by HTTP headers, followed by a blank line (@samp{\r\n\r\n}). The blank line is optionally followed by a number of binary zeroes. The body of the instance follows. The headers of an on-disk file have a few minor differences with HTTP messages. Obviously, there is never a @samp{Transfer-Encoding} line. A few additional headers are used by Polipo for its internal bookkeeping: @itemize @item @samp{X-Polipo-Location}: this is the URL of the resource stored in this file. This is always present. @item @samp{X-Polipo-Date}: this is Polipo's estimation of the date at which this instance was last validated, and is used for generating the @samp{Age} header of HTTP messages. This is optional, and only stored if different from the instance's date. @item @samp{X-Polipo-Access}: this is the date when the instance was last accessed by Polipo, and is used for cache purging (@pxref{Purging}). 
This is optional, and is absent if the instance was never accessed. @item @samp{X-Polipo-Body-Offset}: the presence of this line indicates that the blank line following the headers is followed by a number of zero bytes. Its value is an integer, which indicates the offset since the beginning of the file at which the instance body actually starts. This line is optional, and if absent the body starts immediately after the blank line. @end itemize @node Modifying the on-disk cache, , Disk format, Disk cache @subsection Modifying the on-disk cache @cindex on-disk cache It is safe to modify the on-disk cache while Polipo is running as long as no file is ever modified in place. More precisely, the only safe operations are to unlink (remove, delete) files in the disk cache, or to atomically add new files to the cache (by performing an exclusive open, or by using one of the @samp{link} or @samp{rename} system calls). It is @emph{not} safe to truncate a file in place. @node Memory usage, Copying, Caching, Top @chapter Memory usage @cindex memory Polipo uses two distinct pools of memory, the @dfn{chunk pool} and the @dfn{malloc pool}. @menu * Chunk memory:: Chunk memory. * Malloc memory:: Malloc memory. * Limiting memory usage:: Limiting Polipo's memory usage. @end menu @node Chunk memory, Malloc memory, Memory usage, Memory usage @section Chunk memory @vindex CHUNK_SIZE @vindex MALLOC_CHUNKS @cindex chunk @cindex memory Most of the memory used by Polipo is stored in chunks, fixed-size blocks of memory; the size of a chunk is defined by the compile-time constant @code{CHUNK_SIZE}, and defaults to 4096 bytes on 32-bit platforms, 8192 on 64-bit ones. Chunks are used for storing object data (bodies of instances) and for temporary I/O buffers. Increasing the chunk size increases performance somewhat, but at the cost of larger granularity of allocation and hence larger memory usage. 
By default, Polipo uses a hand-crafted memory allocator based on @code{mmap}(2) (@code{VirtualAlloc} under Windows) for allocating chunks; while this is very slightly faster than the stock memory allocator, its main benefit is that it limits memory fragmentation. It is possible to disable the chunk allocator, and use @code{malloc}(3) for all memory allocation, by defining @code{MALLOC_CHUNKS} at compile time; this is probably only useful for debugging. There is one assumption made about @code{CHUNK_SIZE}: @code{CHUNK_SIZE} multiplied by the number of bits in an @code{unsigned int} (actually in a @code{ChunkBitmap} --- see @file{chunk.c}) must be a multiple of the page size, which is 4096 on most systems (8192 on Alpha, 65536 on Windows --- go figure). As all network I/O will be performed in units of one to two chunks, @code{CHUNK_SIZE} should be at least equal to your network interface's MTU (typically 1500 bytes). Additionally, as much I/O will be done at @code{CHUNK_SIZE}-aligned addresses, @code{CHUNK_SIZE} should ideally be a multiple of the page size. In summary, 2048, 4096, 8192 and 16384 are good choices for @code{CHUNK_SIZE}. @node Malloc memory, Limiting memory usage, Chunk memory, Memory usage @section Malloc allocation @cindex malloc @cindex memory Polipo uses the standard @code{malloc}(3) memory allocator for allocating small data structures (up to 100 bytes), small strings and atoms (unique strings). @node Limiting memory usage, , Malloc memory, Memory usage @section Limiting Polipo's memory usage @cindex limiting memory @cindex memory Polipo is designed to work well when given little memory, but will happily scale to larger configurations. For that reason, you need to inform it of the amount of memory it can use. @menu * Limiting chunk usage:: Discard objects when low on chunks. * Limiting object usage:: Limit the number of objects. * OS usage limits:: Don't impose OS limits. 
@end menu @node Limiting chunk usage, Limiting object usage, Limiting memory usage, Limiting memory usage @subsection Limiting chunk usage @vindex chunkHighMark @vindex chunkCriticalMark @vindex chunkLowMark @vindex CHUNK_SIZE @cindex memory @cindex chunk You can limit Polipo's usage of chunk memory by setting @code{chunkHighMark} and @code{chunkLowMark}. The value @code{chunkHighMark} is the absolute maximum number of bytes of allocated chunk memory. When this value is reached, Polipo will try to purge objects from its in-memory cache; if that fails to free memory, Polipo will start dropping connections. This value defaults to 24@dmn{MB} or one quarter of the machine's physical memory, whichever is less. When chunk usage falls back below @code{chunkLowMark}, Polipo will stop discarding in-memory objects. The value @code{chunkCriticalMark}, which should be somewhere between @code{chunkLowMark} and @code{chunkHighMark}, specifies the value above which Polipo will make heroic efforts to free memory, including punching holes in the middle of instances, but without dropping connections. Unless set explicitly, both @code{chunkLowMark} and @code{chunkCriticalMark} are computed automatically from @code{chunkHighMark}. @node Limiting object usage, OS usage limits, Limiting chunk usage, Limiting memory usage @subsection Limiting object usage @vindex objectHighMark @vindex publicObjectLowMark @vindex objectHashTableSize Besides limiting chunk usage, it is possible to limit Polipo's memory usage by bounding the number of objects it keeps in memory at any given time. This is done with @code{objectHighMark} and @code{publicObjectLowMark}. The value @code{objectHighMark} is the absolute maximum of objects held in memory (including resources and server addresses). When the number of in-memory objects that haven't been superseded yet falls below @code{publicObjectLowMark}, Polipo will stop writing out objects to disk (superseded objects are discarded as soon as possible). 
On 32-bit architectures, every object costs 108 bytes of memory, plus storage for every globally unique header that is not handled specially (hopefully negligible), plus an overhead of one word (4 bytes) for every chunk of data in the object. You may also want to change @code{objectHashTableSize}. This is the size of the hash table used for holding objects; it should be a power of two and defaults to eight times @code{objectHighMark}. Increasing this value will reduce the number of objects being written out to disk due to hash table collisions. Every hash table entry costs one word. @node OS usage limits, , Limiting object usage, Limiting memory usage @subsection OS usage limits @cindex usage limit @cindex ulimit @cindex OOM killer Many operating systems permit limiting a process' memory usage by setting a @dfn{usage limit}; on most Unix-like systems, this is done with the @option{-v} option to the @command{ulimit} command. Typically, the effect is to cause calls to the @code{malloc} and @code{mmap} library functions to fail. Polipo will usually react gracefully to failures to allocate memory@footnote{There are exactly three places in the code where Polipo will give up and exit if out of memory; all three are extremely unlikely to happen in practice.}. Nonetheless, you should avoid using OS limits to limit Polipo's memory usage: when it hits an OS limit, Polipo cannot allocate the memory needed to schedule recovery from the out-of-memory condition, and has no choice other than to drop a connection. Unfortunately, some operating system kernels (notably certain Linux releases) fail to fail an allocation if no usage limit is given; instead, they either crash when memory is exhausted, or else start killing random processes with no advance warning@footnote{How I wish for a @samp{SIGXMEM} signal.}. On such systems, imposing an (unrealistically large) usage limit on Polipo is the safe thing to do. 
@node Copying, Variable index, Memory usage, Top @unnumbered Copying You are allowed to do anything you wish with Polipo as long as you don't deny my right to be recognised as its author and you don't blame me if anything goes wrong. More formally, Polipo is distributed under the following terms: @quotation Copyright @copyright{} 2003--2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @end quotation The last sentence is what happens when you allow lawyers to have it their way with a language. @node Variable index, Concept index, Copying, Top @unnumbered Variable index @printindex vr @node Concept index, , Variable index, Top @unnumbered Concept index @printindex cp @bye polipo-1.0.4.1/polipo.man0000644000175000017500000000402111331407220014454 0ustar chrisdchrisd.TH POLIPO 1 .SH NAME polipo \- a caching web proxy .SH SYNOPSIS .B polipo [ .B \-h | .B \-v | .B \-x ] [ .B \-c .I config ] [ .IB var=val ]... .SH DESCRIPTION Polipo is a caching HTTP proxy. 
It listens to requests for web pages from your browser and forwards them to web servers, and forwards the servers' replies to your browser. In the process, it optimises and cleans up the network traffic. .PP By default, Polipo listens on TCP port 8123. Please configure your web browser to use the proxy on .B localhost port 8123. .SH OPTIONS .TP .B \-h Display help and exit. .TP .B \-v Display the list of configuration variables and exit. .TP .B \-x Purge the on-disk cache and exit. .TP .B \-c Select an alternate configuration file. .TP .IB var = val Change the value of a configuration variable. .SH FILES .TP .B /etc/polipo/config The default location of Polipo's configuration file. .TP .B /etc/polipo/forbidden The default location of the list of forbidden URLs. .TP .B /var/cache/polipo/ The default location of the on-disk cache. .TP .B /usr/share/polipo/www/ The default root of the local web space. .SH SIGNALS .TP .B SIGUSR1 write out all in-memory objects to disk and reload the forbidden URLs file. .TP .B SIGUSR2 write out all in-memory objects to disk, discard all in-memory objects, and reload the forbidden URLs file. .TP .B SIGTERM, SIGINT, SIGHUP write out all in-memory objects to disk and quit. .SH SECURITY The internal web server will follow symbolic links that point outside the local document tree. You should run Polipo in a chroot jail if that is a problem. .PP There is no reason to run Polipo as root. .SH FULL DOCUMENTATION The full manual for Polipo is maintained in a texinfo file, and is normally available through a web server internal to Polipo. Please make sure that Polipo is running, and point your favourite web browser at .IP http://localhost:8123/ .PP Alternatively, you may type .IP $ info polipo .PP at a shell prompt, or .IP C-h i m polipo RET .PP in .BR Emacs . .SH AUTHOR Polipo was written by Juliusz Chroboczek. 
polipo-1.0.4.1/polipo.h0000644000175000017500000001045511331407220014140 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #ifdef __MINGW32_VERSION #define MINGW #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef MINGW #include #include #include #include #include #include #include #include #include #include #include #include #endif #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* nothing */ #elif defined(__GNUC__) #define inline __inline #if (__GNUC__ >= 3) #define restrict __restrict #else #define restrict /**/ #endif #else #define inline /**/ #define restrict /**/ #endif #if defined(__GNUC__) && (__GNUC__ >= 3) #define ATTRIBUTE(x) __attribute__(x) #else #define ATTRIBUTE(x) /**/ #endif #if defined __GLIBC__ #define HAVE_TM_GMTOFF #ifndef __UCLIBC__ #define HAVE_TIMEGM #define HAVE_FTS #define HAVE_FFSL #define HAVE_FFSLL #endif #define HAVE_SETENV #define HAVE_ASPRINTF #if (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) #define HAVE_MEMRCHR #endif #endif #if defined(__linux__) && (__GNU_LIBRARY__ == 1) /* Linux libc 5 */ #define HAVE_TIMEGM #define HAVE_SETENV #endif #ifdef BSD #define HAVE_TM_GMTOFF #define HAVE_FTS #define HAVE_SETENV #endif #ifdef __CYGWIN__ #define HAVE_SETENV #define HAVE_ASPRINTF #endif #ifndef O_BINARY #define O_BINARY 0 #endif #define HAVE_TZSET #if _POSIX_VERSION >= 200112L #define HAVE_SETENV #endif #ifndef NO_IPv6 #ifdef __GLIBC__ #if (__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) #define HAVE_IPv6 #endif #endif #ifdef __FreeBSD__ #define HAVE_ASPRINTF #if __FreeBSD_version >= 400000 #define HAVE_IPv6 #define HAVE_TIMEGM #endif #endif #ifdef __NetBSD__ #if __NetBSD_Version__ >= 105000000 #define HAVE_IPv6 #endif #if __NetBSD_Version__ >= 200000000 #define HAVE_TIMEGM #define HAVE_ASPRINTF #endif #endif #ifdef __OpenBSD__ #if OpenBSD >= 200311 #define HAVE_IPv6 #endif #endif #endif #if defined(i386) || 
defined(__mc68020__) #define UNALIGNED_ACCESS #endif #ifndef MINGW #define HAVE_FORK #ifndef NO_SYSLOG #define HAVE_SYSLOG #endif #define HAVE_READV_WRITEV #define HAVE_FFS #define READ(x, y, z) read(x, y, z) #define WRITE(x, y, z) write(x, y, z) #define CLOSE(x) close(x) #else #ifndef HAVE_REGEX #define NO_FORBIDDEN #endif #endif #ifdef HAVE_READV_WRITEV #define WRITEV(x, y, z) writev(x, y, z) #define READV(x, y, z) readv(x, y, z) #endif #ifndef HAVE_FORK #define NO_REDIRECTOR #endif #include "mingw.h" #include "ftsimport.h" #include "atom.h" #include "util.h" #include "config.h" #include "event.h" #include "io.h" #include "socks.h" #include "chunk.h" #include "object.h" #include "dns.h" #include "http.h" #include "client.h" #include "local.h" #include "diskcache.h" #include "server.h" #include "http_parse.h" #include "parse_time.h" #include "forbidden.h" #include "log.h" #include "auth.h" #include "tunnel.h" extern AtomPtr configFile; extern int daemonise; extern AtomPtr pidFile; polipo-1.0.4.1/parse_time.h0000644000175000017500000000233511331407220014764 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* XXX */ extern const time_t time_t_max; int parse_time(const char *buf, int i, int len, time_t *time_return); int format_time(char *buf, int i, int len, time_t t); polipo-1.0.4.1/parse_time.c0000644000175000017500000001314711331407220014762 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" const time_t time_t_max = ((time_t)~(1U << 31)); static inline int d2i(char c) { if(c >= '0' && c <= '9') return c - '0'; else return -1; } static int parse_int(const char *buf, int i, int len, int *val_return) { int val, d; if(i >= len) return -1; val = d2i(buf[i]); if(val < 0) return -1; else i++; while(i < len) { d = d2i(buf[i]); if(d < 0) break; val = val * 10 + d; i++; } *val_return = val; return i; } static const char month_names[12][3] = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec", }; static int skip_word(const char *buf, int i, int len) { if(i >= len) return -1; if(!letter(buf[i])) return -1; while(i < len) { if(!letter(buf[i])) break; i++; } return i; } static int parse_month(const char *buf, int i, int len, int *val_return) { int j, k, l; j = skip_word(buf, i, len); if(j != i + 3) return -1; for(k = 0; k < 12; k++) { for(l = 0; l < 3; l++) { if(lwr(buf[i + l]) != month_names[k][l]) break; } if(l == 3) break; } if(k >= 12) return -1; *val_return = k; return j; } static int issep(char c) { return c == ' ' || c == '\t' || c == ',' || c == ':' || c == '-'; } int skip_separator(const char *buf, int i, int len) { if(i >= len) return -1; if(issep(buf[i])) i++; else return -1; while(i < len) { if(issep(buf[i])) i++; else break; } return i; } int parse_time(const char *buf, int offset, int len, time_t *time_return) { struct tm tm; time_t t; int i = offset; i = skip_word(buf, i, len); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; if(i >= len) return -1; if(d2i(buf[i]) >= 0) { i = parse_int(buf, i, len, &tm.tm_mday); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_month(buf, i, len, &tm.tm_mon); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_int(buf, i, len, &tm.tm_year); if(i < 0) return -1; if(tm.tm_year < 100) tm.tm_year += 1900; if(tm.tm_year < 1937) tm.tm_year += 100; if(tm.tm_year < 1937) return 
-1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_int(buf, i, len, &tm.tm_hour); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_int(buf, i, len, &tm.tm_min); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_int(buf, i, len, &tm.tm_sec); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = skip_word(buf, i, len); if(i < 0) return -1; } else { /* funny American format */ i = parse_month(buf, i, len, &tm.tm_mon); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_int(buf, i, len, &tm.tm_mday); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_int(buf, i, len, &tm.tm_hour); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_int(buf, i, len, &tm.tm_min); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_int(buf, i, len, &tm.tm_sec); if(i < 0) return -1; i = skip_separator(buf, i, len); if(i < 0) return -1; i = parse_int(buf, i, len, &tm.tm_year); if(i < 0) return -1; if(tm.tm_year < 100) tm.tm_year += 1900; if(tm.tm_year < 1937) tm.tm_year += 100; if(tm.tm_year < 1937 || tm.tm_year > 2040) return -1; } if(tm.tm_year < 2038) { tm.tm_year -= 1900; tm.tm_isdst = -1; t = mktime_gmt(&tm); if(t == -1) return -1; } else { t = time_t_max; } *time_return = t; return i; } int format_time(char *buf, int i, int len, time_t t) { struct tm *tm; int rc; if(i < 0 || i > len) return -1; tm = gmtime(&t); if(tm == NULL) return -1; rc = strftime(buf + i, len - i, "%a, %d %b %Y %H:%M:%S GMT", tm); if(rc <= 0) /* yes, that's <= */ return -1; return i + rc; } polipo-1.0.4.1/object.h0000644000175000017500000001414111331407220014100 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), 
to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

/* Generic maximum/minimum.  Each macro may evaluate an argument twice,
   so do not pass expressions with side effects. */
#undef MAX
#undef MIN

#define MAX(x,y) ((x)<=(y)?(y):(x))
#define MIN(x,y) ((x)<=(y)?(x):(y))

struct _HTTPRequest;

/* Type used for the size of a chunk: unsigned short when CHUNK_SIZE is
   known to fit in one, unsigned int otherwise. */
#if defined(USHRT_MAX) && CHUNK_SIZE <= USHRT_MAX
typedef unsigned short chunk_size_t;
#else
typedef unsigned int chunk_size_t;
#endif

/* One chunk of an object's data. */
typedef struct _Chunk {
    /* presumably a lock count maintained by lockChunk()/unlockChunk()
       -- confirm in object.c */
    short int locked;
    chunk_size_t size;
    char *data;
} ChunkRec, *ChunkPtr;

struct _Object;

/* Type of the callback stored in an object's `request' field.  The
   meaning of the three int arguments is not visible from this header. */
typedef int (*RequestFunction)(struct _Object *, int, int, int,
                               struct _HTTPRequest*, void*);

/* An object held in memory. */
typedef struct _Object {
    short refcount;
    /* one of the OBJECT_HTTP / OBJECT_DNS values */
    unsigned char type;
    RequestFunction request;
    /* presumably passed as the last argument of `request' -- confirm */
    void *request_closure;
    void *key;
    unsigned short key_size;
    /* bitwise OR of the OBJECT_* flag values */
    unsigned short flags;
    unsigned short code;
    void *abort_data;
    struct _Atom *message;
    int length;
    time_t date;
    time_t age;
    time_t expires;
    time_t last_modified;
    time_t atime;
    char *etag;
    /* bitwise OR of CACHE_* values */
    unsigned short cache_control;
    int max_age;
    int s_maxage;
    struct _Atom *headers;
    struct _Atom *via;
    int size;
    /* presumably the number of entries in `chunks' -- confirm */
    int numchunks;
    ChunkPtr chunks;
    void *requestor;
    struct _Condition condition;
    struct _DiskCacheEntry *disk_entry;
    /* links to neighbouring objects; NOTE(review): presumably the
       least-recently-used list described in the manual -- confirm */
    struct _Object *next, *previous;
} ObjectRec, *ObjectPtr;

/* Cache-control information.  NOTE(review): the field names mirror the
   HTTP Cache-Control directives max-age, s-maxage, min-fresh and
   max-stale; `flags' presumably holds CACHE_* bits -- confirm. */
typedef struct _CacheControl {
    int flags;
    int max_age;
    int s_maxage;
    int min_fresh;
    int max_stale;
} CacheControlRec, *CacheControlPtr;

extern int cacheIsShared;
extern int mindlesslyCacheVary;

extern CacheControlRec no_cache_control;
extern int objectExpiryScheduled;
extern int publicObjectCount;
extern int privateObjectCount;
extern int idleTime;

extern const time_t time_t_max;

extern int publicObjectLowMark, objectHighMark;

extern int log2ObjectHashTableSize;

/* object->type */
#define OBJECT_HTTP 1
#define OBJECT_DNS 2

/* object->flags */
/* object is public */
#define OBJECT_PUBLIC 1
/* object hasn't got any headers yet */
#define OBJECT_INITIAL 2
/* a server connection is already taking care of the object */
#define OBJECT_INPROGRESS 4
/* the object has been superseded -- don't try to fetch it */
#define OBJECT_SUPERSEDED 8
/* the object is private and additionally can only be used by its requestor */
#define OBJECT_LINEAR 16
/* the object is currently being validated */
#define OBJECT_VALIDATING 32
/* object has been aborted */
#define OBJECT_ABORTED 64
/* last object request was a failure */
#define OBJECT_FAILED 128
/* Object is a local file */
#define OBJECT_LOCAL 256
/* The object's data has been entirely written out to disk */
#define OBJECT_DISK_ENTRY_COMPLETE 512
/* The object is suspected to be dynamic -- don't PMM */
#define OBJECT_DYNAMIC 1024
/* Used for synchronisation between client and server. 
*/ #define OBJECT_MUTATING 2048 /* object->cache_control and connection->cache_control */ /* RFC 2616 14.9 */ /* Non-standard: like no-cache, but kept internally */ #define CACHE_NO_HIDDEN 1 /* no-cache */ #define CACHE_NO 2 /* public */ #define CACHE_PUBLIC 4 /* private */ #define CACHE_PRIVATE 8 /* no-store */ #define CACHE_NO_STORE 16 /* no-transform */ #define CACHE_NO_TRANSFORM 32 /* must-revalidate */ #define CACHE_MUST_REVALIDATE 64 /* proxy-revalidate */ #define CACHE_PROXY_REVALIDATE 128 /* only-if-cached */ #define CACHE_ONLY_IF_CACHED 256 /* set if Vary header; treated as no-cache */ #define CACHE_VARY 512 /* set if Authorization header; treated specially */ #define CACHE_AUTHORIZATION 1024 /* set if cookie */ #define CACHE_COOKIE 2048 /* set if this object should never be combined with another resource */ #define CACHE_MISMATCH 4096 struct _HTTPRequest; void preinitObject(void); void initObject(void); ObjectPtr findObject(int type, const void *key, int key_size); ObjectPtr makeObject(int type, const void *key, int key_size, int public, int fromdisk, int (*request)(ObjectPtr, int, int, int, struct _HTTPRequest*, void*), void*); void objectMetadataChanged(ObjectPtr object, int dirty); ObjectPtr retainObject(ObjectPtr); void releaseObject(ObjectPtr); int objectSetChunks(ObjectPtr object, int numchunks); void lockChunk(ObjectPtr, int); void unlockChunk(ObjectPtr, int); void destroyObject(ObjectPtr object); void privatiseObject(ObjectPtr object, int linear); void abortObject(ObjectPtr object, int code, struct _Atom *message); void supersedeObject(ObjectPtr); void notifyObject(ObjectPtr); void releaseNotifyObject(ObjectPtr); ObjectPtr objectPartial(ObjectPtr object, int length, struct _Atom *headers); int objectHoleSize(ObjectPtr object, int offset) ATTRIBUTE ((pure)); int objectHasData(ObjectPtr object, int from, int to) ATTRIBUTE ((pure)); int objectAddData(ObjectPtr object, const char *data, int offset, int len); void objectPrintf(ObjectPtr object, int 
offset, const char *format, ...) ATTRIBUTE ((format (printf, 3, 4))); int discardObjectsHandler(TimeEventHandlerPtr); void writeoutObjects(int); int discardObjects(int all, int force); int objectIsStale(ObjectPtr object, CacheControlPtr cache_control) ATTRIBUTE ((pure)); int objectMustRevalidate(ObjectPtr object, CacheControlPtr cache_control) ATTRIBUTE ((pure)); polipo-1.0.4.1/object.c0000644000175000017500000007127111331407220014102 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" int mindlesslyCacheVary = 0; int objectHashTableSize = 0; int log2ObjectHashTableSize; static ObjectPtr object_list = NULL; static ObjectPtr object_list_end = NULL; int objectExpiryScheduled; int publicObjectCount; int privateObjectCount; int cacheIsShared = 1; int publicObjectLowMark = 0, objectHighMark = 2048; static ObjectPtr *objectHashTable; int maxExpiresAge = (30 * 24 + 1) * 3600; int maxAge = (14 * 24 + 1) * 3600; float maxAgeFraction = 0.1; int maxNoModifiedAge = 23 * 60; int maxWriteoutWhenIdle = 64 * 1024; int maxObjectsWhenIdle = 32; int idleTime = 20; int dontCacheCookies = 0; void preinitObject() { CONFIG_VARIABLE_SETTABLE(idleTime, CONFIG_TIME, configIntSetter, "Time to remain idle before writing out."); CONFIG_VARIABLE_SETTABLE(maxWriteoutWhenIdle, CONFIG_INT, configIntSetter, "Amount of data to write at a time when idle."); CONFIG_VARIABLE_SETTABLE(maxObjectsWhenIdle, CONFIG_INT, configIntSetter, "Number of objects to write at a time " "when idle."); CONFIG_VARIABLE_SETTABLE(cacheIsShared, CONFIG_BOOLEAN, configIntSetter, "If false, ignore s-maxage and private."); CONFIG_VARIABLE_SETTABLE(mindlesslyCacheVary, CONFIG_BOOLEAN, configIntSetter, "If true, mindlessly cache negotiated objects."); CONFIG_VARIABLE(objectHashTableSize, CONFIG_INT, "Size of the object hash table (0 = auto)."); CONFIG_VARIABLE(objectHighMark, CONFIG_INT, "High object count mark."); CONFIG_VARIABLE(publicObjectLowMark, CONFIG_INT, "Low object count mark (0 = auto)."); CONFIG_VARIABLE_SETTABLE(maxExpiresAge, CONFIG_TIME, configIntSetter, "Max age for objects with Expires header."); CONFIG_VARIABLE_SETTABLE(maxAge, CONFIG_TIME, configIntSetter, "Max age for objects without Expires header."); CONFIG_VARIABLE_SETTABLE(maxAgeFraction, CONFIG_FLOAT, configFloatSetter, "Fresh fraction of modification time."); CONFIG_VARIABLE_SETTABLE(maxNoModifiedAge, CONFIG_TIME, configIntSetter, "Max age for objects without Last-modified."); 
CONFIG_VARIABLE_SETTABLE(dontCacheCookies, CONFIG_BOOLEAN, configIntSetter, "Work around cachable cookies."); } void initObject() { int q; if(objectHighMark < 16) { objectHighMark = 16; do_log(L_WARN, "Impossibly low objectHighMark -- setting to %d.\n", objectHighMark); } q = 0; if(publicObjectLowMark == 0) q = 1; if(publicObjectLowMark < 8 || publicObjectLowMark >= objectHighMark - 4) { publicObjectLowMark = objectHighMark / 2; if(!q) do_log(L_WARN, "Impossible publicObjectLowMark value -- " "setting to %d.\n", publicObjectLowMark); } q = 1; if(objectHashTableSize <= objectHighMark / 2 || objectHashTableSize > objectHighMark * 1024) { if(objectHashTableSize != 0) q = 0; objectHashTableSize = objectHighMark * 16; } log2ObjectHashTableSize = log2_ceil(objectHashTableSize); objectHashTableSize = 1 << log2ObjectHashTableSize; if(!q) do_log(L_WARN, "Suspicious objectHashTableSize value -- " "setting to %d.\n", objectHashTableSize); object_list = NULL; object_list_end = NULL; publicObjectCount = 0; privateObjectCount = 0; objectHashTable = calloc(1 << log2ObjectHashTableSize, sizeof(ObjectPtr)); if(!objectHashTable) { do_log(L_ERROR, "Couldn't allocate object hash table.\n"); exit(1); } } ObjectPtr findObject(int type, const void *key, int key_size) { int h; ObjectPtr object; if(key_size >= 50000) return NULL; h = hash(type, key, key_size, log2ObjectHashTableSize); object = objectHashTable[h]; if(!object) return NULL; if(object->type != type || object->key_size != key_size || memcmp(object->key, key, key_size) != 0) { return NULL; } if(object->next) object->next->previous = object->previous; if(object->previous) object->previous->next = object->next; if(object_list == object) object_list = object->next; if(object_list_end == object) object_list_end = object->previous; object->previous = NULL; object->next = object_list; if(object_list) object_list->previous = object; object_list = object; if(!object_list_end) object_list_end = object; return retainObject(object); } 
/* Return an object for the given type and key with one reference owned
   by the caller, or NULL on failure (object limit reached, or out of
   memory).

   If public is non-zero and a public object with this key already exists,
   that object is returned.  Otherwise a fresh object is created; any
   public object occupying the same hash slot is written out and made
   private first.  If both public and fromdisk are non-zero, the new
   object is passed to objectGetFromDisk() before being returned. */
ObjectPtr
makeObject(int type, const void *key, int key_size, int public, int fromdisk,
           RequestFunction request, void* request_closure)
{
    ObjectPtr object;
    int h;

    object = findObject(type, key, key_size);
    if(object != NULL) {
        if(public)
            return object;
        /* A private object was asked for: push the existing public one
           out of the cache.  findObject() gave us a reference to it that
           nobody else will release, so drop it here; without this the
           old object can never reach refcount 0 and is leaked. */
        privatiseObject(object, 0);
        releaseObject(object);
        object = NULL;
    }

    if(publicObjectCount + privateObjectCount >= objectHighMark) {
        if(!objectExpiryScheduled)
            discardObjects(0, 0);
        if(publicObjectCount + privateObjectCount >= objectHighMark) {
            return NULL;
        }
    }

    if(publicObjectCount >= publicObjectLowMark &&
       !objectExpiryScheduled) {
        TimeEventHandlerPtr event;
        event = scheduleTimeEvent(-1, discardObjectsHandler, 0, NULL);
        if(event)
            objectExpiryScheduled = 1;
        else
            do_log(L_ERROR, "Couldn't schedule object expiry.\n");
    }

    object = malloc(sizeof(ObjectRec));
    if(object == NULL)
        return NULL;

    object->type = type;
    object->request = request;
    object->request_closure = request_closure;
    object->key = malloc(key_size);
    if(object->key == NULL) {
        free(object);
        return NULL;
    }
    memcpy(object->key, key, key_size);
    object->key_size = key_size;
    object->flags = (public?OBJECT_PUBLIC:0) | OBJECT_INITIAL;
    if(public) {
        h = hash(object->type, object->key, object->key_size,
                 log2ObjectHashTableSize);
        if(objectHashTable[h]) {
            /* The slot is taken by a different key: evict the occupant. */
            writeoutToDisk(objectHashTable[h], objectHashTable[h]->size, -1);
            privatiseObject(objectHashTable[h], 0);
            assert(!objectHashTable[h]);
        }
        objectHashTable[h] = object;
        object->next = object_list;
        object->previous = NULL;
        if(object_list)
            object_list->previous = object;
        object_list = object;
        if(!object_list_end)
            object_list_end = object;
    } else {
        object->next = NULL;
        object->previous = NULL;
    }
    object->abort_data = NULL;
    object->code = 0;
    object->message = NULL;
    initCondition(&object->condition);
    object->headers = NULL;
    object->via = NULL;
    object->numchunks = 0;
    object->chunks = NULL;
    object->length = -1;
    object->date = -1;
    object->age = -1;
    object->expires = -1;
    object->last_modified = -1;
    object->atime = -1;
    object->etag = NULL;
    object->cache_control = 0;
    object->max_age = -1;
    object->s_maxage = -1;
    object->size = 0;
    object->requestor = NULL;
    object->disk_entry = NULL;
    if(object->flags & OBJECT_PUBLIC)
        publicObjectCount++;
    else
        privateObjectCount++;
    object->refcount = 1;

    if(public && fromdisk)
        objectGetFromDisk(object);
    return object;
}

/* Propagate a metadata change to the disk entry: revalidate it when
   revalidate is non-zero, otherwise clear OBJECT_DISK_ENTRY_COMPLETE and
   mark the entry dirty. */
void
objectMetadataChanged(ObjectPtr object, int revalidate)
{
    if(revalidate) {
        revalidateDiskEntry(object);
    } else {
        object->flags &= ~OBJECT_DISK_ENTRY_COMPLETE;
        dirtyDiskEntry(object);
    }
    return;
}

/* Take a reference on object and return it. */
ObjectPtr
retainObject(ObjectPtr object)
{
    do_log(D_REFCOUNT, "O 0x%lx %d++\n",
           (unsigned long)object, object->refcount);
    object->refcount++;
    return object;
}

/* Drop a reference.  A private object is destroyed when its last
   reference goes; a public one stays in the cache. */
void
releaseObject(ObjectPtr object)
{
    do_log(D_REFCOUNT, "O 0x%lx %d--\n",
           (unsigned long)object, object->refcount);
    object->refcount--;
    if(object->refcount == 0) {
        assert(!object->condition.handlers &&
               !(object->flags & OBJECT_INPROGRESS));
        if(!(object->flags & OBJECT_PUBLIC))
            destroyObject(object);
    }
}

/* Like releaseObject(), but when references remain after the drop, also
   signal the object's condition through notifyObject(). */
void
releaseNotifyObject(ObjectPtr object)
{
    do_log(D_REFCOUNT, "O 0x%lx %d--\n",
           (unsigned long)object, object->refcount);
    object->refcount--;
    if(object->refcount > 0) {
        notifyObject(object);
    } else {
        assert(!object->condition.handlers &&
               !(object->flags & OBJECT_INPROGRESS));
        if(!(object->flags & OBJECT_PUBLIC))
            destroyObject(object);
    }
}

/* Increment the lock count of chunk i, growing the chunk array first if
   i is beyond its end.
   NOTE(review): the result of objectSetChunks() is not checked, so an
   allocation failure here is followed by an out-of-range access. */
void
lockChunk(ObjectPtr object, int i)
{
    do_log(D_LOCK, "Lock 0x%lx[%d]: ", (unsigned long)object, i);
    assert(i >= 0);
    if(i >= object->numchunks)
        objectSetChunks(object, i + 1);
    object->chunks[i].locked++;
    do_log(D_LOCK, "%d\n", object->chunks[i].locked);
}

/* Decrement the lock count of chunk i, which must currently be locked. */
void
unlockChunk(ObjectPtr object, int i)
{
    do_log(D_LOCK, "Unlock 0x%lx[%d]: ", (unsigned long)object, i);
    assert(i >= 0 && i < object->numchunks);
    assert(object->chunks[i].locked > 0);
    object->chunks[i].locked--;
    do_log(D_LOCK, "%d\n", object->chunks[i].locked);
}

int
objectSetChunks(ObjectPtr object, int numchunks)
{
    int n;

    if(numchunks <= object->numchunks)
        return 0;

    if(object->length >= 0)
        n = MAX(numchunks, (object->length +
(CHUNK_SIZE - 1)) / CHUNK_SIZE); else n = MAX(numchunks, MAX(object->numchunks + 2, object->numchunks * 5 / 4)); if(n == 0) { assert(object->chunks == NULL); } else if(object->numchunks == 0) { object->chunks = calloc(n, sizeof(ChunkRec)); if(object->chunks == NULL) { return -1; } object->numchunks = n; } else { ChunkPtr newchunks; newchunks = realloc(object->chunks, n * sizeof(ChunkRec)); if(newchunks == NULL) return -1; memset(newchunks + object->numchunks, 0, (n - object->numchunks) * sizeof(ChunkRec)); object->chunks = newchunks; object->numchunks = n; } return 0; } ObjectPtr objectPartial(ObjectPtr object, int length, struct _Atom *headers) { object->headers = headers; if(length >= 0) { if(object->size > length) { abortObject(object, 502, internAtom("Inconsistent Content-Length")); notifyObject(object); return object; } } if(length >= 0) object->length = length; object->flags &= ~OBJECT_INITIAL; revalidateDiskEntry(object); notifyObject(object); return object; } static int objectAddChunk(ObjectPtr object, const char *data, int offset, int plen) { int i = offset / CHUNK_SIZE; int rc; assert(offset % CHUNK_SIZE == 0); assert(plen <= CHUNK_SIZE); if(object->numchunks <= i) { rc = objectSetChunks(object, i + 1); if(rc < 0) return -1; } lockChunk(object, i); if(object->chunks[i].data == NULL) { object->chunks[i].data = get_chunk(); if(object->chunks[i].data == NULL) goto fail; } if(object->chunks[i].size >= plen) { unlockChunk(object, i); return 0; } if(object->size < offset + plen) object->size = offset + plen; object->chunks[i].size = plen; memcpy(object->chunks[i].data, data, plen); unlockChunk(object, i); return 0; fail: unlockChunk(object, i); return -1; } static int objectAddChunkEnd(ObjectPtr object, const char *data, int offset, int plen) { int i = offset / CHUNK_SIZE; int rc; assert(offset % CHUNK_SIZE != 0 && offset % CHUNK_SIZE + plen <= CHUNK_SIZE); if(object->numchunks <= i) { rc = objectSetChunks(object, i + 1); if(rc < 0) return -1; } 
lockChunk(object, i); if(object->chunks[i].data == NULL) object->chunks[i].data = get_chunk(); if(object->chunks[i].data == NULL) goto fail; if(offset > object->size) { goto fail; } if(object->chunks[i].size < offset % CHUNK_SIZE) { goto fail; } if(object->size < offset + plen) object->size = offset + plen; object->chunks[i].size = offset % CHUNK_SIZE + plen; memcpy(object->chunks[i].data + (offset % CHUNK_SIZE), data, plen); unlockChunk(object, i); return 0; fail: unlockChunk(object, i); return -1; } int objectAddData(ObjectPtr object, const char *data, int offset, int len) { int rc; do_log(D_OBJECT_DATA, "Adding data to 0x%lx (%d) at %d: %d bytes\n", (unsigned long)object, object->length, offset, len); if(len == 0) return 1; if(object->length >= 0) { if(offset + len > object->length) { do_log(L_ERROR, "Inconsistent object length (%d, should be at least %d).\n", object->length, offset + len); object->length = offset + len; } } object->flags &= ~OBJECT_FAILED; if(offset + len >= object->numchunks * CHUNK_SIZE) { rc = objectSetChunks(object, (offset + len - 1) / CHUNK_SIZE + 1); if(rc < 0) { return -1; } } if(offset % CHUNK_SIZE != 0) { int plen = CHUNK_SIZE - offset % CHUNK_SIZE; if(plen >= len) plen = len; rc = objectAddChunkEnd(object, data, offset, plen); if(rc < 0) { return -1; } offset += plen; data += plen; len -= plen; } while(len > 0) { int plen = (len >= CHUNK_SIZE) ? CHUNK_SIZE : len; rc = objectAddChunk(object, data, offset, plen); if(rc < 0) { return -1; } offset += plen; data += plen; len -= plen; } return 1; } void objectPrintf(ObjectPtr object, int offset, const char *format, ...) 
{ char *buf; int rc; va_list args; va_start(args, format); buf = vsprintf_a(format, args); va_end(args); if(buf == NULL) { abortObject(object, 500, internAtom("Couldn't allocate string")); return; } rc = objectAddData(object, buf, offset, strlen(buf)); free(buf); if(rc < 0) abortObject(object, 500, internAtom("Couldn't add data to object")); } int objectHoleSize(ObjectPtr object, int offset) { int size = 0, i; if(offset < 0 || offset / CHUNK_SIZE >= object->numchunks) return -1; if(offset % CHUNK_SIZE != 0) { if(object->chunks[offset / CHUNK_SIZE].size > offset % CHUNK_SIZE) return 0; else { size += CHUNK_SIZE - offset % CHUNK_SIZE; offset += CHUNK_SIZE - offset % CHUNK_SIZE; if(offset < 0) { /* Overflow */ return -1; } } } for(i = offset / CHUNK_SIZE; i < object->numchunks; i++) { if(object->chunks[i].size == 0) size += CHUNK_SIZE; else break; } if(i >= object->numchunks) return -1; return size; } /* Returns 2 if the data is wholly in memory, 1 if it's available on disk */ int objectHasData(ObjectPtr object, int from, int to) { int first = from / CHUNK_SIZE; int last = to / CHUNK_SIZE; int i, upto; if(to < 0) { if(object->length >= 0) to = object->length; else return 0; } if(from >= to) return 2; if(to > object->size) { upto = to; goto disk; } if(last > object->numchunks || object->chunks[last].size > to % CHUNK_SIZE) { upto = to; goto disk; } for(i = last - 1; i >= first; i--) { if(object->chunks[i].size < CHUNK_SIZE) { upto = (i + 1) * CHUNK_SIZE; goto disk; } } return 2; disk: if(object->flags & OBJECT_DISK_ENTRY_COMPLETE) return 1; if(diskEntrySize(object) >= upto) return 1; return 0; } void destroyObject(ObjectPtr object) { int i; assert(object->refcount == 0 && !object->requestor); assert(!object->condition.handlers && (object->flags & OBJECT_INPROGRESS) == 0); if(object->disk_entry) destroyDiskEntry(object, 0); if(object->flags & OBJECT_PUBLIC) { privatiseObject(object, 0); } else { object->type = -1; if(object->message) releaseAtom(object->message); 
if(object->key) free(object->key); if(object->headers) releaseAtom(object->headers); if(object->etag) free(object->etag); if(object->via) releaseAtom(object->via); for(i = 0; i < object->numchunks; i++) { assert(!object->chunks[i].locked); if(object->chunks[i].data) dispose_chunk(object->chunks[i].data); object->chunks[i].data = NULL; object->chunks[i].size = 0; } if(object->chunks) free(object->chunks); privateObjectCount--; free(object); } } void privatiseObject(ObjectPtr object, int linear) { int i, h; if(!(object->flags & OBJECT_PUBLIC)) { if(linear) object->flags |= OBJECT_LINEAR; return; } if(object->disk_entry) destroyDiskEntry(object, 0); object->flags &= ~OBJECT_PUBLIC; for(i = 0; i < object->numchunks; i++) { if(object->chunks[i].locked) break; if(object->chunks[i].data) { object->chunks[i].size = 0; dispose_chunk(object->chunks[i].data); object->chunks[i].data = NULL; } } h = hash(object->type, object->key, object->key_size, log2ObjectHashTableSize); assert(objectHashTable[h] == object); objectHashTable[h] = NULL; if(object->previous) object->previous->next = object->next; if(object_list == object) object_list = object->next; if(object->next) object->next->previous = object->previous; if(object_list_end == object) object_list_end = object->previous; object->previous = NULL; object->next = NULL; publicObjectCount--; privateObjectCount++; if(object->refcount == 0) destroyObject(object); else { if(linear) object->flags |= OBJECT_LINEAR; } } void abortObject(ObjectPtr object, int code, AtomPtr message) { int i; assert(code != 0); object->flags &= ~(OBJECT_INITIAL | OBJECT_VALIDATING); object->flags |= OBJECT_ABORTED; object->code = code; if(object->message) releaseAtom(object->message); object->message = message; object->length = 0; object->date = object->age; object->expires = object->age; object->last_modified = -1; if(object->etag) free(object->etag); object->etag = NULL; if(object->headers) releaseAtom(object->headers); object->headers = NULL; 
object->size = 0; for(i = 0; i < object->numchunks; i++) { if(object->chunks[i].data) { if(!object->chunks[i].locked) { dispose_chunk(object->chunks[i].data); object->chunks[i].data = NULL; object->chunks[i].size = 0; } } } privatiseObject(object, 0); } void supersedeObject(ObjectPtr object) { object->flags |= OBJECT_SUPERSEDED; destroyDiskEntry(object, 1); privatiseObject(object, 0); notifyObject(object); } void notifyObject(ObjectPtr object) { retainObject(object); signalCondition(&object->condition); releaseObject(object); } int discardObjectsHandler(TimeEventHandlerPtr event) { return discardObjects(0, 0); } void writeoutObjects(int all) { ObjectPtr object = object_list; int bytes; int objects; int n; if(diskIsClean) return; objects = 0; bytes = 0; while(object) { do { if(!all) { if(objects >= maxObjectsWhenIdle || bytes >= maxWriteoutWhenIdle) { if(workToDo()) return; objects = 0; bytes = 0; } } n = writeoutToDisk(object, -1, all ? -1 : maxWriteoutWhenIdle); bytes += n; } while(!all && n == maxWriteoutWhenIdle); objects++; object = object->next; } diskIsClean = 1; } int discardObjects(int all, int force) { ObjectPtr object; int i; static int in_discardObjects = 0; TimeEventHandlerPtr event; if(in_discardObjects) return 0; in_discardObjects = 1; if(all || force || used_chunks >= CHUNKS(chunkHighMark) || publicObjectCount >= publicObjectLowMark || publicObjectCount + privateObjectCount >= objectHighMark) { object = object_list_end; while(object && (all || force || used_chunks >= CHUNKS(chunkLowMark))) { if(force || ((object->flags & OBJECT_PUBLIC) && object->numchunks > CHUNKS(chunkLowMark) / 4)) { int j; for(j = 0; j < object->numchunks; j++) { if(object->chunks[j].locked) { break; } if(object->chunks[j].size < CHUNK_SIZE) { continue; } writeoutToDisk(object, (j + 1) * CHUNK_SIZE, -1); dispose_chunk(object->chunks[j].data); object->chunks[j].data = NULL; object->chunks[j].size = 0; i++; } } object = object->previous; } i = 0; object = object_list_end; 
while(object && (all || force || used_chunks - i > CHUNKS(chunkLowMark) || used_chunks > CHUNKS(chunkCriticalMark) || publicObjectCount > publicObjectLowMark)) { ObjectPtr next_object = object->previous; if(object->refcount == 0) { i += object->numchunks; writeoutToDisk(object, object->size, -1); privatiseObject(object, 0); } else if(all || force) { writeoutToDisk(object, object->size, -1); destroyDiskEntry(object, 0); } object = next_object; } object = object_list_end; if(force || used_chunks > CHUNKS(chunkCriticalMark)) { if(used_chunks > CHUNKS(chunkCriticalMark)) { do_log(L_WARN, "Short on chunk memory -- " "attempting to punch holes " "in the middle of objects.\n"); } while(object && (force || used_chunks > CHUNKS(chunkCriticalMark))) { if(force || (object->flags & OBJECT_PUBLIC)) { int j; for(j = object->numchunks - 1; j >= 0; j--) { if(object->chunks[j].locked) continue; if(object->chunks[j].size < CHUNK_SIZE) continue; writeoutToDisk(object, (j + 1) * CHUNK_SIZE, -1); dispose_chunk(object->chunks[j].data); object->chunks[j].data = NULL; object->chunks[j].size = 0; } } object = object->previous; } } event = scheduleTimeEvent(2, discardObjectsHandler, 0, NULL); if(event) { objectExpiryScheduled = 1; } else { objectExpiryScheduled = 0; do_log(L_ERROR, "Couldn't schedule object expiry.\n"); } } else { objectExpiryScheduled = 0; } if(all) { if(privateObjectCount + publicObjectCount != 0) { do_log(L_WARN, "Discarded all objects, " "%d + %d objects left (%d chunks and %d atoms used).\n", publicObjectCount, privateObjectCount, used_chunks, used_atoms); } else if(used_chunks != 0) { do_log(L_WARN, "Discarded all objects, " "%d chunks and %d atoms left.\n", used_chunks, used_atoms); } diskIsClean = 1; } in_discardObjects = 0; return 1; } CacheControlRec no_cache_control = {0, -1, -1, -1, -1}; int objectIsStale(ObjectPtr object, CacheControlPtr cache_control) { int stale = 0x7FFFFFFF; int flags; int max_age, s_maxage; time_t date; if(object->flags & OBJECT_INITIAL) 
return 0;

    /* Reference date for the Expires and Last-Modified arithmetic below:
       the object's date if set, else its age, else the current time. */
    if(object->date >= 0)
        date = object->date;
    else if(object->age >= 0)
        date = object->age;
    else
        date = current_time.tv_sec;

    if(cache_control == NULL)
        cache_control = &no_cache_control;
    flags = object->cache_control | cache_control->flags;

    /* Effective max-age and s-maxage: the smaller of the caller's and the
       object's value when both are set, otherwise whichever is set. */
    if(cache_control->max_age >= 0) {
        if(object->max_age >= 0)
            max_age = MIN(cache_control->max_age, object->max_age);
        else
            max_age = cache_control->max_age;
    } else
        max_age = object->max_age;

    if(cache_control->s_maxage >= 0) {
        if(object->s_maxage >= 0)
            s_maxage = MIN(cache_control->s_maxage, object->s_maxage);
        else
            s_maxage = cache_control->s_maxage;
    } else
        s_maxage = object->s_maxage;

    /* "stale" is the time at which the object stops being fresh; each
       rule below can only bring it earlier. */
    if(max_age >= 0)
        stale = MIN(stale, object->age + max_age);

    if(cacheIsShared && s_maxage >= 0)
        stale = MIN(stale, object->age + s_maxage);

    /* Configured upper bounds on freshness. */
    if(object->expires >= 0 || object->max_age >= 0)
        stale = MIN(stale, object->age + maxExpiresAge);
    else
        stale = MIN(stale, object->age + maxAge);

    /* RFC 2616 14.9.3: server-side max-age overrides expires */

    if(object->expires >= 0 && object->max_age < 0) {
        /* This protects against clock skew */
        stale = MIN(stale, object->age + object->expires - date);
    }

    if(object->expires < 0 && object->max_age < 0) {
        /* No server-side information -- heuristic expiration */
        if(object->last_modified >= 0)
            /* Again, take care of clock skew */
            stale = MIN(stale,
                        object->age +
                        (date - object->last_modified) * maxAgeFraction);
        else
            stale = MIN(stale, object->age + maxNoModifiedAge);
    }

    if(!(flags & CACHE_MUST_REVALIDATE) &&
       !(cacheIsShared && (flags & CACHE_PROXY_REVALIDATE))) {
        /* Client side can relax transparency */
        if(cache_control->min_fresh >= 0) {
            if(cache_control->max_stale >= 0)
                stale = MIN(stale - cache_control->min_fresh,
                            stale + cache_control->max_stale);
            else
                stale = stale - cache_control->min_fresh;
        } else if(cache_control->max_stale >= 0) {
            stale = stale + cache_control->max_stale;
        }
    }

    return current_time.tv_sec > stale;
}

/* Decide whether a cached object has to be revalidated before use.
   Returns 1 when the combined cache-control flags forbid serving it
   as-is (no-cache, no-store; private on a shared cache; Vary unless
   mindlesslyCacheVary; cookies when dontCacheCookies), otherwise the
   result of objectIsStale().  With a NULL object only cache_control's
   flags are examined and 0 is returned if none of them applies. */
int
objectMustRevalidate(ObjectPtr object, CacheControlPtr cache_control)
{
    int flags;

    if(cache_control == NULL)
        cache_control = &no_cache_control;
    if(object)
        flags = object->cache_control | cache_control->flags;
    else
        flags = cache_control->flags;

    if(flags & (CACHE_NO | CACHE_NO_HIDDEN | CACHE_NO_STORE))
        return 1;

    if(cacheIsShared && (flags & CACHE_PRIVATE))
        return 1;

    if(!mindlesslyCacheVary && (flags & CACHE_VARY))
        return 1;

    if(dontCacheCookies && (flags & CACHE_COOKIE))
        return 1;

    if(object)
        return objectIsStale(object, cache_control);

    return 0;
}
polipo-1.0.4.1/mingw.h0000644000175000017500000001256711331407220013765 0ustar chrisdchrisd/*
Copyright (c) 2006 by Dan Kennedy.
Copyright (c) 2006 by Juliusz Chroboczek.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

/*
 * Polipo was originally designed to run on Unix-like systems. This
 * header file (and its accompanying implementation file mingw.c) contains
 * code that allows polipo to run on Microsoft Windows too.
 *
 * The target MS windows compiler is Mingw (MINimal Gnu for Windows).
The * code in this file probably get's us pretty close to MSVC also, but * this has not been tested. To build polipo for Mingw, define the MINGW * symbol. For Unix or Unix-like systems, leave it undefined. */ #ifdef MINGW /* Unfortunately, there's no hiding it. */ #define HAVE_WINSOCK 1 /* At time of writing, a fair bit of stuff doesn't work under Mingw. * Hopefully they will be fixed later (especially the disk-cache). */ #define NO_IPv6 1 #include #define S_IROTH S_IREAD /* Pull in winsock.h for (almost) berkeley sockets. */ #include #define ENOTCONN WSAENOTCONN #define EWOULDBLOCK WSAEWOULDBLOCK #define ENOBUFS WSAENOBUFS #define ECONNRESET WSAECONNRESET #define ESHUTDOWN WSAESHUTDOWN #define EAFNOSUPPORT WSAEAFNOSUPPORT #define EPROTONOSUPPORT WSAEPROTONOSUPPORT #define EINPROGRESS WSAEINPROGRESS #define EISCONN WSAEISCONN /* winsock doesn't feature poll(), so there is a version implemented * in terms of select() in mingw.c. The following definitions * are copied from linux man pages. A poll() macro is defined to * call the version in mingw.c. */ #define POLLIN 0x0001 /* There is data to read */ #define POLLPRI 0x0002 /* There is urgent data to read */ #define POLLOUT 0x0004 /* Writing now will not block */ #define POLLERR 0x0008 /* Error condition */ #define POLLHUP 0x0010 /* Hung up */ #define POLLNVAL 0x0020 /* Invalid request: fd not open */ struct pollfd { SOCKET fd; /* file descriptor */ short events; /* requested events */ short revents; /* returned events */ }; #define poll(x, y, z) mingw_poll(x, y, z) /* These wrappers do nothing special except set the global errno variable if * an error occurs (winsock doesn't do this by default). They set errno * to unix-like values (i.e. WSAEWOULDBLOCK is mapped to EAGAIN), so code * outside of this file "shouldn't" have to worry about winsock specific error * handling. 
*/ #define socket(x, y, z) mingw_socket(x, y, z) #define connect(x, y, z) mingw_connect(x, y, z) #define accept(x, y, z) mingw_accept(x, y, z) #define shutdown(x, y) mingw_shutdown(x, y) #define getpeername(x, y, z) mingw_getpeername(x, y, z) /* Wrapper macros to call misc. functions mingw is missing */ #define sleep(x) mingw_sleep(x) #define inet_aton(x, y) mingw_inet_aton(x, y) #define gettimeofday(x, y) mingw_gettimeofday(x, y) #define stat(x, y) mingw_stat(x, y) #define mkdir(x, y) mkdir(x) /* Winsock uses int instead of the usual socklen_t */ typedef int socklen_t; /* Function prototypes for functions in mingw.c */ unsigned int mingw_sleep(unsigned int); int mingw_inet_aton(const char *, struct in_addr *); int mingw_gettimeofday(struct timeval *, char *); int mingw_poll(struct pollfd *, unsigned int, int); SOCKET mingw_socket(int, int, int); int mingw_connect(SOCKET, struct sockaddr*, socklen_t); SOCKET mingw_accept(SOCKET, struct sockaddr*, socklen_t *); int mingw_shutdown(SOCKET, int); int mingw_getpeername(SOCKET, struct sockaddr*, socklen_t *); /* Three socket specific macros */ #define READ(x, y, z) mingw_read_socket(x, y, z) #define WRITE(x, y, z) mingw_write_socket(x, y, z) #define CLOSE(x) mingw_close_socket(x) int mingw_read_socket(SOCKET, void *, int); int mingw_write_socket(SOCKET, void *, int); int mingw_close_socket(SOCKET); int mingw_setnonblocking(SOCKET, int); int mingw_stat(const char*, struct stat*); #endif #ifndef HAVE_READV_WRITEV /* * The HAVE_READV_WRITEV symbol should be defined if the system features * the vector IO functions readv() and writev() and those functions may * be legally used with sockets. 
*/ struct iovec { void *iov_base; /* Starting address */ size_t iov_len; /* Number of bytes */ }; #define WRITEV(x, y, z) polipo_writev(x, y, z) #define READV(x, y, z) polipo_readv(x, y, z) int polipo_readv(int fd, const struct iovec *vector, int count); int polipo_writev(int fd, const struct iovec *vector, int count); #endif polipo-1.0.4.1/mingw.c0000644000175000017500000003122211331407220013745 0ustar chrisdchrisd/* Copyright (c) 2006 by Dan Kennedy. Copyright (c) 2006 by Juliusz Chroboczek. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "polipo.h" #ifndef MINGW static int dummy ATTRIBUTE((unused)); #else #undef poll #undef socket #undef connect #undef accept #undef shutdown #undef getpeername #undef sleep #undef inet_aton #undef gettimeofday #undef stat /* Windows needs this header file for the implementation of inet_aton() */ #include /* * Check whether "cp" is a valid ascii representation of an Internet address * and convert to a binary address. Returns 1 if the address is valid, 0 if * not. 
This replaces inet_addr, the return value from which cannot * distinguish between failure and a local broadcast address. * * This implementation of the standard inet_aton() function was copied * (with trivial modifications) from the OpenBSD project. */ int mingw_inet_aton(const char *cp, struct in_addr *addr) { register unsigned int val; register int base, n; register char c; unsigned int parts[4]; register unsigned int *pp = parts; assert(sizeof(val) == 4); c = *cp; while(1) { /* * Collect number up to ``.''. * Values are specified as for C: * 0x=hex, 0=octal, isdigit=decimal. */ if(!isdigit(c)) return (0); val = 0; base = 10; if(c == '0') { c = *++cp; if(c == 'x' || c == 'X') base = 16, c = *++cp; else base = 8; } while(1) { if(isascii(c) && isdigit(c)) { val = (val * base) + (c - '0'); c = *++cp; } else if(base == 16 && isascii(c) && isxdigit(c)) { val = (val << 4) | (c + 10 - (islower(c) ? 'a' : 'A')); c = *++cp; } else break; } if(c == '.') { /* * Internet format: * a.b.c.d * a.b.c (with c treated as 16 bits) * a.b (with b treated as 24 bits) */ if(pp >= parts + 3) return (0); *pp++ = val; c = *++cp; } else break; } /* * Check for trailing characters. */ if(c != '\0' && (!isascii(c) || !isspace(c))) return (0); /* * Concoct the address according to * the number of parts specified. 
*/ n = pp - parts + 1; switch(n) { case 0: return (0); /* initial nondigit */ case 1: /* a -- 32 bits */ break; case 2: /* a.b -- 8.24 bits */ if((val > 0xffffff) || (parts[0] > 0xff)) return (0); val |= parts[0] << 24; break; case 3: /* a.b.c -- 8.8.16 bits */ if((val > 0xffff) || (parts[0] > 0xff) || (parts[1] > 0xff)) return (0); val |= (parts[0] << 24) | (parts[1] << 16); break; case 4: /* a.b.c.d -- 8.8.8.8 bits */ if((val > 0xff) || (parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff)) return (0); val |= (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8); break; } if(addr) addr->s_addr = htonl(val); return (1); } unsigned int mingw_sleep(unsigned int seconds) { Sleep(seconds * 1000); return 0; } int mingw_gettimeofday(struct timeval *tv, char *tz) { const long long EPOCHFILETIME = (116444736000000000LL); FILETIME ft; LARGE_INTEGER li; long long t; /* This implementation doesn't support the timezone parameter. That's Ok, * as at present polipo always passed NULL as the second arg. We * also need to make sure that we have at least 8 bytes of space to * do the math in - otherwise there will be overflow errors. */ assert(tz == NULL); assert(sizeof(t) == 8); if(tv) { GetSystemTimeAsFileTime(&ft); li.LowPart = ft.dwLowDateTime; li.HighPart = ft.dwHighDateTime; t = li.QuadPart; /* In 100-nanosecond intervals */ t -= EPOCHFILETIME; /* Offset to the Epoch time */ t /= 10; /* In microseconds */ tv->tv_sec = (long)(t / 1000000); tv->tv_usec = (long)(t % 1000000); } return 0; } int mingw_poll(struct pollfd *fds, unsigned int nfds, int timo) { struct timeval timeout, *toptr; fd_set ifds, ofds, efds, *ip, *op; int i, rc; /* Set up the file-descriptor sets in ifds, ofds and efds. 
*/ FD_ZERO(&ifds); FD_ZERO(&ofds); FD_ZERO(&efds); for (i = 0, op = ip = 0; i < nfds; ++i) { fds[i].revents = 0; if(fds[i].events & (POLLIN|POLLPRI)) { ip = &ifds; FD_SET(fds[i].fd, ip); } if(fds[i].events & POLLOUT) { op = &ofds; FD_SET(fds[i].fd, op); } FD_SET(fds[i].fd, &efds); } /* Set up the timeval structure for the timeout parameter */ if(timo < 0) { toptr = 0; } else { toptr = &timeout; timeout.tv_sec = timo / 1000; timeout.tv_usec = (timo - timeout.tv_sec * 1000) * 1000; } #ifdef DEBUG_POLL printf("Entering select() sec=%ld usec=%ld ip=%lx op=%lx\n", (long)timeout.tv_sec, (long)timeout.tv_usec, (long)ip, (long)op); #endif rc = select(0, ip, op, &efds, toptr); #ifdef DEBUG_POLL printf("Exiting select rc=%d\n", rc); #endif if(rc <= 0) return rc; if(rc > 0) { for (i = 0; i < nfds; ++i) { int fd = fds[i].fd; if(fds[i].events & (POLLIN|POLLPRI) && FD_ISSET(fd, &ifds)) fds[i].revents |= POLLIN; if(fds[i].events & POLLOUT && FD_ISSET(fd, &ofds)) fds[i].revents |= POLLOUT; if(FD_ISSET(fd, &efds)) /* Some error was detected ... should be some way to know. */ fds[i].revents |= POLLHUP; #ifdef DEBUG_POLL printf("%d %d %d revent = %x\n", FD_ISSET(fd, &ifds), FD_ISSET(fd, &ofds), FD_ISSET(fd, &efds), fds[i].revents ); #endif } } return rc; } int mingw_close_socket(SOCKET fd) { int rc; rc = closesocket(fd); return 0; } static void set_errno(int winsock_err) { switch(winsock_err) { case WSAEWOULDBLOCK: errno = EAGAIN; break; default: errno = winsock_err; break; } } int mingw_write_socket(SOCKET fd, void *buf, int n) { int rc = send(fd, buf, n, 0); if(rc == SOCKET_ERROR) { set_errno(WSAGetLastError()); } return rc; } int mingw_read_socket(SOCKET fd, void *buf, int n) { int rc = recv(fd, buf, n, 0); if(rc == SOCKET_ERROR) { set_errno(WSAGetLastError()); } return rc; } /* * Set the "non-blocking" flag on socket fd to the value specified by * the second argument (i.e. if the nonblocking argument is non-zero, the * socket is set to non-blocking mode). 
Zero is returned if the operation * is successful, other -1. */ int mingw_setnonblocking(SOCKET fd, int nonblocking) { int rc; unsigned long mode = 1; rc = ioctlsocket(fd, FIONBIO, &mode); if(rc != 0) { set_errno(WSAGetLastError()); } return (rc == 0 ? 0 : -1); } /* * A wrapper around the socket() function. The purpose of this wrapper * is to ensure that the global errno symbol is set if an error occurs, * even if we are using winsock. */ SOCKET mingw_socket(int domain, int type, int protocol) { SOCKET fd = socket(domain, type, protocol); if(fd == INVALID_SOCKET) { set_errno(WSAGetLastError()); } return fd; } static void set_connect_errno(int winsock_err) { switch(winsock_err) { case WSAEINVAL: case WSAEALREADY: case WSAEWOULDBLOCK: errno = EINPROGRESS; break; default: errno = winsock_err; break; } } /* * A wrapper around the connect() function. The purpose of this wrapper * is to ensure that the global errno symbol is set if an error occurs, * even if we are using winsock. */ int mingw_connect(SOCKET fd, struct sockaddr *addr, socklen_t addr_len) { int rc = connect(fd, addr, addr_len); assert(rc == 0 || rc == SOCKET_ERROR); if(rc == SOCKET_ERROR) { set_connect_errno(WSAGetLastError()); } return rc; } /* * A wrapper around the accept() function. The purpose of this wrapper * is to ensure that the global errno symbol is set if an error occurs, * even if we are using winsock. */ SOCKET mingw_accept(SOCKET fd, struct sockaddr *addr, socklen_t *addr_len) { SOCKET newfd = accept(fd, addr, addr_len); if(newfd == INVALID_SOCKET) { set_errno(WSAGetLastError()); newfd = -1; } return newfd; } /* * A wrapper around the shutdown() function. The purpose of this wrapper * is to ensure that the global errno symbol is set if an error occurs, * even if we are using winsock. 
*/ int mingw_shutdown(SOCKET fd, int mode) { int rc = shutdown(fd, mode); assert(rc == 0 || rc == SOCKET_ERROR); if(rc == SOCKET_ERROR) { set_errno(WSAGetLastError()); } return rc; } /* * A wrapper around the getpeername() function. The purpose of this wrapper * is to ensure that the global errno symbol is set if an error occurs, * even if we are using winsock. */ int mingw_getpeername(SOCKET fd, struct sockaddr *name, socklen_t *namelen) { int rc = getpeername(fd, name, namelen); assert(rc == 0 || rc == SOCKET_ERROR); if(rc == SOCKET_ERROR) { set_errno(WSAGetLastError()); } return rc; } /* Stat doesn't work on directories if the name ends in a slash. */ int mingw_stat(const char *filename, struct stat *ss) { int len, rc, saved_errno; char *noslash; len = strlen(filename); if(len <= 1 || filename[len - 1] != '/') return stat(filename, ss); noslash = malloc(len); if(noslash == NULL) return -1; memcpy(noslash, filename, len - 1); noslash[len - 1] = '\0'; rc = stat(noslash, ss); saved_errno = errno; free(noslash); errno = saved_errno; return rc; } #endif /* #ifdef MINGW */ #ifndef HAVE_READV_WRITEV int polipo_writev(int fd, const struct iovec *vector, int count) { int rc; /* Return Code */ if(count == 1) { rc = WRITE(fd, vector->iov_base, vector->iov_len); } else { int n = 0; /* Total bytes to write */ char *buf = 0; /* Buffer to copy to before writing */ int i; /* Counter var for looping over vector[] */ int offset = 0; /* Offset for copying to buf */ /* Figure out the required buffer size */ for(i = 0; i < count; i++) { n += vector[i].iov_len; } /* Allocate the buffer. 
If the allocation fails, bail out */ buf = malloc(n); if(!buf) { errno = ENOMEM; return -1; } /* Copy the contents of the vector array to the buffer */ for(i = 0; i < count; i++) { memcpy(&buf[offset], vector[i].iov_base, vector[i].iov_len); offset += vector[i].iov_len; } assert(offset == n); /* Write the entire buffer to the socket and free the allocation */ rc = WRITE(fd, buf, n); free(buf); } return rc; } int polipo_readv(int fd, const struct iovec *vector, int count) { int ret = 0; /* Return value */ int i; for(i = 0; i < count; i++) { int n = vector[i].iov_len; int rc = READ(fd, vector[i].iov_base, n); if(rc == n) { ret += rc; } else { if(rc < 0) { ret = (ret == 0 ? rc : ret); } else { ret += rc; } break; } } return ret; } #endif polipo-1.0.4.1/md5import.h0000644000175000017500000000031611331407220014551 0ustar chrisdchrisd#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L #define HAS_STDINT_H #else typedef unsigned int my_uint32_t; #undef uint32_t #define uint32_t my_uint32_t #endif #include "md5.h" #undef uint32_t polipo-1.0.4.1/md5import.c0000644000175000017500000000042311331407220014543 0ustar chrisdchrisd#ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include #if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L #define HAS_STDINT_H #else typedef unsigned int my_uint32_t; #undef uint32_t #define uint32_t my_uint32_t #endif #include "md5.c" #undef uint32_t polipo-1.0.4.1/md5.h0000644000175000017500000000557611331407220013333 0ustar chrisdchrisd/* *********************************************************************** ** md5.h -- header file for implementation of MD5 ** ** RSA Data Security, Inc. MD5 Message-Digest Algorithm ** ** Created: 2/17/90 RLR ** ** Revised: 12/27/90 SRD,AJ,BSK,JT Reference C version ** ** Revised (for MD5): RLR 4/27/91 ** *********************************************************************** */ /* *********************************************************************** ** Copyright (C) 1990, RSA Data Security, Inc. 
All rights reserved. ** ** ** ** License to copy and use this software is granted provided that ** ** it is identified as the "RSA Data Security, Inc. MD5 Message- ** ** Digest Algorithm" in all material mentioning or referencing this ** ** software or this function. ** ** ** ** License is also granted to make and use derivative works ** ** provided that such works are identified as "derived from the RSA ** ** Data Security, Inc. MD5 Message-Digest Algorithm" in all ** ** material mentioning or referencing the derived work. ** ** ** ** RSA Data Security, Inc. makes no representations concerning ** ** either the merchantability of this software or the suitability ** ** of this software for any particular purpose. It is provided "as ** ** is" without express or implied warranty of any kind. ** ** ** ** These notices must be retained in any copies of any part of this ** ** documentation and/or software. ** *********************************************************************** */ #ifdef HAS_STDINT_H #include #elif defined(HAS_INTTYPES_H) #include #endif /* typedef a 32-bit type */ typedef uint32_t UINT4; /* Data structure for MD5 (Message-Digest) computation */ typedef struct { UINT4 i[2]; /* number of _bits_ handled mod 2^64 */ UINT4 buf[4]; /* scratch buffer */ unsigned char in[64]; /* input buffer */ unsigned char digest[16]; /* actual digest after MD5Final call */ } MD5_CTX; void MD5Init (MD5_CTX *mdContext); void MD5Update (MD5_CTX *, unsigned const char *, unsigned int); void MD5Final (MD5_CTX *); /* *********************************************************************** ** End of md5.h ** ******************************** (cut) ******************************** */ polipo-1.0.4.1/md5.c0000644000175000017500000002727711331407220013330 0ustar chrisdchrisd/* *********************************************************************** ** md5.c -- the source code for MD5 routines ** ** RSA Data Security, Inc. 
MD5 Message-Digest Algorithm ** ** Created: 2/17/90 RLR ** ** Revised: 1/91 SRD,AJ,BSK,JT Reference C Version ** ** Revised (for MD5): RLR 4/27/91 ** ** -- G modified to have y&~z instead of y&z ** ** -- FF, GG, HH modified to add in last register done ** ** -- Access pattern: round 2 works mod 5, round 3 works mod 3 ** ** -- distinct additive constant for each step ** ** -- round 4 added, working mod 7 ** *********************************************************************** */ /* *********************************************************************** ** Copyright (C) 1990, RSA Data Security, Inc. All rights reserved. ** ** ** ** License to copy and use this software is granted provided that ** ** it is identified as the "RSA Data Security, Inc. MD5 Message- ** ** Digest Algorithm" in all material mentioning or referencing this ** ** software or this function. ** ** ** ** License is also granted to make and use derivative works ** ** provided that such works are identified as "derived from the RSA ** ** Data Security, Inc. MD5 Message-Digest Algorithm" in all ** ** material mentioning or referencing the derived work. ** ** ** ** RSA Data Security, Inc. makes no representations concerning ** ** either the merchantability of this software or the suitability ** ** of this software for any particular purpose. It is provided "as ** ** is" without express or implied warranty of any kind. ** ** ** ** These notices must be retained in any copies of any part of this ** ** documentation and/or software. 
** *********************************************************************** */ #include "md5.h" /* *********************************************************************** ** Message-digest routines: ** ** To form the message digest for a message M ** ** (1) Initialize a context buffer mdContext using MD5Init ** ** (2) Call MD5Update on mdContext and M ** ** (3) Call MD5Final on mdContext ** ** The message digest is now in mdContext->digest[0...15] ** *********************************************************************** */ /* forward declaration */ static void Transform (UINT4 *, UINT4 *); #ifdef __STDC__ static const #else static #endif unsigned char PADDING[64] = { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* F, G, H and I are basic MD5 functions */ #define F(x, y, z) (((x) & (y)) | ((~x) & (z))) #define G(x, y, z) (((x) & (z)) | ((y) & (~z))) #define H(x, y, z) ((x) ^ (y) ^ (z)) #define I(x, y, z) ((y) ^ ((x) | (~z))) /* ROTATE_LEFT rotates x left n bits */ #if defined(FAST_MD5) && defined(__GNUC__) && defined(mc68000) /* * If we're on a 68000 based CPU and using a GNU C compiler with * inline assembly code, we can speed this up a bit. 
*/ inline UINT4 ROTATE_LEFT(UINT4 x, int n) { asm("roll %2,%0" : "=d" (x) : "0" (x), "Ir" (n)); return x; } #else #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) #endif /* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */ /* Rotation is separate from addition to prevent recomputation */ #define FF(a, b, c, d, x, s, ac) \ {(a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } #define GG(a, b, c, d, x, s, ac) \ {(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } #define HH(a, b, c, d, x, s, ac) \ {(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } #define II(a, b, c, d, x, s, ac) \ {(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \ (a) = ROTATE_LEFT ((a), (s)); \ (a) += (b); \ } /* The routine MD5Init initializes the message-digest context mdContext. All fields are set to zero. */ void MD5Init (mdContext) MD5_CTX *mdContext; { mdContext->i[0] = mdContext->i[1] = (UINT4)0; /* Load magic initialization constants. */ mdContext->buf[0] = (UINT4)0x67452301; mdContext->buf[1] = (UINT4)0xefcdab89; mdContext->buf[2] = (UINT4)0x98badcfe; mdContext->buf[3] = (UINT4)0x10325476; } /* The routine MD5Update updates the message-digest context to account for the presence of each of the characters inBuf[0..inLen-1] in the message whose digest is being computed. 
*/ void MD5Update (mdContext, inBuf, inLen) MD5_CTX *mdContext; unsigned const char *inBuf; unsigned int inLen; { UINT4 in[16]; int mdi; unsigned int i, ii; /* compute number of bytes mod 64 */ mdi = (int)((mdContext->i[0] >> 3) & 0x3F); /* update number of bits */ if ((mdContext->i[0] + ((UINT4)inLen << 3)) < mdContext->i[0]) mdContext->i[1]++; mdContext->i[0] += ((UINT4)inLen << 3); mdContext->i[1] += ((UINT4)inLen >> 29); while (inLen--) { /* add new character to buffer, increment mdi */ mdContext->in[mdi++] = *inBuf++; /* transform if necessary */ if (mdi == 0x40) { for (i = 0, ii = 0; i < 16; i++, ii += 4) in[i] = (((UINT4)mdContext->in[ii+3]) << 24) | (((UINT4)mdContext->in[ii+2]) << 16) | (((UINT4)mdContext->in[ii+1]) << 8) | ((UINT4)mdContext->in[ii]); Transform (mdContext->buf, in); mdi = 0; } } } /* The routine MD5Final terminates the message-digest computation and ends with the desired message digest in mdContext->digest[0...15]. */ void MD5Final (mdContext) MD5_CTX *mdContext; { UINT4 in[16]; int mdi; unsigned int i, ii; unsigned int padLen; /* save number of bits */ in[14] = mdContext->i[0]; in[15] = mdContext->i[1]; /* compute number of bytes mod 64 */ mdi = (int)((mdContext->i[0] >> 3) & 0x3F); /* pad out to 56 mod 64 */ padLen = (mdi < 56) ? 
(56 - mdi) : (120 - mdi); MD5Update (mdContext, PADDING, padLen); /* append length in bits and transform */ for (i = 0, ii = 0; i < 14; i++, ii += 4) in[i] = (((UINT4)mdContext->in[ii+3]) << 24) | (((UINT4)mdContext->in[ii+2]) << 16) | (((UINT4)mdContext->in[ii+1]) << 8) | ((UINT4)mdContext->in[ii]); Transform (mdContext->buf, in); /* store buffer in digest */ for (i = 0, ii = 0; i < 4; i++, ii += 4) { mdContext->digest[ii] = (unsigned char)(mdContext->buf[i] & 0xFF); mdContext->digest[ii+1] = (unsigned char)((mdContext->buf[i] >> 8) & 0xFF); mdContext->digest[ii+2] = (unsigned char)((mdContext->buf[i] >> 16) & 0xFF); mdContext->digest[ii+3] = (unsigned char)((mdContext->buf[i] >> 24) & 0xFF); } } /* Basic MD5 step. Transforms buf based on in. */ static void Transform (buf, in) UINT4 *buf; UINT4 *in; { UINT4 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; /* Round 1 */ #define S11 7 #define S12 12 #define S13 17 #define S14 22 FF ( a, b, c, d, in[ 0], S11, 0xd76aa478); /* 1 */ FF ( d, a, b, c, in[ 1], S12, 0xe8c7b756); /* 2 */ FF ( c, d, a, b, in[ 2], S13, 0x242070db); /* 3 */ FF ( b, c, d, a, in[ 3], S14, 0xc1bdceee); /* 4 */ FF ( a, b, c, d, in[ 4], S11, 0xf57c0faf); /* 5 */ FF ( d, a, b, c, in[ 5], S12, 0x4787c62a); /* 6 */ FF ( c, d, a, b, in[ 6], S13, 0xa8304613); /* 7 */ FF ( b, c, d, a, in[ 7], S14, 0xfd469501); /* 8 */ FF ( a, b, c, d, in[ 8], S11, 0x698098d8); /* 9 */ FF ( d, a, b, c, in[ 9], S12, 0x8b44f7af); /* 10 */ FF ( c, d, a, b, in[10], S13, 0xffff5bb1); /* 11 */ FF ( b, c, d, a, in[11], S14, 0x895cd7be); /* 12 */ FF ( a, b, c, d, in[12], S11, 0x6b901122); /* 13 */ FF ( d, a, b, c, in[13], S12, 0xfd987193); /* 14 */ FF ( c, d, a, b, in[14], S13, 0xa679438e); /* 15 */ FF ( b, c, d, a, in[15], S14, 0x49b40821); /* 16 */ /* Round 2 */ #define S21 5 #define S22 9 #define S23 14 #define S24 20 GG ( a, b, c, d, in[ 1], S21, 0xf61e2562); /* 17 */ GG ( d, a, b, c, in[ 6], S22, 0xc040b340); /* 18 */ GG ( c, d, a, b, in[11], S23, 0x265e5a51); /* 19 */ GG ( b, 
c, d, a, in[ 0], S24, 0xe9b6c7aa); /* 20 */ GG ( a, b, c, d, in[ 5], S21, 0xd62f105d); /* 21 */ GG ( d, a, b, c, in[10], S22, 0x2441453); /* 22 */ GG ( c, d, a, b, in[15], S23, 0xd8a1e681); /* 23 */ GG ( b, c, d, a, in[ 4], S24, 0xe7d3fbc8); /* 24 */ GG ( a, b, c, d, in[ 9], S21, 0x21e1cde6); /* 25 */ GG ( d, a, b, c, in[14], S22, 0xc33707d6); /* 26 */ GG ( c, d, a, b, in[ 3], S23, 0xf4d50d87); /* 27 */ GG ( b, c, d, a, in[ 8], S24, 0x455a14ed); /* 28 */ GG ( a, b, c, d, in[13], S21, 0xa9e3e905); /* 29 */ GG ( d, a, b, c, in[ 2], S22, 0xfcefa3f8); /* 30 */ GG ( c, d, a, b, in[ 7], S23, 0x676f02d9); /* 31 */ GG ( b, c, d, a, in[12], S24, 0x8d2a4c8a); /* 32 */ /* Round 3 */ #define S31 4 #define S32 11 #define S33 16 #define S34 23 HH ( a, b, c, d, in[ 5], S31, 0xfffa3942); /* 33 */ HH ( d, a, b, c, in[ 8], S32, 0x8771f681); /* 34 */ HH ( c, d, a, b, in[11], S33, 0x6d9d6122); /* 35 */ HH ( b, c, d, a, in[14], S34, 0xfde5380c); /* 36 */ HH ( a, b, c, d, in[ 1], S31, 0xa4beea44); /* 37 */ HH ( d, a, b, c, in[ 4], S32, 0x4bdecfa9); /* 38 */ HH ( c, d, a, b, in[ 7], S33, 0xf6bb4b60); /* 39 */ HH ( b, c, d, a, in[10], S34, 0xbebfbc70); /* 40 */ HH ( a, b, c, d, in[13], S31, 0x289b7ec6); /* 41 */ HH ( d, a, b, c, in[ 0], S32, 0xeaa127fa); /* 42 */ HH ( c, d, a, b, in[ 3], S33, 0xd4ef3085); /* 43 */ HH ( b, c, d, a, in[ 6], S34, 0x4881d05); /* 44 */ HH ( a, b, c, d, in[ 9], S31, 0xd9d4d039); /* 45 */ HH ( d, a, b, c, in[12], S32, 0xe6db99e5); /* 46 */ HH ( c, d, a, b, in[15], S33, 0x1fa27cf8); /* 47 */ HH ( b, c, d, a, in[ 2], S34, 0xc4ac5665); /* 48 */ /* Round 4 */ #define S41 6 #define S42 10 #define S43 15 #define S44 21 II ( a, b, c, d, in[ 0], S41, 0xf4292244); /* 49 */ II ( d, a, b, c, in[ 7], S42, 0x432aff97); /* 50 */ II ( c, d, a, b, in[14], S43, 0xab9423a7); /* 51 */ II ( b, c, d, a, in[ 5], S44, 0xfc93a039); /* 52 */ II ( a, b, c, d, in[12], S41, 0x655b59c3); /* 53 */ II ( d, a, b, c, in[ 3], S42, 0x8f0ccc92); /* 54 */ II ( c, d, a, b, in[10], S43, 0xffeff47d); 
/* 55 */ II ( b, c, d, a, in[ 1], S44, 0x85845dd1); /* 56 */ II ( a, b, c, d, in[ 8], S41, 0x6fa87e4f); /* 57 */ II ( d, a, b, c, in[15], S42, 0xfe2ce6e0); /* 58 */ II ( c, d, a, b, in[ 6], S43, 0xa3014314); /* 59 */ II ( b, c, d, a, in[13], S44, 0x4e0811a1); /* 60 */ II ( a, b, c, d, in[ 4], S41, 0xf7537e82); /* 61 */ II ( d, a, b, c, in[11], S42, 0xbd3af235); /* 62 */ II ( c, d, a, b, in[ 2], S43, 0x2ad7d2bb); /* 63 */ II ( b, c, d, a, in[ 9], S44, 0xeb86d391); /* 64 */ buf[0] += a; buf[1] += b; buf[2] += c; buf[3] += d; } /* *********************************************************************** ** End of md5.c ** ******************************** (cut) ******************************** */ polipo-1.0.4.1/main.c0000644000175000017500000001066711331407220013562 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" AtomPtr configFile = NULL; AtomPtr pidFile = NULL; int daemonise = 0; static void usage(char *argv0) { fprintf(stderr, "%s [ -h ] [ -v ] [ -x ] [ -c filename ] [ -- ] [ var=val... ]\n", argv0); fprintf(stderr, " -h: display this message.\n"); fprintf(stderr, " -v: display the list of configuration variables.\n"); fprintf(stderr, " -x: perform expiry on the disk cache.\n"); fprintf(stderr, " -c: specify the configuration file to use.\n"); } int main(int argc, char **argv) { FdEventHandlerPtr listener; int i; int rc; int expire = 0, printConfig = 0; initAtoms(); CONFIG_VARIABLE(daemonise, CONFIG_BOOLEAN, "Run as a daemon"); CONFIG_VARIABLE(pidFile, CONFIG_ATOM, "File with pid of running daemon."); preinitChunks(); preinitLog(); preinitObject(); preinitIo(); preinitDns(); preinitServer(); preinitHttp(); preinitDiskcache(); preinitLocal(); preinitForbidden(); preinitSocks(); i = 1; while(i < argc) { if(argv[i][0] != '-') break; if(strcmp(argv[i], "--") == 0) { i++; break; } else if(strcmp(argv[i], "-h") == 0) { usage(argv[0]); exit(0); } else if(strcmp(argv[i], "-v") == 0) { printConfig = 1; i++; } else if(strcmp(argv[i], "-x") == 0) { expire = 1; i++; } else if(strcmp(argv[i], "-c") == 0) { i++; if(i >= argc) { usage(argv[0]); exit(1); } if(configFile) releaseAtom(configFile); configFile = internAtom(argv[i]); i++; } else { usage(argv[0]); exit(1); } } if(configFile) configFile = expandTilde(configFile); if(configFile == NULL) { configFile = expandTilde(internAtom("~/.polipo")); if(configFile) if(access(configFile->string, F_OK) < 0) { releaseAtom(configFile); configFile = NULL; } } if(configFile == NULL) { if(access("/etc/polipo/config", F_OK) >= 0) configFile = internAtom("/etc/polipo/config"); if(configFile && access(configFile->string, F_OK) < 0) { releaseAtom(configFile); configFile = NULL; } } rc = parseConfigFile(configFile); if(rc < 0) exit(1); while(i < argc) { rc = parseConfigLine(argv[i], "command line", 0, 0); if(rc < 0) exit(1); 
i++; } initChunks(); initLog(); initObject(); if(!expire && !printConfig) initEvents(); initIo(); initDns(); initHttp(); initServer(); initDiskcache(); initForbidden(); initSocks(); if(printConfig) { printConfigVariables(stdout, 0); exit(0); } if(expire) { expireDiskObjects(); exit(0); } if(daemonise) do_daemonise(loggingToStderr()); if(pidFile) writePid(pidFile->string); listener = create_listener(proxyAddress->string, proxyPort, httpAccept, NULL); if(!listener) { if(pidFile) unlink(pidFile->string); exit(1); } eventLoop(); if(pidFile) unlink(pidFile->string); return 0; } polipo-1.0.4.1/log.h0000644000175000017500000001054111331407220013413 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #define L_ERROR 0x1 #define L_WARN 0x2 #define L_INFO 0x4 #define L_FORBIDDEN 0x8 #define L_UNCACHEABLE 0x10 #define L_SUPERSEDED 0x20 #define L_VARY 0x40 #define D_SERVER_CONN 0x100 #define D_SERVER_REQ 0x200 #define D_CLIENT_CONN 0x400 #define D_CLIENT_REQ 0x800 #define D_ATOM_REFCOUNT 0x1000 #define D_REFCOUNT 0x2000 #define D_LOCK 0x4000 #define D_OBJECT 0x8000 #define D_OBJECT_DATA 0x10000 #define D_SERVER_OFFSET 0x20000 #define D_CLIENT_DATA 0x40000 #define D_DNS 0x80000 #define D_CHILD 0x100000 #define D_IO 0x200000 #define LOGGING_DEFAULT (L_ERROR | L_WARN | L_INFO) #define LOGGING_MAX 0xFF void preinitLog(void); void initLog(void); void reopenLog(void); void flushLog(void); int loggingToStderr(void); void really_do_log(int type, const char *f, ...) ATTRIBUTE ((format (printf, 2, 3))); void really_do_log_v(int type, const char *f, va_list args) ATTRIBUTE ((format (printf, 2, 0))); void really_do_log_n(int type, const char *s, int n); void really_do_log_error(int type, int e, const char *f, ...) ATTRIBUTE ((format (printf, 3, 4))); void really_do_log_error_v(int type, int e, const char *f, va_list args) ATTRIBUTE ((format (printf, 3, 0))); #ifdef __GNUC__ #define DO_BACKTRACE() \ do { \ int n; \ void *buffer[10]; \ n = backtrace(buffer, 5); \ fflush(stderr); \ backtrace_symbols_fd(buffer, n, 2); \ } while(0) #else #define DO_BACKTRACE() /* */ #endif /* These are macros because it's important that they should be optimised away. */ #if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L #define do_log(_type, ...) \ do { \ if((_type) & (LOGGING_MAX)) really_do_log((_type), __VA_ARGS__); \ } while(0) #define do_log_error(_type, _e, ...) \ do { \ if((_type) & (LOGGING_MAX)) \ really_do_log_error((_type), (_e), __VA_ARGS__); \ } while(0) #elif defined(__GNUC__) #define do_log(_type, _args...) \ do { \ if((_type) & (LOGGING_MAX)) really_do_log((_type), _args); \ } while(0) #define do_log_error(_type, _e, _args...) 
\ do { \ if((_type) & (LOGGING_MAX)) \ really_do_log_error((_type), (_e), _args); \ } while(0) #else /* No variadic macros -- let's hope inline works. */ static inline void do_log(int type, const char *f, ...) { va_list args; va_start(args, f); if((type & (LOGGING_MAX)) != 0) really_do_log_v(type, f, args); va_end(args); } static inline void do_log_error(int type, int e, const char *f, ...) { va_list args; va_start(args, f); if((type & (LOGGING_MAX)) != 0) really_do_log_error_v(type, e, f, args); va_end(args); } #endif #define do_log_n(_type, _s, _n) \ do { \ if((_type) & (LOGGING_MAX)) really_do_log_n((_type), (_s), (_n)); \ } while(0) polipo-1.0.4.1/log.c0000644000175000017500000002421411331407220013410 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" #ifdef HAVE_SYSLOG #include #endif static int logLevel = LOGGING_DEFAULT; static int logSyslog = 0; static AtomPtr logFile = NULL; static FILE *logF; #ifdef HAVE_SYSLOG static AtomPtr logFacility = NULL; static int facility; #endif #define STR(x) XSTR(x) #define XSTR(x) #x static void initSyslog(void); #ifdef HAVE_SYSLOG static char *syslogBuf; static int syslogBufSize; static int syslogBufLength; static int translateFacility(AtomPtr facility); static int translatePriority(int type); static void accumulateSyslogV(int type, const char *f, va_list args); static void accumulateSyslogN(int type, const char *s, int len); #endif void preinitLog() { CONFIG_VARIABLE_SETTABLE(logLevel, CONFIG_HEX, configIntSetter, "Logging level (max = " STR(LOGGING_MAX) ")."); CONFIG_VARIABLE(logFile, CONFIG_ATOM, "Log file (stderr if empty and logSyslog is unset)."); #ifdef HAVE_SYSLOG CONFIG_VARIABLE(logSyslog, CONFIG_BOOLEAN, "Log to syslog."); CONFIG_VARIABLE(logFacility, CONFIG_ATOM, "Syslog facility to use."); logFacility = internAtom("user"); #endif logF = stderr; } int loggingToStderr(void) { return(logF == stderr); } void initLog(void) { if(daemonise && logFile == NULL && !logSyslog) logFile = internAtom("/var/log/polipo"); if(logFile != NULL && logFile->length > 0) { FILE *f; f = fopen(logFile->string, "a"); if(f == NULL) { do_log_error(L_ERROR, errno, "Couldn't open log file %s", logFile->string); exit(1); } setvbuf(f, NULL, _IOLBF, 0); logF = f; } if(logSyslog) { initSyslog(); if(logFile == NULL) { logF = NULL; } } } #ifdef HAVE_SYSLOG static void initSyslog() { if(logSyslog) { facility = translateFacility(logFacility); closelog(); openlog("polipo", LOG_PID, facility); if(!syslogBuf) { syslogBuf = strdup(""); syslogBufSize = 1; } } } /* Map a user-provided name to a syslog facility. This is rendered quite ugly because POSIX hardly defines any, but we should allow any the local system knows about. 
*/ static int translateFacility(AtomPtr facility) { typedef struct { const char *name; int facility; } FacilitiesRec; /* List of all known valid syslog facilities. This list is terminated by a NULL facility name. */ FacilitiesRec facilities[] = { /* These are all the facilities found in glibc 2.5. */ #ifdef LOG_AUTH { "auth", LOG_AUTH }, #endif #ifdef LOG_AUTHPRIV { "authpriv", LOG_AUTHPRIV }, #endif #ifdef LOG_CRON { "cron", LOG_CRON }, #endif #ifdef LOG_DAEMON { "daemon", LOG_DAEMON }, #endif #ifdef LOG_FTP { "ftp", LOG_FTP }, #endif #ifdef LOG_KERN { "kern", LOG_KERN }, #endif #ifdef LOG_LPR { "lpr", LOG_LPR }, #endif #ifdef LOG_MAIL { "mail", LOG_MAIL }, #endif #ifdef LOG_NEWS { "news", LOG_NEWS }, #endif #ifdef LOG_SYSLOG { "syslog", LOG_SYSLOG }, #endif #ifdef LOG_uucp { "uucp", LOG_UUCP }, #endif /* These are required by POSIX. */ { "user", LOG_USER }, { "local0", LOG_LOCAL0 }, { "local1", LOG_LOCAL1 }, { "local2", LOG_LOCAL2 }, { "local3", LOG_LOCAL3 }, { "local4", LOG_LOCAL4 }, { "local5", LOG_LOCAL5 }, { "local6", LOG_LOCAL6 }, { "local7", LOG_LOCAL7 }, { NULL, 0 }}; FacilitiesRec *current; /* It would be more fitting to return LOG_DAEMON, but POSIX does not guarantee the existence of that facility. */ if(!facility) { return LOG_USER; } current = facilities; while(current->name) { if(!strcmp(current->name, atomString(facility))) { return current->facility; } current++; } /* This will go to stderr because syslog is not yet initialized. */ do_log(L_ERROR, "Specified logFacility %s nonexistent on this system.", atomString(facility)); return LOG_USER; } /* Translate a Polipo error type into a syslog priority. */ static int translatePriority(int type) { typedef struct { int type; int priority; } PrioritiesRec; /* The list is terminated with a type of zero. 
*/ PrioritiesRec priorities[] = {{ L_ERROR, LOG_ERR }, { L_WARN, LOG_WARNING }, { L_INFO, LOG_NOTICE }, { L_FORBIDDEN, LOG_DEBUG }, { L_UNCACHEABLE, LOG_DEBUG }, { L_SUPERSEDED, LOG_DEBUG }, { L_VARY, LOG_DEBUG }, { 0, 0 }}; PrioritiesRec *current; current = priorities; while(current->type) { if(current->type == type) { return current->priority; } current++; } return LOG_DEBUG; } static int expandSyslog(int len) { int newsize; char *newbuf; if(len < 0) newsize = syslogBufSize * 2; else newsize = syslogBufLength + len + 1; newbuf = realloc(syslogBuf, newsize); if(!newbuf) return -1; syslogBuf = newbuf; syslogBufSize = newsize; return 1; } static void maybeFlushSyslog(int type) { char *linefeed; while(1) { linefeed = memchr(syslogBuf, '\n', syslogBufLength); if(linefeed == NULL) break; *linefeed = '\0'; syslog(translatePriority(type), "%s", syslogBuf); linefeed++; syslogBufLength -= (linefeed - syslogBuf); if(syslogBufLength > 0) memmove(syslogBuf, linefeed, syslogBufLength); } } static void accumulateSyslogV(int type, const char *f, va_list args) { int rc; again: rc = vsnprintf(syslogBuf + syslogBufLength, syslogBufSize - syslogBufLength, f, args); if(rc < 0 || rc >= syslogBufSize - syslogBufLength) { rc = expandSyslog(rc); if(rc < 0) return; goto again; } syslogBufLength += rc; maybeFlushSyslog(type); } static void accumulateSyslogN(int type, const char *s, int len) { while(syslogBufSize - syslogBufLength <= len) expandSyslog(len); memcpy(syslogBuf + syslogBufLength, s, len); syslogBufLength += len; syslogBuf[syslogBufLength] = '\0'; maybeFlushSyslog(type); } #else static void initSyslog() { return; } #endif /* Flush any messages waiting to be logged. */ void flushLog() { if(logF) fflush(logF); #ifdef HAVE_SYSLOG /* There shouldn't really be anything here, but let's be paranoid. We can't pick a good value for `type', so just invent one. 
*/ if(logSyslog && syslogBuf[0] != '\0') { accumulateSyslogN(L_INFO, "\n", 1); } assert(syslogBufLength == 0); #endif } void reopenLog() { if(logFile) { FILE *f; f = fopen(logFile->string, "a"); if(f == NULL) { do_log_error(L_ERROR, errno, "Couldn't reopen log file %s", logFile->string); exit(1); } setvbuf(f, NULL, _IOLBF, 0); fclose(logF); logF = f; } if(logSyslog) { initSyslog(); } } void really_do_log(int type, const char *f, ...) { va_list args; va_start(args, f); if(type & LOGGING_MAX & logLevel) really_do_log_v(type, f, args); va_end(args); } void really_do_log_v(int type, const char *f, va_list args) { if(type & LOGGING_MAX & logLevel) { if(logF) vfprintf(logF, f, args); #ifdef HAVE_SYSLOG if(logSyslog) accumulateSyslogV(type, f, args); #endif } } void really_do_log_error(int type, int e, const char *f, ...) { va_list args; va_start(args, f); if(type & LOGGING_MAX & logLevel) really_do_log_error_v(type, e, f, args); va_end(args); } void really_do_log_error_v(int type, int e, const char *f, va_list args) { if((type & LOGGING_MAX & logLevel) != 0) { char *es = pstrerror(e); if(es == NULL) es = "Unknown error"; if(logF) { vfprintf(logF, f, args); fprintf(logF, ": %s\n", es); } #ifdef HAVE_SYSLOG if(logSyslog) { char msg[256]; size_t n = 0; n = snnvprintf(msg, n, 256, f, args); n = snnprintf(msg, n, 256, ": "); n = snnprint_n(msg, n, 256, es, strlen (es)); n = snnprintf(msg, n, 256, "\n"); /* Overflow? Vanishingly unlikely; truncate at 255. */ if(n < 0 || n > 256) { n = 256; msg[255] = '\0'; } else msg[n] = '\0'; accumulateSyslogN(type, msg, n); } #endif } } void really_do_log_n(int type, const char *s, int n) { if((type & LOGGING_MAX & logLevel) != 0) { if(logF) { fwrite(s, n, 1, logF); } #ifdef HAVE_SYSLOG if(logSyslog) accumulateSyslogN(type, s, n); #endif } } polipo-1.0.4.1/localindex.html0000644000175000017500000000046111331407220015471 0ustar chrisdchrisd Welcome to Polipo

Welcome to Polipo

The Polipo manual.

The configuration interface. polipo-1.0.4.1/local.h0000644000175000017500000000414111331407220013723 0ustar chrisdchrisd/* Copyright (c) 2003, 2004 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ typedef struct _SpecialRequest { ObjectPtr object; int fd; void *buf; int offset; pid_t pid; } SpecialRequestRec, *SpecialRequestPtr; extern int disableConfiguration; extern int disableIndexing; void preinitLocal(void); void alternatingHttpStyle(FILE *out, char *id); int httpLocalRequest(ObjectPtr object, int method, int from, int to, HTTPRequestPtr, void *); int httpSpecialRequest(ObjectPtr object, int method, int from, int to, HTTPRequestPtr, void*); int httpSpecialSideRequest(ObjectPtr object, int method, int from, int to, HTTPRequestPtr requestor, void *closure); int specialRequestHandler(int status, FdEventHandlerPtr event, StreamRequestPtr request); int httpSpecialDoSide(HTTPRequestPtr requestor); int httpSpecialClientSideHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest); int httpSpecialDoSideFinish(AtomPtr data, HTTPRequestPtr requestor); polipo-1.0.4.1/local.c0000644000175000017500000006026411331407220013726 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "polipo.h" int disableLocalInterface = 0; int disableConfiguration = 0; int disableIndexing = 1; int disableServersList = 1; AtomPtr atomInitForbidden; AtomPtr atomReopenLog; AtomPtr atomDiscardObjects; AtomPtr atomWriteoutObjects; AtomPtr atomFreeChunkArenas; void preinitLocal() { atomInitForbidden = internAtom("init-forbidden"); atomReopenLog = internAtom("reopen-log"); atomDiscardObjects = internAtom("discard-objects"); atomWriteoutObjects = internAtom("writeout-objects"); atomFreeChunkArenas = internAtom("free-chunk-arenas"); /* These should not be settable for obvious reasons */ CONFIG_VARIABLE(disableLocalInterface, CONFIG_BOOLEAN, "Disable the local configuration pages."); CONFIG_VARIABLE(disableConfiguration, CONFIG_BOOLEAN, "Disable reconfiguring Polipo at runtime."); CONFIG_VARIABLE(disableIndexing, CONFIG_BOOLEAN, "Disable indexing of the local cache."); CONFIG_VARIABLE(disableServersList, CONFIG_BOOLEAN, "Disable the list of known servers."); } static void fillSpecialObject(ObjectPtr, void (*)(FILE*, char*), void*); int httpLocalRequest(ObjectPtr object, int method, int from, int to, HTTPRequestPtr requestor, void *closure) { if(object->requestor == NULL) object->requestor = requestor; if(!disableLocalInterface && urlIsSpecial(object->key, object->key_size)) return httpSpecialRequest(object, method, from, to, requestor, closure); if(method >= METHOD_POST) { httpClientError(requestor, 405, internAtom("Method not allowed")); requestor->connection->flags &= ~CONN_READER; return 1; } /* objectFillFromDisk already did the real work but we have to make sure we don't get into an infinite loop. 
*/ if(object->flags & OBJECT_INITIAL) { abortObject(object, 404, internAtom("Not found")); } object->age = current_time.tv_sec; object->date = current_time.tv_sec; object->flags &= ~OBJECT_VALIDATING; notifyObject(object); return 1; } void alternatingHttpStyle(FILE *out, char *id) { fprintf(out, "\n", id, id); } static void printConfig(FILE *out, char *dummy) { fprintf(out, "\n" "\n" "Polipo configuration\n" "\n" "

Polipo configuration

\n"); printConfigVariables(out, 1); fprintf(out, "

back

"); fprintf(out, "\n"); } #ifndef NO_DISK_CACHE static void recursiveIndexDiskObjects(FILE *out, char *root) { indexDiskObjects(out, root, 1); } static void plainIndexDiskObjects(FILE *out, char *root) { indexDiskObjects(out, root, 0); } #endif static void serversList(FILE *out, char *dummy) { listServers(out); } static int matchUrl(char *base, ObjectPtr object) { int n = strlen(base); if(object->key_size < n) return 0; if(memcmp(base, object->key, n) != 0) return 0; return (object->key_size == n) || (((char*)object->key)[n] == '?'); } int httpSpecialRequest(ObjectPtr object, int method, int from, int to, HTTPRequestPtr requestor, void *closure) { char buffer[1024]; int hlen; if(method >= METHOD_POST) { return httpSpecialSideRequest(object, method, from, to, requestor, closure); } if(!(object->flags & OBJECT_INITIAL)) { privatiseObject(object, 0); supersedeObject(object); object->flags &= ~(OBJECT_VALIDATING | OBJECT_INPROGRESS); notifyObject(object); return 1; } hlen = snnprintf(buffer, 0, 1024, "\r\nServer: polipo" "\r\nContent-Type: text/html"); object->date = current_time.tv_sec; object->age = current_time.tv_sec; object->headers = internAtomN(buffer, hlen); object->code = 200; object->message = internAtom("Okay"); object->flags &= ~OBJECT_INITIAL; object->flags |= OBJECT_DYNAMIC; if(object->key_size == 8 && memcmp(object->key, "/polipo/", 8) == 0) { objectPrintf(object, 0, "\n" "\n" "Polipo\n" "\n" "

Polipo

\n" "

Status report.

\n" "

Current configuration.

\n" "

Known servers.

\n" #ifndef NO_DISK_CACHE "

Disk cache index.

\n" #endif "\n"); object->length = object->size; } else if(matchUrl("/polipo/status", object)) { objectPrintf(object, 0, "\n" "\n" "Polipo status report\n" "\n" "

Polipo proxy on %s:%d: status report

\n" "

The %s proxy on %s:%d is %s.

\n" "

There are %d public and %d private objects " "currently in memory using %d KB in %d chunks " "(%d KB allocated).

\n" "

There are %d atoms.

" "

" "
\n" "
" "
\n" "
" "
\n" "
" "
\n" "
" "

\n" "

back

" "\n", proxyName->string, proxyPort, cacheIsShared ? "shared" : "private", proxyName->string, proxyPort, proxyOffline ? "off line" : (relaxTransparency ? "on line (transparency relaxed)" : "on line"), publicObjectCount, privateObjectCount, used_chunks * CHUNK_SIZE / 1024, used_chunks, totalChunkArenaSize() / 1024, used_atoms); object->expires = current_time.tv_sec; object->length = object->size; } else if(matchUrl("/polipo/config", object)) { fillSpecialObject(object, printConfig, NULL); object->expires = current_time.tv_sec + 5; #ifndef NO_DISK_CACHE } else if(matchUrl("/polipo/index", object)) { int len; char *root; if(disableIndexing) { abortObject(object, 403, internAtom("Action not allowed")); notifyObject(object); return 1; } len = MAX(0, object->key_size - 14); root = strdup_n((char*)object->key + 14, len); if(root == NULL) { abortObject(object, 503, internAtom("Couldn't allocate root")); notifyObject(object); return 1; } writeoutObjects(1); fillSpecialObject(object, plainIndexDiskObjects, root); free(root); object->expires = current_time.tv_sec + 5; } else if(matchUrl("/polipo/recursive-index", object)) { int len; char *root; if(disableIndexing) { abortObject(object, 403, internAtom("Action not allowed")); notifyObject(object); return 1; } len = MAX(0, object->key_size - 24); root = strdup_n((char*)object->key + 24, len); if(root == NULL) { abortObject(object, 503, internAtom("Couldn't allocate root")); notifyObject(object); return 1; } writeoutObjects(1); fillSpecialObject(object, recursiveIndexDiskObjects, root); free(root); object->expires = current_time.tv_sec + 20; #endif } else if(matchUrl("/polipo/servers", object)) { if(disableServersList) { abortObject(object, 403, internAtom("Action not allowed")); notifyObject(object); return 1; } fillSpecialObject(object, serversList, NULL); object->expires = current_time.tv_sec + 2; } else { abortObject(object, 404, internAtom("Not found")); } object->flags &= ~OBJECT_VALIDATING; notifyObject(object); return 
1;
}

/* Handle a request with a body (method >= METHOD_POST) on a special
   object.  Only POST is accepted; any other method is answered with a
   405 error.  Always returns 1 on the rejection path, otherwise the
   result of httpSpecialDoSide. */
int
httpSpecialSideRequest(ObjectPtr object, int method, int from, int to,
                       HTTPRequestPtr requestor, void *closure)
{
    HTTPConnectionPtr client = requestor->connection;

    assert(client->request == requestor);

    if(method != METHOD_POST) {
        httpClientError(requestor, 405, internAtom("Method not allowed"));
        requestor->connection->flags &= ~CONN_READER;
        return 1;
    }

    return httpSpecialDoSide(requestor);
}

/* Collect the body of a POST to a special object.  When the client
   buffer already holds at least bodylen bytes, the buffered data is
   interned and handed to httpSpecialDoSideFinish.  Otherwise, unless
   the buffered data has reached CHUNK_SIZE, the buffer is compacted and
   a read is scheduled with httpSpecialClientSideHandler.
   Always returns 1. */
int
httpSpecialDoSide(HTTPRequestPtr requestor)
{
    HTTPConnectionPtr client = requestor->connection;

    /* Whole body already buffered. */
    if(client->reqlen - client->reqbegin >= client->bodylen) {
        AtomPtr data;
        data = internAtomN(client->reqbuf + client->reqbegin,
                           client->reqlen - client->reqbegin);
        /* The buffer contents are consumed whether or not the
           allocation succeeded. */
        client->reqbegin = 0;
        client->reqlen = 0;
        if(data == NULL) {
            do_log(L_ERROR, "Couldn't allocate data.\n");
            httpClientError(requestor, 500,
                            internAtom("Couldn't allocate data"));
            return 1;
        }
        httpSpecialDoSideFinish(data, requestor);
        return 1;
    }

    /* The read below uses a buffer of CHUNK_SIZE bytes, so a body that
       has already filled that much cannot be completed. */
    if(client->reqlen - client->reqbegin >= CHUNK_SIZE) {
        httpClientError(requestor, 500, internAtom("POST too large"));
        return 1;
    }

    /* Move the unread data to the start of the buffer. */
    if(client->reqbegin > 0 && client->reqlen > client->reqbegin) {
        memmove(client->reqbuf, client->reqbuf + client->reqbegin,
                client->reqlen - client->reqbegin);
    }
    client->reqlen -= client->reqbegin;
    client->reqbegin = 0;

    /* Read more body data, starting after what is already buffered. */
    do_stream(IO_READ | IO_NOTNOW, client->fd,
              client->reqlen, client->reqbuf, CHUNK_SIZE,
              httpSpecialClientSideHandler, client);
    return 1;
}

/* Stream handler for the read scheduled by httpSpecialDoSide.
   srequest->data is the client connection.  Always returns 1. */
int
httpSpecialClientSideHandler(int status,
                             FdEventHandlerPtr event,
                             StreamRequestPtr srequest)
{
    HTTPConnectionPtr connection = srequest->data;
    HTTPRequestPtr request = connection->request;
    int push;

    /* The target object was aborted or is no longer in progress:
       discard the body and report the failure. */
    if((request->object->flags & OBJECT_ABORTED) ||
       !(request->object->flags & OBJECT_INPROGRESS)) {
        httpClientDiscardBody(connection);
        httpClientError(request, 503, internAtom("Post aborted"));
        return 1;
    }

    /* A negative status is a negated error code. */
    if(status < 0) {
        do_log_error(L_ERROR, -status, "Reading from client");
        if(status == -EDOGRACEFUL)
            httpClientFinish(connection, 1);
        else
            httpClientFinish(connection, 2);
        return 1;
    }

    /* Number of newly read bytes that belong to the body. */
    push =
MIN(srequest->offset - connection->reqlen, connection->bodylen - connection->reqoffset); if(push > 0) { connection->reqlen += push; httpSpecialDoSide(request); } do_log(L_ERROR, "Incomplete client request.\n"); connection->flags &= ~CONN_READER; httpClientRawError(connection, 502, internAtom("Incomplete client request"), 1); return 1; } int httpSpecialDoSideFinish(AtomPtr data, HTTPRequestPtr requestor) { ObjectPtr object = requestor->object; if(matchUrl("/polipo/config", object)) { AtomListPtr list = NULL; int i, rc; if(disableConfiguration) { abortObject(object, 403, internAtom("Action not allowed")); goto out; } list = urlDecode(data->string, data->length); if(list == NULL) { abortObject(object, 400, internAtom("Couldn't parse variable to set")); goto out; } for(i = 0; i < list->length; i++) { rc = parseConfigLine(list->list[i]->string, NULL, 0, 1); if(rc < 0) { abortObject(object, 400, rc == -1 ? internAtom("Couldn't parse variable to set") : internAtom("Variable is not settable")); destroyAtomList(list); goto out; } } destroyAtomList(list); object->date = current_time.tv_sec; object->age = current_time.tv_sec; object->headers = internAtom("\r\nLocation: /polipo/config?"); object->code = 303; object->message = internAtom("Done"); object->flags &= ~OBJECT_INITIAL; object->length = 0; } else if(matchUrl("/polipo/status", object)) { AtomListPtr list = NULL; int i; if(disableConfiguration) { abortObject(object, 403, internAtom("Action not allowed")); goto out; } list = urlDecode(data->string, data->length); if(list == NULL) { abortObject(object, 400, internAtom("Couldn't parse action")); goto out; } for(i = 0; i < list->length; i++) { char *equals = memchr(list->list[i]->string, '=', list->list[i]->length); AtomPtr name = equals ? 
internAtomN(list->list[i]->string, equals - list->list[i]->string) : retainAtom(list->list[i]); if(name == atomInitForbidden) initForbidden(); else if(name == atomReopenLog) reopenLog(); else if(name == atomDiscardObjects) discardObjects(1, 0); else if(name == atomWriteoutObjects) writeoutObjects(1); else if(name == atomFreeChunkArenas) free_chunk_arenas(); else { abortObject(object, 400, internAtomF("Unknown action %s", name->string)); releaseAtom(name); destroyAtomList(list); goto out; } releaseAtom(name); } destroyAtomList(list); object->date = current_time.tv_sec; object->age = current_time.tv_sec; object->headers = internAtom("\r\nLocation: /polipo/status?"); object->code = 303; object->message = internAtom("Done"); object->flags &= ~OBJECT_INITIAL; object->length = 0; } else { abortObject(object, 405, internAtom("Method not allowed")); } out: releaseAtom(data); notifyObject(object); requestor->connection->flags &= ~CONN_READER; return 1; } #ifdef HAVE_FORK static void fillSpecialObject(ObjectPtr object, void (*fn)(FILE*, char*), void* closure) { int rc; int filedes[2]; pid_t pid; sigset_t ss, old_mask; if(object->flags & OBJECT_INPROGRESS) return; rc = pipe(filedes); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't create pipe"); abortObject(object, 503, internAtomError(errno, "Couldn't create pipe")); return; } fflush(stdout); fflush(stderr); flushLog(); /* Block signals that we handle specially until the child can disable the handlers. */ interestingSignals(&ss); /* I'm a little confused. POSIX doesn't allow EINTR here, but I think that both Linux and SVR4 do. 
*/ do { rc = sigprocmask(SIG_BLOCK, &ss, &old_mask); } while (rc < 0 && errno == EINTR); if(rc < 0) { do_log_error(L_ERROR, errno, "Sigprocmask failed"); abortObject(object, 503, internAtomError(errno, "Sigprocmask failed")); close(filedes[0]); close(filedes[1]); return; } pid = fork(); if(pid < 0) { do_log_error(L_ERROR, errno, "Couldn't fork"); abortObject(object, 503, internAtomError(errno, "Couldn't fork")); close(filedes[0]); close(filedes[1]); do { rc = sigprocmask(SIG_SETMASK, &old_mask, NULL); } while (rc < 0 && errno == EINTR); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't restore signal mask"); polipoExit(); } return; } if(pid > 0) { SpecialRequestPtr request; close(filedes[1]); do { rc = sigprocmask(SIG_SETMASK, &old_mask, NULL); } while (rc < 0 && errno == EINTR); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't restore signal mask"); polipoExit(); return; } request = malloc(sizeof(SpecialRequestRec)); if(request == NULL) { kill(pid, SIGTERM); close(filedes[0]); abortObject(object, 503, internAtom("Couldn't allocate request\n")); notifyObject(object); /* specialRequestHandler will take care of the rest. */ } else { request->buf = get_chunk(); if(request->buf == NULL) { kill(pid, SIGTERM); close(filedes[0]); free(request); abortObject(object, 503, internAtom("Couldn't allocate request\n")); notifyObject(object); } } object->flags |= OBJECT_INPROGRESS; retainObject(object); request->object = object; request->fd = filedes[0]; request->pid = pid; request->offset = 0; /* Under any sensible scheduler, the child will run before the parent. So no need for IO_NOTNOW. 
*/
        do_stream(IO_READ, filedes[0], 0, request->buf, CHUNK_SIZE,
                  specialRequestHandler, request);
    } else {
        /* child */
        close(filedes[0]);

        uninitEvents();

        /* Restore the signal mask that the parent saved before fork. */
        do {
            rc = sigprocmask(SIG_SETMASK, &old_mask, NULL);
        } while (rc < 0 && errno == EINTR);
        if(rc < 0)
            exit(1);

        /* Make the write end of the pipe the child's standard output. */
        if(filedes[1] != 1)
            dup2(filedes[1], 1);

        (*fn)(stdout, closure);
        exit(0);
    }
}

/* Stream handler for the pipe connected to the child started by
   fillSpecialObject.  srequest->data is the SpecialRequestRec.  Data
   read from the pipe is appended to the object; on error, end of data
   or loss of interest the pipe is closed, the child is reaped and the
   request is freed.  Always returns 1. */
int
specialRequestHandler(int status,
                      FdEventHandlerPtr event, StreamRequestPtr srequest)
{
    SpecialRequestPtr request = srequest->data;
    int rc;
    /* Set when we send SIGTERM ourselves, so that death by SIGTERM is
       not reported as abnormal afterwards. */
    int killed = 0;

    /* A negative status is a negated error code. */
    if(status < 0) {
        kill(request->pid, SIGTERM);
        killed = 1;
        request->object->flags &= ~OBJECT_INPROGRESS;
        abortObject(request->object, 502,
                    internAtomError(-status, "Couldn't read from client"));
        goto done;
    }

    /* Append what was read (srequest->offset bytes in request->buf) at
       the current end of the object's data. */
    if(srequest->offset > 0) {
        rc = objectAddData(request->object, request->buf,
                           request->offset, srequest->offset);
        if(rc < 0) {
            kill(request->pid, SIGTERM);
            killed = 1;
            request->object->flags &= ~OBJECT_INPROGRESS;
            abortObject(request->object, 503,
                        internAtom("Couldn't add data to connection"));
            goto done;
        }
        request->offset += srequest->offset;
    }

    /* A non-zero, non-negative status ends the transfer: the object is
       complete and its length becomes known. */
    if(status) {
        request->object->flags &= ~OBJECT_INPROGRESS;
        request->object->length = request->object->size;
        goto done;
    }

    /* If we're the only person interested in this object, let's abort
       it now. */
    if(request->object->refcount <= 1) {
        kill(request->pid, SIGTERM);
        killed = 1;
        request->object->flags &= ~OBJECT_INPROGRESS;
        abortObject(request->object, 500, internAtom("Aborted"));
        goto done;
    }

    /* More data expected: wake up waiters and schedule the next read,
       reusing the same buffer from its start. */
    notifyObject(request->object);
    do_stream(IO_READ | IO_NOTNOW, request->fd, 0, request->buf, CHUNK_SIZE,
              specialRequestHandler, request);
    return 1;

 done:
    close(request->fd);
    dispose_chunk(request->buf);
    releaseNotifyObject(request->object);
    /* That's a blocking wait.  It shouldn't block for long, as we've
       either already killed the child, or else we got EOF from it.
*/
    do {
        rc = waitpid(request->pid, &status, 0);
    } while(rc < 0 && errno == EINTR);
    if(rc < 0) {
        do_log(L_ERROR, "Wait for %d: %d\n", (int)request->pid, errno);
    } else {
        /* The exit is normal if the child exited with status 0, or if
           it died from the SIGTERM that we sent ourselves. */
        int normal =
            (WIFEXITED(status) && WEXITSTATUS(status) == 0) ||
            (killed && WIFSIGNALED(status) && WTERMSIG(status) == SIGTERM);
        char *reason =
            WIFEXITED(status) ? "with status" :
            WIFSIGNALED(status) ? "on signal" :
            "with unknown status";
        int value =
            WIFEXITED(status) ? WEXITSTATUS(status) :
            WIFSIGNALED(status) ? WTERMSIG(status) :
            status;
        /* Abnormal exits are logged as errors, normal ones under the
           D_CHILD type. */
        do_log(normal ? D_CHILD : L_ERROR,
               "Child %d exited %s %d.\n",
               (int)request->pid, reason, value);
    }
    free(request);
    return 1;
}
#else
/* Variant used when fork is not available: fill a special object with
   the output of fn(out, closure) by running fn in this process against
   a temporary file and then copying the file into the object, one
   chunk at a time.  Does nothing if the object is already in progress.
   Failures abort the object with a 503; waiters are notified in every
   case that reaches the cleanup code. */
static void
fillSpecialObject(ObjectPtr object, void (*fn)(FILE*, char*), void* closure)
{
    FILE *tmp = NULL;
    char *buf = NULL;
    int rc, len, offset;

    if(object->flags & OBJECT_INPROGRESS)
        return;

    buf = get_chunk();
    if(buf == NULL) {
        abortObject(object, 503, internAtom("Couldn't allocate chunk"));
        goto done;
    }

    tmp = tmpfile();
    if(tmp == NULL) {
        abortObject(object, 503, internAtom(pstrerror(errno)));
        goto done;
    }

    /* Generate the whole output, then read it back from the start. */
    (*fn)(tmp, closure);
    fflush(tmp);

    rewind(tmp);
    offset = 0;
    while(1) {
        len = fread(buf, 1, CHUNK_SIZE, tmp);
        /* A short or empty read is an error only if the stream's error
           indicator is set; otherwise it means end of file. */
        if(len <= 0 && ferror(tmp)) {
            abortObject(object, 503, internAtom(pstrerror(errno)));
            goto done;
        }
        if(len <= 0)
            break;

        rc = objectAddData(object, buf, offset, len);
        if(rc < 0) {
            abortObject(object, 503, internAtom("Couldn't add data to object"));
            goto done;
        }

        offset += len;
    }

    /* Everything was copied: the object's length is now known. */
    object->length = offset;

 done:
    if(buf)
        dispose_chunk(buf);
    if(tmp)
        fclose(tmp);
    notifyObject(object);
}
#endif
polipo-1.0.4.1/io.h0000644000175000017500000001205711331407220013245 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* request->operation */ #define IO_READ 0 #define IO_WRITE 1 #define IO_MASK 0xFF /* Do not initiate operation now -- wait for the poll loop. */ #define IO_NOTNOW 0x100 /* Call the progress handler once if no data arrives immediately. */ #define IO_IMMEDIATE 0x200 /* Emit a chunk length before every write operation */ #define IO_CHUNKED 0x400 /* Emit a zero-length chunk at the end if chunked */ #define IO_END 0x800 /* Internal -- header is really buf3 */ #define IO_BUF3 0x1000 /* Internal -- header is really buf_location */ #define IO_BUF_LOCATION 0x2000 typedef struct _StreamRequest { short operation; short fd; int offset; int len; int len2; union { struct { int hlen; char *header; } h; struct { int len3; char *buf3; } b; struct { char **buf_location; } l; } u; char *buf; char *buf2; int (*handler)(int, FdEventHandlerPtr, struct _StreamRequest*); void *data; } StreamRequestRec, *StreamRequestPtr; typedef struct _ConnectRequest { int fd; int af; struct _Atom *addr; int firstindex; int index; int port; int (*handler)(int, FdEventHandlerPtr, struct _ConnectRequest*); void *data; } ConnectRequestRec, *ConnectRequestPtr; typedef struct _AcceptRequest { int fd; int (*handler)(int, FdEventHandlerPtr, struct _AcceptRequest*); void *data; } AcceptRequestRec, 
*AcceptRequestPtr; void preinitIo(); void initIo(); FdEventHandlerPtr do_stream(int operation, int fd, int offset, char *buf, int len, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data); FdEventHandlerPtr do_stream_h(int operation, int fd, int offset, char *header, int hlen, char *buf, int len, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data); FdEventHandlerPtr do_stream_2(int operation, int fd, int offset, char *buf, int len, char *buf2, int len2, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data); FdEventHandlerPtr do_stream_3(int operation, int fd, int offset, char *buf, int len, char *buf2, int len2, char *buf3, int len3, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data); FdEventHandlerPtr do_stream_buf(int operation, int fd, int offset, char **buf_location, int len, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data); FdEventHandlerPtr schedule_stream(int operation, int fd, int offset, char *header, int hlen, char *buf, int len, char *buf2, int len2, char *buf3, int len3, char **buf_location, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data); int do_scheduled_stream(int, FdEventHandlerPtr); int streamRequestDone(StreamRequestPtr); FdEventHandlerPtr do_connect(struct _Atom *addr, int index, int port, int (*handler)(int, FdEventHandlerPtr, ConnectRequestPtr), void *data); int do_scheduled_connect(int, FdEventHandlerPtr event); FdEventHandlerPtr do_accept(int fd, int (*handler)(int, FdEventHandlerPtr, AcceptRequestPtr), void* data); FdEventHandlerPtr schedule_accept(int fd, int (*handler)(int, FdEventHandlerPtr, AcceptRequestPtr), void* data); int do_scheduled_accept(int, FdEventHandlerPtr event); FdEventHandlerPtr create_listener(char *address, int port, int (*handler)(int, FdEventHandlerPtr, AcceptRequestPtr), void *data); int setNonblocking(int fd, int nonblocking); int setNodelay(int fd, int nodelay); int setV6only(int fd, int v6only); int 
lingeringClose(int fd); typedef struct _NetAddress { int prefix; int af; unsigned char data[16]; } NetAddressRec, *NetAddressPtr; NetAddressPtr parseNetAddress(AtomListPtr list); int netAddressMatch(int fd, NetAddressPtr list) ATTRIBUTE ((pure)); polipo-1.0.4.1/io.c0000644000175000017500000007453411331407220013250 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" #ifdef HAVE_IPv6 #ifdef IPV6_PREFER_TEMPADDR #define HAVE_IPV6_PREFER_TEMPADDR 1 #endif #endif #ifdef HAVE_IPV6_PREFER_TEMPADDR int useTemporarySourceAddress = 1; #endif void preinitIo() { #ifdef HAVE_IPV6_PREFER_TEMPADDR CONFIG_VARIABLE_SETTABLE(useTemporarySourceAddress, CONFIG_TRISTATE, configIntSetter, "Prefer IPv6 temporary source address."); #endif #ifdef HAVE_WINSOCK /* Load the winsock dll */ WSADATA wsaData; WORD wVersionRequested = MAKEWORD(2, 2); int err = WSAStartup( wVersionRequested, &wsaData ); if (err != 0) { do_log_error(L_ERROR, err, "Couldn't load winsock dll"); exit(-1); } #endif return; } void initIo() { return; } FdEventHandlerPtr do_stream(int operation, int fd, int offset, char *buf, int len, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data) { assert(len > offset || (operation & (IO_END | IO_IMMEDIATE))); return schedule_stream(operation, fd, offset, NULL, 0, buf, len, NULL, 0, NULL, 0, NULL, handler, data); } FdEventHandlerPtr do_stream_2(int operation, int fd, int offset, char *buf, int len, char *buf2, int len2, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data) { assert(len + len2 > offset || (operation & (IO_END | IO_IMMEDIATE))); return schedule_stream(operation, fd, offset, NULL, 0, buf, len, buf2, len2, NULL, 0, NULL, handler, data); } FdEventHandlerPtr do_stream_3(int operation, int fd, int offset, char *buf, int len, char *buf2, int len2, char *buf3, int len3, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data) { assert(len + len2 > offset || (operation & (IO_END | IO_IMMEDIATE))); return schedule_stream(operation, fd, offset, NULL, 0, buf, len, buf2, len2, buf3, len3, NULL, handler, data); } FdEventHandlerPtr do_stream_h(int operation, int fd, int offset, char *header, int hlen, char *buf, int len, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data) { assert(hlen + len > offset || (operation & (IO_END | IO_IMMEDIATE))); return 
schedule_stream(operation, fd, offset, header, hlen, buf, len, NULL, 0, NULL, 0, NULL, handler, data); } FdEventHandlerPtr do_stream_buf(int operation, int fd, int offset, char **buf_location, int len, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data) { assert((len > offset || (operation & (IO_END | IO_IMMEDIATE))) && len <= CHUNK_SIZE); return schedule_stream(operation, fd, offset, NULL, 0, *buf_location, len, NULL, 0, NULL, 0, buf_location, handler, data); } static int chunkHeaderLen(int i) { if(i <= 0) return 0; if(i < 0x10) return 3; else if(i < 0x100) return 4; else if(i < 0x1000) return 5; else if(i < 0x10000) return 6; else abort(); } static int chunkHeader(char *buf, int buflen, int i) { int n; if(i <= 0) return 0; n = snprintf(buf, buflen, "%x\r\n", i); return n; } FdEventHandlerPtr schedule_stream(int operation, int fd, int offset, char *header, int hlen, char *buf, int len, char *buf2, int len2, char *buf3, int len3, char **buf_location, int (*handler)(int, FdEventHandlerPtr, StreamRequestPtr), void *data) { StreamRequestRec request; FdEventHandlerPtr event; int done; request.operation = operation; request.fd = fd; if(len3) { assert(hlen == 0 && buf_location == NULL); request.u.b.len3 = len3; request.u.b.buf3 = buf3; request.operation |= IO_BUF3; } else if(buf_location) { assert(hlen == 0); request.u.l.buf_location = buf_location; request.operation |= IO_BUF_LOCATION; } else { request.u.h.hlen = hlen; request.u.h.header = header; } request.buf = buf; request.len = len; request.buf2 = buf2; request.len2 = len2; if((operation & IO_CHUNKED) || (!(request.operation & (IO_BUF3 | IO_BUF_LOCATION)) && hlen > 0)) { assert(offset == 0); request.offset = -hlen; if(operation & IO_CHUNKED) request.offset += -chunkHeaderLen(len + len2); } else { request.offset = offset; } request.handler = handler; request.data = data; event = makeFdEvent(fd, (operation & IO_MASK) == IO_WRITE ? 
POLLOUT : POLLIN, do_scheduled_stream, sizeof(StreamRequestRec), &request); if(!event) { done = (*handler)(-ENOMEM, NULL, &request); assert(done); return NULL; } if(!(operation & IO_NOTNOW)) { done = event->handler(0, event); if(done) { free(event); return NULL; } } if(operation & IO_IMMEDIATE) { assert(hlen == 0 && !(operation & IO_CHUNKED)); done = (*handler)(0, event, &request); if(done) { free(event); return NULL; } } event = registerFdEventHelper(event); return event; } static const char *endChunkTrailer = "\r\n0\r\n\r\n"; int do_scheduled_stream(int status, FdEventHandlerPtr event) { StreamRequestPtr request = (StreamRequestPtr)&event->data; int rc, done, i; struct iovec iov[6]; int chunk_header_len, chunk_trailer_len; char chunk_header[10]; int len12 = request->len + request->len2; int len123 = request->len + request->len2 + ((request->operation & IO_BUF3) ? request->u.b.len3 : 0); if(status) { done = request->handler(status, event, request); return done; } i = 0; if(request->offset < 0) { assert((request->operation & (IO_MASK | IO_BUF3 | IO_BUF_LOCATION)) == IO_WRITE); if(request->operation & IO_CHUNKED) { chunk_header_len = chunkHeaderLen(len123); chunk_trailer_len = 2; } else { chunk_header_len = 0; chunk_trailer_len = 0; } if(request->offset < -chunk_header_len) { assert(request->offset >= -(request->u.h.hlen + chunk_header_len)); iov[i].iov_base = request->u.h.header; iov[i].iov_len = -request->offset - chunk_header_len; i++; } if(chunk_header_len > 0) { chunkHeader(chunk_header, 10, len123); if(request->offset < -chunk_header_len) { iov[i].iov_base = chunk_header; iov[i].iov_len = chunk_header_len; } else { iov[i].iov_base = chunk_header + chunk_header_len + request->offset; iov[i].iov_len = -request->offset; } i++; } } if(request->len > 0) { if(request->buf == NULL && (request->operation & IO_BUF_LOCATION)) { assert(*request->u.l.buf_location == NULL); request->buf = *request->u.l.buf_location = get_chunk(); if(request->buf == NULL) { done = 
request->handler(-ENOMEM, event, request); return done; } } if(request->offset <= 0) { iov[i].iov_base = request->buf; iov[i].iov_len = request->len; i++; } else if(request->offset < request->len) { iov[i].iov_base = request->buf + request->offset; iov[i].iov_len = request->len - request->offset; i++; } } if(request->len2 > 0) { if(request->offset <= request->len) { iov[i].iov_base = request->buf2; iov[i].iov_len = request->len2; i++; } else if(request->offset < request->len + request->len2) { iov[i].iov_base = request->buf2 + request->offset - request->len; iov[i].iov_len = request->len2 - request->offset + request->len; i++; } } if((request->operation & IO_BUF3) && request->u.b.len3 > 0) { if(request->offset <= len12) { iov[i].iov_base = request->u.b.buf3; iov[i].iov_len = request->u.b.len3; i++; } else if(request->offset < len12 + request->u.b.len3) { iov[i].iov_base = request->u.b.buf3 + request->offset - len12; iov[i].iov_len = request->u.b.len3 - request->offset + len12; i++; } } if((request->operation & IO_CHUNKED)) { int l; const char *trailer; if(request->operation & IO_END) { if(len123 == 0) { trailer = endChunkTrailer + 2; l = 5; } else { trailer = endChunkTrailer; l = 7; } } else { trailer = endChunkTrailer; l = 2; } if(request->offset <= len123) { iov[i].iov_base = (char*)trailer; iov[i].iov_len = l; i++; } else if(request->offset < len123 + l) { iov[i].iov_base = (char*)endChunkTrailer + request->offset - len123; iov[i].iov_len = l - request->offset + len123; i++; } } assert(i > 0); if((request->operation & IO_MASK) == IO_WRITE) { if(i > 1) rc = WRITEV(request->fd, iov, i); else rc = WRITE(request->fd, iov[0].iov_base, iov[0].iov_len); } else { if(i > 1) rc = READV(request->fd, iov, i); else rc = READ(request->fd, iov[0].iov_base, iov[0].iov_len); } if(rc > 0) { request->offset += rc; if(request->offset < 0) return 0; done = request->handler(0, event, request); return done; } else if(rc == 0 || errno == EPIPE) { done = request->handler(1, event, 
request); } else if(errno == EAGAIN || errno == EINTR) { return 0; } else if(errno == EFAULT || errno == EBADF) { abort(); } else { done = request->handler(-errno, event, request); } assert(done); return done; } int streamRequestDone(StreamRequestPtr request) { int len123 = request->len + request->len2 + ((request->operation & IO_BUF3) ? request->u.b.len3 : 0); if(request->offset < 0) return 0; else if(request->offset < len123) return 0; else if(request->operation & IO_CHUNKED) { if(request->operation & IO_END) { if(request->offset < len123 + (len123 ? 7 : 5)) return 0; } else { if(request->offset < len123 + 2) return 0; } } return 1; } static int serverSocket(int af) { int fd, rc; if(af == 4) { fd = socket(PF_INET, SOCK_STREAM, 0); } else if(af == 6) { #ifdef HAVE_IPv6 fd = socket(PF_INET6, SOCK_STREAM, 0); #else fd = -1; errno = EAFNOSUPPORT; #endif } else { abort(); } if(fd >= 0) { rc = setNonblocking(fd, 1); if(rc < 0) { int errno_save = errno; CLOSE(fd); errno = errno_save; return -1; } #ifdef HAVE_IPV6_PREFER_TEMPADDR if (af == 6 && useTemporarySourceAddress != 1) { int value; value = (useTemporarySourceAddress == 2) ? 
1 : 0; rc = setsockopt(fd, IPPROTO_IPV6, IPV6_PREFER_TEMPADDR, &value, sizeof(value)); if (rc < 0) { /* no error, warning only */ do_log_error(L_WARN, errno, "Couldn't set IPV6CTL_USETEMPADDR"); } } #endif } return fd; } FdEventHandlerPtr do_connect(AtomPtr addr, int index, int port, int (*handler)(int, FdEventHandlerPtr, ConnectRequestPtr), void *data) { ConnectRequestRec request; FdEventHandlerPtr event; int done, fd, af; assert(addr->length > 0 && addr->string[0] == DNS_A); assert(addr->length % sizeof(HostAddressRec) == 1); if(index >= (addr->length - 1)/ sizeof(HostAddressRec)) index = 0; request.firstindex = index; request.port = port; request.handler = handler; request.data = data; again: af = addr->string[1 + index * sizeof(HostAddressRec)]; fd = serverSocket(af); request.fd = fd; request.af = af; request.addr = addr; request.index = index; if(fd < 0) { int n = (addr->length - 1) / sizeof(HostAddressRec); if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { if((index + 1) % n != request.firstindex) { index = (index + 1) % n; goto again; } } do_log_error(L_ERROR, errno, "Couldn't create socket"); done = (*handler)(-errno, NULL, &request); assert(done); return NULL; } /* POLLIN is apparently needed on Windows */ event = registerFdEvent(fd, POLLIN | POLLOUT, do_scheduled_connect, sizeof(ConnectRequestRec), &request); if(event == NULL) { done = (*handler)(-ENOMEM, NULL, &request); assert(done); return NULL; } done = event->handler(0, event); if(done) { unregisterFdEvent(event); return NULL; } return event; } int do_scheduled_connect(int status, FdEventHandlerPtr event) { ConnectRequestPtr request = (ConnectRequestPtr)&event->data; AtomPtr addr = request->addr; int done; int rc; HostAddressPtr host; struct sockaddr_in servaddr; #ifdef HAVE_IPv6 struct sockaddr_in6 servaddr6; #endif assert(addr->length > 0 && addr->string[0] == DNS_A); assert(addr->length % sizeof(HostAddressRec) == 1); assert(request->index < (addr->length - 1) / sizeof(HostAddressRec)); 
if(status) { done = request->handler(status, event, request); if(done) { releaseAtom(addr); request->addr = NULL; return 1; } return 0; } again: host = (HostAddressPtr)&addr->string[1 + request->index * sizeof(HostAddressRec)]; if(host->af != request->af) { int newfd; /* Ouch. Our socket has a different protocol than the host address. */ CLOSE(request->fd); newfd = serverSocket(host->af); if(newfd < 0) { if(errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { int n = request->addr->length / sizeof(HostAddressRec); if((request->index + 1) % n != request->firstindex) { request->index = (request->index + 1) % n; goto again; } } request->fd = -1; done = request->handler(-errno, event, request); assert(done); return 1; } if(newfd != request->fd) { request->fd = dup2(newfd, request->fd); CLOSE(newfd); if(request->fd < 0) { done = request->handler(-errno, event, request); assert(done); return 1; } } request->af = host->af; } switch(host->af) { case 4: memset(&servaddr, 0, sizeof(servaddr)); servaddr.sin_family = AF_INET; servaddr.sin_port = htons(request->port); memcpy(&servaddr.sin_addr, &host->data, sizeof(struct in_addr)); rc = connect(request->fd, (struct sockaddr*)&servaddr, sizeof(servaddr)); break; case 6: #ifdef HAVE_IPv6 memset(&servaddr6, 0, sizeof(servaddr6)); servaddr6.sin6_family = AF_INET6; servaddr6.sin6_port = htons(request->port); memcpy(&servaddr6.sin6_addr, &host->data, sizeof(struct in6_addr)); rc = connect(request->fd, (struct sockaddr*)&servaddr6, sizeof(servaddr6)); #else rc = -1; errno = EAFNOSUPPORT; #endif break; default: abort(); } if(rc >= 0 || errno == EISCONN) { done = request->handler(1, event, request); assert(done); releaseAtom(request->addr); request->addr = NULL; return 1; } if(errno == EINPROGRESS || errno == EINTR) { return 0; } else if(errno == EFAULT || errno == EBADF) { abort(); } else { int n = request->addr->length / sizeof(HostAddressRec); if((request->index + 1) % n != request->firstindex) { request->index = (request->index + 1) 
% n; goto again; } done = request->handler(-errno, event, request); assert(done); releaseAtom(request->addr); request->addr = NULL; return 1; } } FdEventHandlerPtr do_accept(int fd, int (*handler)(int, FdEventHandlerPtr, AcceptRequestPtr), void *data) { FdEventHandlerPtr event; int done; event = schedule_accept(fd, handler, data); if(event == NULL) { done = (*handler)(-ENOMEM, NULL, NULL); assert(done); } /* But don't invoke it now - this will delay accept if under load. */ return event; } FdEventHandlerPtr schedule_accept(int fd, int (*handler)(int, FdEventHandlerPtr, AcceptRequestPtr), void *data) { FdEventHandlerPtr event; AcceptRequestRec request; int done; request.fd = fd; request.handler = handler; request.data = data; event = registerFdEvent(fd, POLLOUT|POLLIN, do_scheduled_accept, sizeof(request), &request); if(!event) { done = (*handler)(-ENOMEM, NULL, NULL); assert(done); } return event; } int do_scheduled_accept(int status, FdEventHandlerPtr event) { AcceptRequestPtr request = (AcceptRequestPtr)&event->data; int rc, done; unsigned len; struct sockaddr_in addr; if(status) { done = request->handler(status, event, request); if(done) return done; } len = sizeof(struct sockaddr_in); rc = accept(request->fd, (struct sockaddr*)&addr, &len); if(rc >= 0) done = request->handler(rc, event, request); else done = request->handler(-errno, event, request); return done; } FdEventHandlerPtr create_listener(char *address, int port, int (*handler)(int, FdEventHandlerPtr, AcceptRequestPtr), void *data) { int fd, rc; int one = 1; int done; struct sockaddr_in addr; #ifdef HAVE_IPv6 int inet6 = 1; struct sockaddr_in6 addr6; #else int inet6 = 0; #endif if(inet6 && address) { struct in_addr buf; rc = inet_aton(address, &buf); if(rc == 1) inet6 = 0; } fd = -1; errno = EAFNOSUPPORT; #ifdef HAVE_IPv6 if(inet6) { fd = socket(PF_INET6, SOCK_STREAM, 0); } #endif if(fd < 0 && (errno == EPROTONOSUPPORT || errno == EAFNOSUPPORT)) { inet6 = 0; fd = socket(PF_INET, SOCK_STREAM, 0); } 
if(fd < 0) { done = (*handler)(-errno, NULL, NULL); assert(done); return NULL; } rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one)); if(rc < 0) do_log_error(L_WARN, errno, "Couldn't set SO_REUSEADDR"); if(inet6) { #ifdef HAVE_IPv6 rc = setV6only(fd, 0); if(rc < 0) /* Reportedly OpenBSD returns an error for that. So only log it as a debugging message. */ do_log_error(D_CLIENT_CONN, errno, "Couldn't reset IPV6_V6ONLY"); memset(&addr6, 0, sizeof(addr6)); rc = inet_pton(AF_INET6, address, &addr6.sin6_addr); if(rc != 1) { done = (*handler)(rc == 0 ? -ESYNTAX : -errno, NULL, NULL); assert(done); return NULL; } addr6.sin6_family = AF_INET6; addr6.sin6_port = htons(port); rc = bind(fd, (struct sockaddr*)&addr6, sizeof(addr6)); #else rc = -1; errno = EAFNOSUPPORT; #endif } else { memset(&addr, 0, sizeof(addr)); rc = inet_aton(address, &addr.sin_addr); if(rc != 1) { done = (*handler)(rc == 0 ? -ESYNTAX : -errno, NULL, NULL); assert(done); return NULL; } addr.sin_family = AF_INET; addr.sin_port = htons(port); rc = bind(fd, (struct sockaddr*)&addr, sizeof(addr)); } if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't bind"); CLOSE(fd); done = (*handler)(-errno, NULL, NULL); assert(done); return NULL; } rc = setNonblocking(fd, 1); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't set non blocking mode"); CLOSE(fd); done = (*handler)(-errno, NULL, NULL); assert(done); return NULL; } rc = listen(fd, 32); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't listen"); CLOSE(fd); done = (*handler)(-errno, NULL, NULL); assert(done); return NULL; } do_log(L_INFO, "Established listening socket on port %d.\n", port); return schedule_accept(fd, handler, data); } #ifndef SOL_TCP /* BSD */ #define SOL_TCP IPPROTO_TCP #endif int setNonblocking(int fd, int nonblocking) { #ifdef MINGW return mingw_setnonblocking(fd, nonblocking); #else int rc; rc = fcntl(fd, F_GETFL, 0); if(rc < 0) return -1; rc = fcntl(fd, F_SETFL, nonblocking?(rc | O_NONBLOCK):(rc & ~O_NONBLOCK)); 
if(rc < 0) return -1; return 0; #endif } int setNodelay(int fd, int nodelay) { int val = nodelay ? 1 : 0; int rc; rc = setsockopt(fd, SOL_TCP, TCP_NODELAY, (char *)&val, sizeof(val)); if(rc < 0) return -1; return 0; } #ifdef IPV6_V6ONLY int setV6only(int fd, int v6only) { int val = v6only ? 1 : 0; int rc; rc = setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&val, sizeof(val)); if(rc < 0) return -1; return 0; } #else int setV6only(int fd, int v6only) { return 0; } #endif typedef struct _LingeringClose { int fd; FdEventHandlerPtr handler; TimeEventHandlerPtr timeout; } LingeringCloseRec, *LingeringClosePtr; static int lingeringCloseTimeoutHandler(TimeEventHandlerPtr event) { LingeringClosePtr l = *(LingeringClosePtr*)event->data; assert(l->timeout == event); l->timeout = NULL; if(l->handler) pokeFdEvent(l->fd, -ESHUTDOWN, POLLIN | POLLOUT); else { CLOSE(l->fd); free(l); } return 1; } static int lingeringCloseHandler(int status, FdEventHandlerPtr event) { LingeringClosePtr l = *(LingeringClosePtr*)event->data; char buf[17]; int rc; assert(l->handler == event); l->handler = NULL; if(status && status != -EDOGRACEFUL) goto done; rc = READ(l->fd, &buf, 17); if(rc == 0 || (rc < 0 && errno != EAGAIN && errno != EINTR)) goto done; /* The client is still sending data. Ignore it in order to let TCP's flow control do its work. The timeout will close the connection. 
*/ return 1; done: if(l->timeout) { cancelTimeEvent(l->timeout); l->timeout = NULL; } CLOSE(l->fd); free(l); return 1; } int lingeringClose(int fd) { int rc; LingeringClosePtr l; rc = shutdown(fd, 1); if(rc < 0) { if(errno != ENOTCONN) { do_log_error(L_ERROR, errno, "Shutdown failed"); } else if(errno == EFAULT || errno == EBADF) { abort(); } CLOSE(fd); return 1; } l = malloc(sizeof(LingeringCloseRec)); if(l == NULL) goto fail; l->fd = fd; l->handler = NULL; l->timeout = NULL; l->timeout = scheduleTimeEvent(10, lingeringCloseTimeoutHandler, sizeof(LingeringClosePtr), &l); if(l->timeout == NULL) { free(l); goto fail; } l->handler = registerFdEvent(fd, POLLIN, lingeringCloseHandler, sizeof(LingeringClosePtr), &l); if(l->handler == NULL) { do_log(L_ERROR, "Couldn't schedule lingering close handler.\n"); /* But don't close -- the timeout will do its work. */ } return 1; fail: do_log(L_ERROR, "Couldn't schedule lingering close.\n"); CLOSE(fd); return 1; } NetAddressPtr parseNetAddress(AtomListPtr list) { NetAddressPtr nl; int i, rc, rc6; char buf[100]; struct in_addr ina; #ifdef HAVE_IPv6 struct in6_addr ina6; #endif nl = malloc((list->length + 1) * sizeof(NetAddressRec)); if(nl == NULL) { do_log(L_ERROR, "Couldn't allocate network list.\n"); return NULL; } for(i = 0; i < list->length; i++) { int prefix; char *s = list->list[i]->string, *p; int n = list->list[i]->length; char *suffix; while(*s == ' ' || *s == '\t') { s++; n--; } if(n >= 100) { do_log(L_ERROR, "Network name too long.\n"); goto fail; } p = memchr(s, '/', n); if(p) { memcpy(buf, s, p - s); buf[p - s] = '\0'; prefix = strtol(p + 1, &suffix, 10); } else { char *s1, *s2; prefix = -1; strcpy(buf, s); s1 = strchr(s, ' '); s2 = strchr(s, '\t'); if(s1 == NULL) suffix = s2; else if(s2 == NULL) suffix = s1; else if(s1 < s2) suffix = s1; else suffix = s2; if(suffix == NULL) suffix = s + n; } if(!isWhitespace(suffix)) { do_log(L_ERROR, "Couldn't parse network %s.\n", buf); goto fail; } rc = 0; rc6 = 0; rc = 
inet_aton(buf, &ina); #ifdef HAVE_IPv6 if(rc == 0) { rc6 = inet_pton(AF_INET6, buf, &ina6); } #endif if(rc == 0 && rc6 == 0) { do_log(L_ERROR, "Couldn't parse network %s.\n", buf); goto fail; } nl[i].prefix = prefix; if(rc) { nl[i].af = 4; memcpy(nl[i].data, &ina, 4); } else { #ifdef HAVE_IPv6 nl[i].af = 6; memcpy(nl[i].data, &ina6, 16); #else abort(); #endif } } nl[i].af = 0; return nl; fail: free(nl); return NULL; } /* Returns 1 if the first n bits of a and b are equal */ static int bitmatch(const unsigned char *a, const unsigned char *b, int n) { if(n >= 8) { if(memcmp(a, b, n / 8) != 0) return 0; } if(n % 8 != 0) { int mask = (~0) << (8 - n % 8); if((a[n / 8] & mask) != (b[n / 8] & mask)) return 0; } return 1; } /* Returns 1 if the address in data is in list */ static int match(int af, unsigned char *data, NetAddressPtr list) { int i; #ifdef HAVE_IPv6 static const unsigned char v6mapped[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xFF, 0xFF }; #endif i = 0; while(list[i].af != 0) { if(af == 4 && list[i].af == 4) { if(bitmatch(data, list[i].data, list[i].prefix >= 0 ? list[i].prefix : 32)) return 1; #ifdef HAVE_IPv6 } else if(af == 6 && list[i].af == 6) { if(bitmatch(data, list[i].data, list[i].prefix >= 0 ? list[i].prefix : 128)) return 1; } else if(af == 6 && list[i].af == 4) { if(bitmatch(data, v6mapped, 96)) { if(bitmatch(data + 12, list[i].data, list[i].prefix >= 0 ? list[i].prefix : 32)) return 1; } } else if(af == 4 && list[i].af == 6) { if(bitmatch(list[i].data, v6mapped, 96)) { if(bitmatch(data, list[i].data + 12, list[i].prefix >= 96 ? 
list[i].prefix - 96 : 32)) return 1; } #endif } else { abort(); } i++; } return 0; } int netAddressMatch(int fd, NetAddressPtr list) { int rc; unsigned int len; struct sockaddr_in sain; #ifdef HAVE_IPv6 struct sockaddr_in6 sain6; #endif len = sizeof(sain); rc = getpeername(fd, (struct sockaddr*)&sain, &len); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't get peer name"); return -1; } if(sain.sin_family == AF_INET) { return match(4, (unsigned char*)&sain.sin_addr, list); #ifdef HAVE_IPv6 } else if(sain.sin_family == AF_INET6) { len = sizeof(sain6); rc = getpeername(fd, (struct sockaddr*)&sain6, &len); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't get peer name"); return -1; } if(sain6.sin6_family != AF_INET6) { do_log(L_ERROR, "Inconsistent peer name"); return -1; } return match(6, (unsigned char*)&sain6.sin6_addr, list); #endif } else { do_log(L_ERROR, "Unknown address family %d\n", sain.sin_family); return -1; } return 0; } polipo-1.0.4.1/http_parse.h0000644000175000017500000000507311331407220015007 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ typedef struct HTTPRange { int from; int to; int full_length; } HTTPRangeRec, *HTTPRangePtr; extern int censorReferer; extern AtomPtr atomContentType, atomContentEncoding; void preinitHttpParser(void); void initHttpParser(void); int httpParseClientFirstLine(const char *buf, int offset, int *method_return, AtomPtr *url_return, int *version_return); int httpParseServerFirstLine(const char *buf, int *status_return, int *version_return, AtomPtr *message_return); int findEndOfHeaders(const char *buf, int from, int to, int *body_return); int httpParseHeaders(int, AtomPtr, const char *, int, HTTPRequestPtr, AtomPtr*, int*, CacheControlPtr, HTTPConditionPtr *, int*, time_t*, time_t*, time_t*, time_t*, time_t*, int*, int*, char**, AtomPtr*, HTTPRangePtr, HTTPRangePtr, char**, AtomPtr*, AtomPtr*); int httpFindHeader(AtomPtr header, const char *headers, int hlen, int *value_begin_return, int *value_end_return); int parseUrl(const char *url, int len, int *x_return, int *y_return, int *port_return, int *z_return); int urlIsLocal(const char *url, int len); int urlIsSpecial(const char *url, int len); int parseChunkSize(const char *buf, int i, int end, int *chunk_size_return); int checkVia(AtomPtr, AtomPtr); polipo-1.0.4.1/http_parse.c0000644000175000017500000013716111331407220015006 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is 
furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "polipo.h" static int getNextWord(const char *buf, int i, int *x_return, int *y_return); static int getNextToken(const char *buf, int i, int *x_return, int *y_return); static int getNextTokenInList(const char *buf, int i, int *x_return, int *y_return, int *z_return, int *t_return, int *end_return); static AtomPtr atomConnection, atomProxyConnection, atomContentLength, atomHost, atomAcceptRange, atomTE, atomReferer, atomProxyAuthenticate, atomProxyAuthorization, atomKeepAlive, atomTrailer, atomUpgrade, atomDate, atomExpires, atomIfModifiedSince, atomIfUnmodifiedSince, atomIfRange, atomLastModified, atomIfMatch, atomIfNoneMatch, atomAge, atomTransferEncoding, atomETag, atomCacheControl, atomPragma, atomContentRange, atomRange, atomVia, atomVary, atomExpect, atomAuthorization, atomSetCookie, atomCookie, atomCookie2, atomXPolipoDate, atomXPolipoAccess, atomXPolipoLocation, atomXPolipoBodyOffset; AtomPtr atomContentType, atomContentEncoding; int censorReferer = 0; int laxHttpParser = 1; static AtomListPtr censoredHeaders; void preinitHttpParser() { CONFIG_VARIABLE_SETTABLE(censorReferer, CONFIG_TRISTATE, configIntSetter, "Censor referer headers."); censoredHeaders = makeAtomList(NULL, 0); if(censoredHeaders == NULL) { do_log(L_ERROR, "Couldn't allocate censored atoms.\n"); exit(1); } CONFIG_VARIABLE(censoredHeaders, 
CONFIG_ATOM_LIST_LOWER, "Headers to censor."); CONFIG_VARIABLE_SETTABLE(laxHttpParser, CONFIG_BOOLEAN, configIntSetter, "Ignore unknown HTTP headers."); } void initHttpParser() { #define A(name, value) name = internAtom(value); if(!name) goto fail; /* These must be in lower-case */ A(atomConnection, "connection"); A(atomProxyConnection, "proxy-connection"); A(atomContentLength, "content-length"); A(atomHost, "host"); A(atomAcceptRange, "accept-range"); A(atomTE, "te"); A(atomReferer, "referer"); A(atomProxyAuthenticate, "proxy-authenticate"); A(atomProxyAuthorization, "proxy-authorization"); A(atomKeepAlive, "keep-alive"); A(atomTrailer, "trailer"); A(atomUpgrade, "upgrade"); A(atomDate, "date"); A(atomExpires, "expires"); A(atomIfModifiedSince, "if-modified-since"); A(atomIfUnmodifiedSince, "if-unmodified-since"); A(atomIfRange, "if-range"); A(atomLastModified, "last-modified"); A(atomIfMatch, "if-match"); A(atomIfNoneMatch, "if-none-match"); A(atomAge, "age"); A(atomTransferEncoding, "transfer-encoding"); A(atomETag, "etag"); A(atomCacheControl, "cache-control"); A(atomPragma, "pragma"); A(atomContentRange, "content-range"); A(atomRange, "range"); A(atomVia, "via"); A(atomContentType, "content-type"); A(atomContentEncoding, "content-encoding"); A(atomVary, "vary"); A(atomExpect, "expect"); A(atomAuthorization, "authorization"); A(atomSetCookie, "set-cookie"); A(atomCookie, "cookie"); A(atomCookie2, "cookie2"); A(atomXPolipoDate, "x-polipo-date"); A(atomXPolipoAccess, "x-polipo-access"); A(atomXPolipoLocation, "x-polipo-location"); A(atomXPolipoBodyOffset, "x-polipo-body-offset"); #undef A return; fail: do_log(L_ERROR, "Couldn't allocate atom.\n"); exit(1); } static int getNextWord(const char *restrict buf, int i, int *x_return, int *y_return) { int x, y; while(buf[i] == ' ') i++; if(buf[i] == '\n' || buf[i] == '\r') return -1; x = i; while(buf[i] > 32 && buf[i] < 127) i++; y = i; *x_return = x; *y_return = y; return 0; } static int skipComment(const char 
*restrict buf, int i) { assert(buf[i] == '('); i++; while(1) { if(buf[i] == '\\' && buf[i + 1] == ')') i+=2; else if(buf[i] == ')') return i + 1; else if(buf[i] == '\n') { if(buf[i + 1] == ' ' || buf[i + 1] == '\t') i += 2; else return -1; } else if(buf[i] == '\r') { if(buf[i + 1] != '\n') return -1; if(buf[i + 2] == ' ' || buf[i + 2] == '\t') i += 3; else return -1; } else { i++; } } return i; } static int skipWhitespace(const char *restrict buf, int i) { while(1) { if(buf[i] == ' ' || buf[i] == '\t') i++; else if(buf[i] == '(') { i = skipComment(buf, i); if(i < 0) return -1; } else if(buf[i] == '\n') { if(buf[i + 1] == ' ' || buf[i + 1] == '\t') i += 2; else return i; } else if(buf[i] == '\r' && buf[i + 1] == '\n') { if(buf[i + 2] == ' ' || buf[i + 2] == '\t') i += 3; else return i; } else return i; } } static int getNextToken(const char *restrict buf, int i, int *x_return, int *y_return) { int x, y; again: while(buf[i] == ' ' || buf[i] == '\t') i++; if(buf[i] == '(') { i++; while(buf[i] != ')') { if(buf[i] == '\n' || buf[i] == '\r') return -1; if(buf[i] == '\\' && buf[i + 1] != '\n' && buf[i + 1] != '\r') buf += 2; else buf++; } goto again; } if(buf[i] == '\n') { if(buf[i + 1] == ' ' || buf[i + 1] == '\t') { i += 2; goto again; } else { return -1; } } if(buf[i] == '\r') { if(buf[i + 1] == '\n' && (buf[i + 2] == ' ' || buf[i + 2] == '\t')) { i += 3; goto again; } else { return -1; } } x = i; while(buf[i] > 32 && buf[i] < 127) { switch(buf[i]) { case '(': case ')': case '<': case '>': case '@': case ',': case ';': case ':': case '\\': case '/': case '[': case ']': case '?': case '=': case '{': case '}': case ' ': case '\t': goto out; default: i++; } } out: y = i; *x_return = x; *y_return = y; return y; } static int getNextETag(const char * restrict buf, int i, int *x_return, int *y_return, int *weak_return) { int weak = 0; int x, y; while(buf[i] == ' ' || buf[i] == '\t') i++; if(buf[i] == 'W' && buf[i + 1] == '/') { weak = 1; i += 2; } if(buf[i] == '"') i++; else 
return -1; x = i; while(buf[i] != '"') { if(buf[i] == '\r' && buf[i] == '\n') return -1; i++; } y = i; i++; *x_return = x; *y_return = y; *weak_return = weak; return i; } static int getNextTokenInList(const char *restrict buf, int i, int *x_return, int *y_return, int *z_return, int *t_return, int *end_return) { int j, x, y, z = -1, t = -1, end; j = getNextToken(buf, i, &x, &y); if(j < 0) return -1; while(buf[j] == ' ' || buf[j] == '\t') j++; if(buf[j] == '=') { j++; while(buf[j] == ' ' || buf[j] == '\t') j++; z = j; while(buf[j] != ',' && buf[j] != '\n' && buf[j] != '\r') j++; } if(buf[j] == '\n' || buf[j] == '\r') { if(buf[j] == '\r') { if(buf[j + 1] != '\n') return -1; j += 2; } else j++; end = 1; if(buf[j] == ' ' || buf[j] == '\t') { while(buf[j] == ' ' || buf[j] == '\t') j++; end = 0; } } else if(buf[j] == ',') { j++; while(buf[j] == ' ' || buf[j] == '\t') j++; end = 0; } else { return -1; } *x_return = x; *y_return = y; if(z_return) *z_return = z; if(t_return) *t_return = t; *end_return = end; return j; } static inline int token_compare(const char *buf, int start, int end, const char *s) { return (strcasecmp_n(s, buf + start, end - start) == 0); } static int skipEol(const char *restrict buf, int i) { while(buf[i] == ' ') i++; if(buf[i] == '\n') return i + 1; else if(buf[i] == '\r') { if(buf[i + 1] == '\n') return i + 2; else return -1; } else { return -1; } } static int skipToEol(const char *restrict buf, int i, int *start_return) { while(buf[i] != '\n' && buf[i] != '\r') i++; if(buf[i] == '\n') { *start_return = i; return i + 1; } else if(buf[i] == '\r') { if(buf[i + 1] == '\n') { *start_return = i; return i + 2; } else { return -1; } } return -1; } static int getHeaderValue(const char *restrict buf, int start, int *value_start_return, int *value_end_return) { int i, j, k; while(buf[start] == ' ' || buf[start] == '\t') start++; i = start; again: j = skipToEol(buf, i, &k); if(j < 0) return -1; if(buf[j] == ' ' || buf[j] == '\t') { i = j + 1; goto again; } 
*value_start_return = start; *value_end_return = k; return j; } int httpParseClientFirstLine(const char *restrict buf, int offset, int *method_return, AtomPtr *url_return, int *version_return) { int i = 0; int x, y; int method; AtomPtr url; int version = HTTP_UNKNOWN; int eol; i = offset; i = getNextWord(buf, i, &x, &y); if(i < 0) return -1; if(y == x + 3 && memcmp(buf + x, "GET", 3) == 0) method = METHOD_GET; else if(y == x + 4 && memcmp(buf + x, "HEAD", 4) == 0) method = METHOD_HEAD; else if(y == x + 4 && memcmp(buf + x, "POST", 4) == 0) method = METHOD_POST; else if(y == x + 3 && memcmp(buf + x, "PUT", 3) == 0) method = METHOD_PUT; else if(y == x + 7 && memcmp(buf + x, "CONNECT", 7) == 0) method = METHOD_CONNECT; else method = METHOD_UNKNOWN; i = getNextWord(buf, y + 1, &x, &y); if(i < 0) return -1; url = internAtomN(buf + x, y - x); i = getNextWord(buf, y + 1, &x, &y); if(i < 0) { releaseAtom(url); return -1; } if(y == x + 8) { if(memcmp(buf + x, "HTTP/1.", 7) != 0) version = HTTP_UNKNOWN; else if(buf[x + 7] == '0') version = HTTP_10; else if(buf[x + 7] >= '1' && buf[x + 7] <= '9') version = HTTP_11; else version = HTTP_UNKNOWN; } eol = skipEol(buf, y); if(eol < 0) return -1; *method_return = method; if(url_return) *url_return = url; else releaseAtom(url); *version_return = version; return eol; } int httpParseServerFirstLine(const char *restrict buf, int *status_return, int *version_return, AtomPtr *message_return) { int i = 0; int x, y, eol; int status; int version = HTTP_UNKNOWN; i = getNextWord(buf, 0, &x, &y); if(i < 0) return -1; if(y == x + 8 && memcmp(buf + x, "HTTP/1.0", 8) == 0) version = HTTP_10; else if(y >= x + 8 && memcmp(buf + x, "HTTP/1.", 7) == 0) version = HTTP_11; else version = HTTP_UNKNOWN; i = getNextWord(buf, y + 1, &x, &y); if(i < 0) return -1; if(y == x + 3) status = atol(buf + x); else return -1; i = skipToEol(buf, y, &eol); if(i < 0) return -1; *status_return = status; *version_return = version; if(message_return) { /* Netscape 
enterprise bug */ if(eol > y) *message_return = internAtomN(buf + y + 1, eol - y - 1); else *message_return = internAtom("No message"); } return i; } static int parseInt(const char *restrict buf, int start, int *val_return) { int i = start, val = 0; if(!digit(buf[i])) return -1; while(digit(buf[i])) { val = val * 10 + (buf[i] - '0'); i++; } *val_return = val; return i; } /* Returned *name_start_return is -1 at end of headers, -2 if the line couldn't be parsed. */ static int parseHeaderLine(const char *restrict buf, int start, int *name_start_return, int *name_end_return, int *value_start_return, int *value_end_return) { int i; int name_start, name_end, value_start, value_end; if(buf[start] == '\n') { *name_start_return = -1; return start + 1; } if(buf[start] == '\r' && buf[start + 1] == '\n') { *name_start_return = -1; return start + 2; } i = getNextToken(buf, start, &name_start, &name_end); if(i < 0 || buf[i] != ':') goto syntax; i++; while(buf[i] == ' ' || buf[i] == '\t') i++; i = getHeaderValue(buf, i, &value_start, &value_end); if(i < 0) goto syntax; *name_start_return = name_start; *name_end_return = name_end; *value_start_return = value_start; *value_end_return = value_end; return i; syntax: i = start; while(1) { if(buf[i] == '\n') { i++; break; } if(buf[i] == '\r' && buf[i + 1] == '\n') { i += 2; break; } i++; } *name_start_return = -2; return i; } int findEndOfHeaders(const char *restrict buf, int from, int to, int *body_return) { int i = from; int eol = 0; while(i < to) { if(buf[i] == '\n') { if(eol) { *body_return = i + 1; return eol; } eol = i; i++; } else if(buf[i] == '\r') { if(i < to - 1 && buf[i + 1] == '\n') { if(eol) { *body_return = eol; return i + 2; } eol = i; i += 2; } else { eol = 0; i++; } } else { eol = 0; i++; } } return -1; } static int parseContentRange(const char *restrict buf, int i, int *from_return, int *to_return, int *full_len_return) { int j; int from, to, full_len; i = skipWhitespace(buf, i); if(i < 0) return -1; 
if(!token_compare(buf, i, i + 5, "bytes")) return -1; i += 5; i = skipWhitespace(buf, i); if(buf[i] == '*') { from = 0; to = -1; i++; } else { i = parseInt(buf, i, &from); if(i < 0) return -1; if(buf[i] != '-') return -1; i++; i = parseInt(buf, i, &to); if(i < 0) return -1; to = to + 1; } if(buf[i] != '/') return -1; i++; if(buf[i] == '*') full_len = -1; else { i = parseInt(buf, i, &full_len); if(i < 0) return -1; } j = skipEol(buf, i); if(j < 0) return -1; *from_return = from; *to_return = to; *full_len_return = full_len; return i; } static int parseRange(const char *restrict buf, int i, int *from_return, int *to_return) { int j; int from, to; i = skipWhitespace(buf, i); if(i < 0) return -1; if(!token_compare(buf, i, i + 6, "bytes=")) return -1; i += 6; i = skipWhitespace(buf, i); if(buf[i] == '-') { from = 0; } else { i = parseInt(buf, i, &from); if(i < 0) return -1; } if(buf[i] != '-') return -1; i++; j = parseInt(buf, i, &to); if(j < 0) to = -1; else { to = to + 1; i = j; } j = skipEol(buf, i); if(j < 0) return -1; *from_return = from; *to_return = to; return i; } static int urlSameHost(const char *url1, int len1, const char *url2, int len2) { int i; if(len1 < 7 || len2 < 7) return 0; if(memcmp(url1 + 4, "://", 3) != 0 || memcmp(url2 + 4, "://", 3) != 0) return 0; i = 7; while(i < len1 && i < len2 && url1[i] != '/' && url2[i] != '/') { if((url1[i] | 0x20) != (url2[i] | 0x20)) break; i++; } if((i == len1 || url1[i] == '/') && ((i == len2 || url2[i] == '/'))) return 1; return 0; } static char * resize_hbuf(char *hbuf, int *size, char *hbuf_small) { int new_size = 2 * *size; char *new_hbuf; if(new_size <= *size) goto fail; if(hbuf == hbuf_small) { new_hbuf = malloc(new_size); if(new_hbuf == NULL) goto fail; memcpy(new_hbuf, hbuf, *size); } else { new_hbuf = realloc(hbuf, new_size); if(new_hbuf == NULL) goto fail; } *size = new_size; return new_hbuf; fail: if(hbuf != hbuf_small) free(hbuf); *size = 0; return NULL; } int httpParseHeaders(int client, AtomPtr url, 
const char *buf, int start, HTTPRequestPtr request, AtomPtr *headers_return, int *len_return, CacheControlPtr cache_control_return, HTTPConditionPtr *condition_return, int *te_return, time_t *date_return, time_t *last_modified_return, time_t *expires_return, time_t *polipo_age_return, time_t *polipo_access_return, int *polipo_body_offset_return, int *age_return, char **etag_return, AtomPtr *expect_return, HTTPRangePtr range_return, HTTPRangePtr content_range_return, char **location_return, AtomPtr *via_return, AtomPtr *auth_return) { int local = url ? urlIsLocal(url->string, url->length) : 0; char hbuf_small[512]; char *hbuf = hbuf_small; int hbuf_size = 512, hbuf_length = 0; int i, j, name_start, name_end, value_start, value_end, token_start, token_end, end; AtomPtr name = NULL; time_t date = -1, last_modified = -1, expires = -1, polipo_age = -1, polipo_access = -1, polipo_body_offset = -1; int len = -1; CacheControlRec cache_control; char *endptr; int te = TE_IDENTITY; int age = -1; char *etag = NULL, *ifrange = NULL; int persistent = (!request || (request->connection->version != HTTP_10)); char *location = NULL; AtomPtr via = NULL; AtomPtr auth = NULL; AtomPtr expect = NULL; HTTPConditionPtr condition; time_t ims = -1, inms = -1; char *im = NULL, *inm = NULL; AtomListPtr hopToHop = NULL; HTTPRangeRec range = {-1, -1, -1}, content_range = {-1, -1, -1}; int haveCacheControl = 0; #define RESIZE_HBUF() \ do { \ hbuf = resize_hbuf(hbuf, &hbuf_size, hbuf_small); \ if(hbuf == NULL) \ goto fail; \ } while(0) cache_control.flags = 0; cache_control.max_age = -1; cache_control.s_maxage = -1; cache_control.min_fresh = -1; cache_control.max_stale = -1; i = start; while(1) { i = parseHeaderLine(buf, i, &name_start, &name_end, &value_start, &value_end); if(i < 0) { do_log(L_ERROR, "Couldn't find end of header line.\n"); goto fail; } if(name_start == -1) break; if(name_start < 0) continue; name = internAtomLowerN(buf + name_start, name_end - name_start); if(name == 
atomConnection) { j = getNextTokenInList(buf, value_start, &token_start, &token_end, NULL, NULL, &end); while(1) { if(j < 0) { do_log(L_ERROR, "Couldn't parse Connection: "); do_log_n(L_ERROR, buf + value_start, value_end - value_start); do_log(L_ERROR, ".\n"); goto fail; } if(token_compare(buf, token_start, token_end, "close")) { persistent = 0; } else if(token_compare(buf, token_start, token_end, "keep-alive")) { persistent = 1; } else { if(hopToHop == NULL) hopToHop = makeAtomList(NULL, 0); if(hopToHop == NULL) { do_log(L_ERROR, "Couldn't allocate atom list.\n"); goto fail; } atomListCons(internAtomLowerN(buf + token_start, token_end - token_start), hopToHop); } if(end) break; j = getNextTokenInList(buf, j, &token_start, &token_end, NULL, NULL, &end); } } else if(name == atomCacheControl) haveCacheControl = 1; releaseAtom(name); name = NULL; } i = start; while(1) { i = parseHeaderLine(buf, i, &name_start, &name_end, &value_start, &value_end); if(i < 0) { do_log(L_ERROR, "Couldn't find end of header line.\n"); goto fail; } if(name_start == -1) break; if(name_start < 0) { do_log(L_WARN, "Couldn't parse header line.\n"); if(laxHttpParser) continue; else goto fail; } name = internAtomLowerN(buf + name_start, name_end - name_start); if(name == atomProxyConnection) { j = getNextTokenInList(buf, value_start, &token_start, &token_end, NULL, NULL, &end); while(1) { if(j < 0) { do_log(L_WARN, "Couldn't parse Proxy-Connection:"); do_log_n(L_WARN, buf + value_start, value_end - value_start); do_log(L_WARN, ".\n"); persistent = 0; break; } if(token_compare(buf, token_start, token_end, "close")) { persistent = 0; } else if(token_compare(buf, token_start, token_end, "keep-alive")) { persistent = 1; } if(end) break; j = getNextTokenInList(buf, j, &token_start, &token_end, NULL, NULL, &end); } } else if(name == atomContentLength) { j = skipWhitespace(buf, value_start); if(j < 0) { do_log(L_WARN, "Couldn't parse Content-Length: \n"); do_log_n(L_WARN, buf + value_start, value_end 
- value_start); do_log(L_WARN, ".\n"); len = -1; } else { len = strtol(buf + value_start, &endptr, 10); if(endptr <= buf + value_start) { do_log(L_WARN, "Couldn't parse Content-Length: \n"); do_log_n(L_WARN, buf + value_start, value_end - value_start); do_log(L_WARN, ".\n"); len = -1; } } } else if((!local && name == atomProxyAuthorization) || (local && name == atomAuthorization)) { if(auth_return) { auth = internAtomN(buf + value_start, value_end - value_start); if(auth == NULL) { do_log(L_ERROR, "Couldn't allocate authorization.\n"); goto fail; } } } else if(name == atomReferer) { int h; if(censorReferer == 0 || (censorReferer == 1 && url != NULL && urlSameHost(url->string, url->length, buf + value_start, value_end - value_start))) { while(hbuf_length > hbuf_size - 2) RESIZE_HBUF(); hbuf[hbuf_length++] = '\r'; hbuf[hbuf_length++] = '\n'; do { h = snnprint_n(hbuf, hbuf_length, hbuf_size, buf + name_start, value_end - name_start); if(h < 0) RESIZE_HBUF(); } while(h < 0); hbuf_length = h; } } else if(name == atomTrailer || name == atomUpgrade) { do_log(L_ERROR, "Trailers or upgrade present.\n"); goto fail; } else if(name == atomDate || name == atomExpires || name == atomIfModifiedSince || name == atomIfUnmodifiedSince || name == atomLastModified || name == atomXPolipoDate || name == atomXPolipoAccess) { time_t t; j = parse_time(buf, value_start, value_end, &t); if(j < 0) { if(name != atomExpires) { do_log(L_WARN, "Couldn't parse %s: ", name->string); do_log_n(L_WARN, buf + value_start, value_end - value_start); do_log(L_WARN, "\n"); } t = -1; } if(name == atomDate) { if(t >= 0) date = t; } else if(name == atomExpires) { if(t >= 0) expires = t; else expires = 0; } else if(name == atomLastModified) last_modified = t; else if(name == atomIfModifiedSince) ims = t; else if(name == atomIfUnmodifiedSince) inms = t; else if(name == atomXPolipoDate) polipo_age = t; else if(name == atomXPolipoAccess) polipo_access = t; } else if(name == atomAge) { j = skipWhitespace(buf, 
value_start); if(j < 0) { age = -1; } else { age = strtol(buf + value_start, &endptr, 10); if(endptr <= buf + value_start) age = -1; } if(age < 0) { do_log(L_WARN, "Couldn't parse age: \n"); do_log_n(L_WARN, buf + value_start, value_end - value_start); do_log(L_WARN, " -- ignored.\n"); } } else if(name == atomXPolipoBodyOffset) { j = skipWhitespace(buf, value_start); if(j < 0) { do_log(L_ERROR, "Couldn't parse body offset.\n"); goto fail; } else { polipo_body_offset = strtol(buf + value_start, &endptr, 10); if(endptr <= buf + value_start) { do_log(L_ERROR, "Couldn't parse body offset.\n"); goto fail; } } } else if(name == atomTransferEncoding) { if(token_compare(buf, value_start, value_end, "identity")) te = TE_IDENTITY; else if(token_compare(buf, value_start, value_end, "chunked")) te = TE_CHUNKED; else te = TE_UNKNOWN; } else if(name == atomETag || name == atomIfNoneMatch || name == atomIfMatch || name == atomIfRange) { int x, y; int weak; char *e; j = getNextETag(buf, value_start, &x, &y, &weak); if(j < 0) { if(buf[value_start] != '\r' && buf[value_start] != '\n') do_log(L_ERROR, "Couldn't parse ETag.\n"); } else if(weak) { do_log(L_WARN, "Server returned weak ETag -- ignored.\n"); } else { e = strdup_n(buf + x, y - x); if(e == NULL) goto fail; if(name == atomETag) { if(!etag) etag = e; else free(e); } else if(name == atomIfNoneMatch) { if(!inm) inm = e; else free(e); } else if(name == atomIfMatch) { if(!im) im = e; else free(e); } else if(name == atomIfRange) { if(!ifrange) ifrange = e; else free(e); } else { abort(); } } } else if(name == atomCacheControl) { int v_start, v_end; j = getNextTokenInList(buf, value_start, &token_start, &token_end, &v_start, &v_end, &end); while(1) { if(j < 0) { do_log(L_WARN, "Couldn't parse Cache-Control.\n"); cache_control.flags |= CACHE_NO; break; } if(token_compare(buf, token_start, token_end, "no-cache")) { cache_control.flags |= CACHE_NO; } else if(token_compare(buf, token_start, token_end, "public")) { cache_control.flags 
|= CACHE_PUBLIC; } else if(token_compare(buf, token_start, token_end, "private")) { cache_control.flags |= CACHE_PRIVATE; } else if(token_compare(buf, token_start, token_end, "no-store")) { cache_control.flags |= CACHE_NO_STORE; } else if(token_compare(buf, token_start, token_end, "no-transform")) { cache_control.flags |= CACHE_NO_TRANSFORM; } else if(token_compare(buf, token_start, token_end, "must-revalidate") || token_compare(buf, token_start, token_end, "must-validate")) { /* losers */ cache_control.flags |= CACHE_MUST_REVALIDATE; } else if(token_compare(buf, token_start, token_end, "proxy-revalidate")) { cache_control.flags |= CACHE_PROXY_REVALIDATE; } else if(token_compare(buf, token_start, token_end, "only-if-cached")) { cache_control.flags |= CACHE_ONLY_IF_CACHED; } else if(token_compare(buf, token_start, token_end, "max-age") || token_compare(buf, token_start, token_end, "maxage")) { /* losers */ int a; if(v_start <= 0 || !digit(buf[v_start])) { do_log(L_WARN, "Couldn't parse Cache-Control: "); do_log_n(L_WARN, buf + token_start, (v_end >= 0 ? v_end : token_end) - token_start); do_log(L_WARN, "\n"); } else { a = atoi(buf + v_start); cache_control.max_age = a; } } else if(token_compare(buf, token_start, token_end, "s-maxage")) { int a; if(v_start <= 0 || !digit(buf[v_start])) { do_log(L_WARN, "Couldn't parse Cache-Control: "); do_log_n(L_WARN, buf + token_start, (v_end >= 0 ? v_end : token_end) - token_start); do_log(L_WARN, "\n"); } else { a = atoi(buf + v_start); cache_control.max_age = a; } } else if(token_compare(buf, token_start, token_end, "min-fresh")) { int a; if(v_start <= 0 || !digit(buf[v_start])) { do_log(L_WARN, "Couldn't parse Cache-Control: "); do_log_n(L_WARN, buf + token_start, (v_end >= 0 ? 
v_end : token_end) - token_start); do_log(L_WARN, "\n"); } else { a = atoi(buf + v_start); cache_control.max_age = a; } } else if(token_compare(buf, token_start, token_end, "max-stale")) { int a; if(v_start <= 0 || !digit(buf[v_start])) { do_log(L_WARN, "Couldn't parse Cache-Control: "); do_log_n(L_WARN, buf + token_start, (v_end >= 0 ? v_end : token_end) - token_start); do_log(L_WARN, "\n"); } else { a = atoi(buf + v_start); cache_control.max_stale = a; } } else { do_log(L_WARN, "Unsupported Cache-Control directive "); do_log_n(L_WARN, buf + token_start, (v_end >= 0 ? v_end : token_end) - token_start); do_log(L_WARN, " -- ignored.\n"); } if(end) break; j = getNextTokenInList(buf, j, &token_start, &token_end, &v_start, &v_end, &end); } } else if(name == atomContentRange) { if(!client) { j = parseContentRange(buf, value_start, &content_range.from, &content_range.to, &content_range.full_length); if(j < 0) { do_log(L_ERROR, "Couldn't parse Content-Range: "); do_log_n(L_ERROR, buf + value_start, value_end - value_start); do_log(L_ERROR, "\n"); goto fail; } } else { do_log(L_ERROR, "Content-Range from client.\n"); goto fail; } } else if(name == atomRange) { if(client) { j = parseRange(buf, value_start, &range.from, &range.to); if(j < 0) { do_log(L_WARN, "Couldn't parse Range -- ignored.\n"); range.from = -1; range.to = -1; } } else { do_log(L_WARN, "Range from server -- ignored\n"); } } else if(name == atomXPolipoLocation) { if(location_return) { location = strdup_n(buf + value_start, value_end - value_start); if(location == NULL) { do_log(L_ERROR, "Couldn't allocate location.\n"); goto fail; } } } else if(name == atomVia) { if(via_return) { AtomPtr new_via, full_via; new_via = internAtomN(buf + value_start, value_end - value_start); if(new_via == NULL) { do_log(L_ERROR, "Couldn't allocate via.\n"); goto fail; } if(via) { full_via = internAtomF("%s, %s", via->string, new_via->string); releaseAtom(new_via); if(full_via == NULL) { do_log(L_ERROR, "Couldn't allocate via"); 
goto fail; } releaseAtom(via); via = full_via; } else { via = new_via; } } } else if(name == atomExpect) { if(expect_return) { expect = internAtomLowerN(buf + value_start, value_end - value_start); if(expect == NULL) { do_log(L_ERROR, "Couldn't allocate expect.\n"); goto fail; } } } else { if(!client && name == atomContentType) { if(token_compare(buf, value_start, value_end, "multipart/byteranges")) { do_log(L_ERROR, "Server returned multipart/byteranges -- yuck!\n"); goto fail; } } if(name == atomVary) { if(!token_compare(buf, value_start, value_end, "host") && !token_compare(buf, value_start, value_end, "*")) { /* What other vary headers should be ignored? */ do_log(L_VARY, "Vary header present ("); do_log_n(L_VARY, buf + value_start, value_end - value_start); do_log(L_VARY, ").\n"); } cache_control.flags |= CACHE_VARY; } else if(name == atomAuthorization) { cache_control.flags |= CACHE_AUTHORIZATION; } if(name == atomPragma) { /* Pragma is only defined for the client, and the only standard value is no-cache (RFC 1945, 10.12). However, we honour a Pragma: no-cache for both the client and the server when there's no Cache-Control header. In all cases, we pass the Pragma header to the next hop. 
*/ if(!haveCacheControl) { j = getNextTokenInList(buf, value_start, &token_start, &token_end, NULL, NULL, &end); while(1) { if(j < 0) { do_log(L_WARN, "Couldn't parse Pragma.\n"); cache_control.flags |= CACHE_NO; break; } if(token_compare(buf, token_start, token_end, "no-cache")) cache_control.flags = CACHE_NO; if(end) break; j = getNextTokenInList(buf, j, &token_start, &token_end, NULL, NULL, &end); } } } if(!client && (name == atomSetCookie || name == atomCookie || name == atomCookie2)) cache_control.flags |= CACHE_COOKIE; if(hbuf) { if(name != atomConnection && name != atomHost && name != atomAcceptRange && name != atomTE && name != atomProxyAuthenticate && name != atomKeepAlive && (!hopToHop || !atomListMember(name, hopToHop)) && !atomListMember(name, censoredHeaders)) { int h; while(hbuf_length > hbuf_size - 2) RESIZE_HBUF(); hbuf[hbuf_length++] = '\r'; hbuf[hbuf_length++] = '\n'; do { h = snnprint_n(hbuf, hbuf_length, hbuf_size, buf + name_start, value_end - name_start); if(h < 0) RESIZE_HBUF(); } while(h < 0); hbuf_length = h; } } } releaseAtom(name); name = NULL; } if(headers_return) { AtomPtr pheaders = NULL; pheaders = internAtomN(hbuf, hbuf_length); if(!pheaders) goto fail; *headers_return = pheaders; } if(hbuf != hbuf_small) free(hbuf); hbuf = NULL; hbuf_size = 0; if(request) if(!persistent) request->flags &= ~REQUEST_PERSISTENT; if(te != TE_IDENTITY) len = -1; if(len_return) *len_return = len; if(cache_control_return) *cache_control_return = cache_control; if(condition_return) { if(ims >= 0 || inms >= 0 || im || inm || ifrange) { condition = httpMakeCondition(); if(condition) { condition->ims = ims; condition->inms = inms; condition->im = im; condition->inm = inm; condition->ifrange = ifrange; } else { do_log(L_ERROR, "Couldn't allocate condition.\n"); if(im) free(im); if(inm) free(inm); } } else { condition = NULL; } *condition_return = condition; } else { assert(!im && !inm); } if(te_return) *te_return = te; if(date_return) *date_return = date; 
if(last_modified_return) *last_modified_return = last_modified; if(expires_return) *expires_return = expires; if(polipo_age_return) *polipo_age_return = polipo_age; if(polipo_access_return) *polipo_access_return = polipo_access; if(polipo_body_offset_return) *polipo_body_offset_return = polipo_body_offset; if(age_return) *age_return = age; if(etag_return) *etag_return = etag; else { if(etag) free(etag); } if(range_return) *range_return = range; if(content_range_return) *content_range_return = content_range; if(location_return) { *location_return = location; } else { if(location) free(location); } if(via_return) *via_return = via; else { if(via) releaseAtom(via); } if(expect_return) *expect_return = expect; else { if(expect) releaseAtom(expect); } if(auth_return) *auth_return = auth; else { if(auth) releaseAtom(auth); } if(hopToHop) destroyAtomList(hopToHop); return i; fail: if(hbuf && hbuf != hbuf_small) free(hbuf); if(name) releaseAtom(name); if(etag) free(etag); if(location) free(location); if(via) releaseAtom(via); if(expect) releaseAtom(expect); if(auth) releaseAtom(auth); if(hopToHop) destroyAtomList(hopToHop); return -1; #undef RESIZE_HBUF } int httpFindHeader(AtomPtr header, const char *headers, int hlen, int *value_begin_return, int *value_end_return) { int len = header->length; int i = 0; while(i + len + 1 < hlen) { if(headers[i + len] == ':' && lwrcmp(headers + i, header->string, len) == 0) { int j = i + len + 1, k; while(j < hlen && headers[j] == ' ') j++; k = j; while(k < hlen && headers[k] != '\n' && headers[k] != '\r') k++; *value_begin_return = j; *value_end_return = k; return 1; } else { while(i < hlen && headers[i] != '\n' && headers[i] != '\r') i++; i++; if(i < hlen && headers[i] == '\n') i++; } } return 0; } int parseUrl(const char *url, int len, int *x_return, int *y_return, int *port_return, int *z_return) { int x, y, z, port = -1, i = 0; if(len >= 7 && lwrcmp(url, "http://", 7) == 0) { x = 7; if(x < len && url[x] == '[') { /* RFC 2732 */ for(i 
= x + 1; i < len; i++) { if(url[i] == ']') { i++; break; } if((url[i] != ':') && !letter(url[i]) && !digit(url[i])) break; } } else { for(i = x; i < len; i++) if(url[i] == ':' || url[i] == '/') break; } y = i; if(i < len && url[i] == ':') { int j; j = atoi_n(url, i + 1, len, &port); if(j < 0) { port = 80; } else { i = j; } } else { port = 80; } } else { x = -1; y = -1; } z = i; *x_return = x; *y_return = y; *port_return = port; *z_return = z; return 0; } int urlIsLocal(const char *url, int len) { return (len > 0 && url[0] == '/'); } int urlIsSpecial(const char *url, int len) { return (len >= 8 && memcmp(url, "/polipo/", 8) == 0); } int parseChunkSize(const char *restrict buf, int i, int end, int *chunk_size_return) { int v, d; v = h2i(buf[i]); if(v < 0) return -1; i++; while(i < end) { d = h2i(buf[i]); if(d < 0) break; v = v * 16 + d; i++; } while(i < end) { if(buf[i] == ' ' || buf[i] == '\t') i++; else break; } if(i >= end - 1) return 0; if(buf[i] != '\r' || buf[i + 1] != '\n') return -1; i += 2; if(v == 0) { if(i >= end - 1) return 0; if(buf[i] != '\r') { do_log(L_ERROR, "Trailers present!\n"); return -1; } i++; if(buf[i] != '\n') return -1; i++; } *chunk_size_return = v; return i; } int checkVia(AtomPtr name, AtomPtr via) { int i; char *v; if(via == NULL || via->length == 0) return 1; v = via->string; i = 0; while(i < via->length) { while(v[i] == ' ' || v[i] == '\t' || v[i] == ',' || v[i] == '\r' || v[i] == '\n' || digit(v[i]) || v[i] == '.') i++; if(i + name->length > via->length) break; if(memcmp(v + i, name->string, name->length) == 0) { char c = v[i + name->length]; if(c == '\0' || c == ' ' || c == '\t' || c == ',' || c == '\r' || c == '\n') return 0; } i++; while(letter(v[i]) || digit(v[i]) || v[i] == '.') i++; } return 1; } polipo-1.0.4.1/http.h0000644000175000017500000001265111331407220013615 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 
associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ typedef struct _HTTPCondition { time_t ims; time_t inms; char *im; char *inm; char *ifrange; } HTTPConditionRec, *HTTPConditionPtr; typedef struct _HTTPRequest { int flags; struct _HTTPConnection *connection; ObjectPtr object; int method; int from; int to; CacheControlRec cache_control; HTTPConditionPtr condition; AtomPtr via; struct _ConditionHandler *chandler; ObjectPtr can_mutate; int error_code; struct _Atom *error_message; struct _Atom *error_headers; AtomPtr headers; struct timeval time0, time1; struct _HTTPRequest *request; struct _HTTPRequest *next; } HTTPRequestRec, *HTTPRequestPtr; /* request->flags */ #define REQUEST_PERSISTENT 1 #define REQUEST_REQUESTED 2 #define REQUEST_WAIT_CONTINUE 4 #define REQUEST_FORCE_ERROR 8 #define REQUEST_PIPELINED 16 typedef struct _HTTPConnection { int flags; int fd; char *buf; int len; int offset; HTTPRequestPtr request; HTTPRequestPtr request_last; int serviced; int version; int time; TimeEventHandlerPtr timeout; int te; char *reqbuf; int reqlen; int reqbegin; int reqoffset; int bodylen; int reqte; /* For server 
connections */ int chunk_remaining; struct _HTTPServer *server; int pipelined; int connecting; } HTTPConnectionRec, *HTTPConnectionPtr; /* connection->flags */ #define CONN_READER 1 #define CONN_WRITER 2 #define CONN_SIDE_READER 4 #define CONN_BIGBUF 8 #define CONN_BIGREQBUF 16 /* request->method */ #define METHOD_UNKNOWN -1 #define METHOD_NONE -1 #define METHOD_GET 0 #define METHOD_HEAD 1 #define METHOD_CONDITIONAL_GET 2 #define METHOD_CONNECT 3 #define METHOD_POST 4 #define METHOD_PUT 5 #define REQUEST_SIDE(request) ((request)->method >= METHOD_POST) /* server->version */ #define HTTP_10 0 #define HTTP_11 1 #define HTTP_UNKNOWN -1 /* connection->te */ #define TE_IDENTITY 0 #define TE_CHUNKED 1 #define TE_UNKNOWN -1 /* connection->connecting */ #define CONNECTING_DNS 1 #define CONNECTING_CONNECT 2 #define CONNECTING_SOCKS 3 /* the results of a conditional request. 200, 304 and 412. */ #define CONDITION_MATCH 0 #define CONDITION_NOT_MODIFIED 1 #define CONDITION_FAILED 2 extern int disableProxy; extern AtomPtr proxyName; extern int proxyPort; extern int clientTimeout, serverTimeout, serverIdleTimeout; extern int bigBufferSize; extern AtomPtr proxyAddress; extern int proxyOffline; extern int relaxTransparency; extern AtomPtr authRealm; extern AtomPtr authCredentials; extern AtomPtr parentAuthCredentials; extern AtomListPtr allowedClients; extern NetAddressPtr allowedNets; extern IntListPtr allowedPorts; extern IntListPtr tunnelAllowedPorts; extern int expectContinue; extern AtomPtr atom100Continue; extern int disableVia; extern int dontTrustVaryETag; void preinitHttp(void); void initHttp(void); int httpTimeoutHandler(TimeEventHandlerPtr); int httpSetTimeout(HTTPConnectionPtr connection, int secs); int httpWriteObjectHeaders(char *buf, int offset, int len, ObjectPtr object, int from, int to); int httpPrintCacheControl(char*, int, int, int, CacheControlPtr); char *httpMessage(int) ATTRIBUTE((pure)); int htmlString(char *buf, int n, int len, char *s, int slen); void 
htmlPrint(FILE *out, char *s, int slen); HTTPConnectionPtr httpMakeConnection(void); void httpDestroyConnection(HTTPConnectionPtr connection); void httpConnectionDestroyBuf(HTTPConnectionPtr connection); void httpConnectionDestroyReqbuf(HTTPConnectionPtr connection); HTTPRequestPtr httpMakeRequest(void); void httpDestroyRequest(HTTPRequestPtr request); void httpQueueRequest(HTTPConnectionPtr, HTTPRequestPtr); HTTPRequestPtr httpDequeueRequest(HTTPConnectionPtr connection); int httpConnectionBigify(HTTPConnectionPtr); int httpConnectionBigifyReqbuf(HTTPConnectionPtr); int httpConnectionUnbigify(HTTPConnectionPtr); int httpConnectionUnbigifyReqbuf(HTTPConnectionPtr); HTTPConditionPtr httpMakeCondition(void); void httpDestroyCondition(HTTPConditionPtr condition); int httpCondition(ObjectPtr, HTTPConditionPtr); int httpWriteErrorHeaders(char *buf, int size, int offset, int do_body, int code, AtomPtr message, int close, AtomPtr, char *url, int url_len, char *etag); AtomListPtr urlDecode(char*, int); void httpTweakCachability(ObjectPtr); int httpHeaderMatch(AtomPtr header, AtomPtr headers1, AtomPtr headers2); polipo-1.0.4.1/http.c0000644000175000017500000007403011331407220013607 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "polipo.h" int disableProxy = 0; AtomPtr proxyName = NULL; int proxyPort = 8123; int clientTimeout = 120; int serverTimeout = 90; int serverIdleTimeout = 45; int bigBufferSize = (32 * 1024); AtomPtr authRealm = NULL; AtomPtr authCredentials = NULL; AtomPtr parentAuthCredentials = NULL; AtomListPtr allowedClients = NULL; NetAddressPtr allowedNets = NULL; IntListPtr allowedPorts = NULL; IntListPtr tunnelAllowedPorts = NULL; int expectContinue = 1; int dontTrustVaryETag = 1; AtomPtr atom100Continue; int disableVia = 1; /* 0 means that all failures lead to errors. 1 means that failures to connect are reported in a Warning header when stale objects are served. 2 means that only missing data is fetched from the net, stale data is served without revalidation (browser-side Cache-Control directives are still honoured). 3 means that no connections are ever attempted. 
*/ int proxyOffline = 0; int relaxTransparency = 0; AtomPtr proxyAddress = NULL; static int timeoutSetter(ConfigVariablePtr var, void *value); void preinitHttp() { proxyAddress = internAtom("127.0.0.1"); CONFIG_VARIABLE_SETTABLE(disableProxy, CONFIG_BOOLEAN, configIntSetter, "Whether to be a web server only."); CONFIG_VARIABLE_SETTABLE(proxyOffline, CONFIG_BOOLEAN, configIntSetter, "Avoid contacting remote servers."); CONFIG_VARIABLE_SETTABLE(relaxTransparency, CONFIG_TRISTATE, configIntSetter, "Avoid contacting remote servers."); CONFIG_VARIABLE(proxyPort, CONFIG_INT, "The TCP port on which the proxy listens."); CONFIG_VARIABLE(proxyAddress, CONFIG_ATOM_LOWER, "The IP address on which the proxy listens."); CONFIG_VARIABLE_SETTABLE(proxyName, CONFIG_ATOM_LOWER, configAtomSetter, "The name by which the proxy is known."); CONFIG_VARIABLE_SETTABLE(clientTimeout, CONFIG_TIME, timeoutSetter, "Client-side timeout."); CONFIG_VARIABLE_SETTABLE(serverTimeout, CONFIG_TIME, timeoutSetter, "Server-side timeout."); CONFIG_VARIABLE_SETTABLE(serverIdleTimeout, CONFIG_TIME, timeoutSetter, "Server-side idle timeout."); CONFIG_VARIABLE(authRealm, CONFIG_ATOM, "Authentication realm."); CONFIG_VARIABLE(authCredentials, CONFIG_PASSWORD, "username:password."); CONFIG_VARIABLE(parentAuthCredentials, CONFIG_PASSWORD, "username:password."); CONFIG_VARIABLE(allowedClients, CONFIG_ATOM_LIST_LOWER, "Networks from which clients are allowed to connect."); CONFIG_VARIABLE(tunnelAllowedPorts, CONFIG_INT_LIST, "Ports to which tunnelled connections are allowed."); CONFIG_VARIABLE(allowedPorts, CONFIG_INT_LIST, "Ports to which connections are allowed."); CONFIG_VARIABLE(expectContinue, CONFIG_TRISTATE, "Send Expect-Continue to servers."); CONFIG_VARIABLE(bigBufferSize, CONFIG_INT, "Size of big buffers (max size of headers)."); CONFIG_VARIABLE_SETTABLE(disableVia, CONFIG_BOOLEAN, configIntSetter, "Don't use Via headers."); CONFIG_VARIABLE(dontTrustVaryETag, CONFIG_TRISTATE, "Whether to trust the ETag 
when there's Vary."); preinitHttpParser(); } static int timeoutSetter(ConfigVariablePtr var, void *value) { configIntSetter(var, value); if(clientTimeout <= serverTimeout) clientTimeout = serverTimeout + 1; return 1; } void initHttp() { char *buf = NULL; int namelen; int n; struct hostent *host; initHttpParser(); atom100Continue = internAtom("100-continue"); if(clientTimeout <= serverTimeout) { clientTimeout = serverTimeout + 1; do_log(L_WARN, "Value of clientTimeout too small -- setting to %d.\n", clientTimeout); } if(authCredentials != NULL && authRealm == NULL) authRealm = internAtom("Polipo"); if(allowedClients) { allowedNets = parseNetAddress(allowedClients); if(allowedNets == NULL) exit(1); } if(allowedPorts == NULL) { allowedPorts = makeIntList(0); if(allowedPorts == NULL) { do_log(L_ERROR, "Couldn't allocate allowedPorts.\n"); exit(1); } intListCons(80, 100, allowedPorts); intListCons(1024, 0xFFFF, allowedPorts); } if(tunnelAllowedPorts == NULL) { tunnelAllowedPorts = makeIntList(0); if(tunnelAllowedPorts == NULL) { do_log(L_ERROR, "Couldn't allocate tunnelAllowedPorts.\n"); exit(1); } intListCons(22, 22, tunnelAllowedPorts); /* ssh */ intListCons(80, 80, tunnelAllowedPorts); /* HTTP */ intListCons(109, 110, tunnelAllowedPorts); /* POP 2 and 3*/ intListCons(143, 143, tunnelAllowedPorts); /* IMAP 2/4 */ intListCons(443, 443, tunnelAllowedPorts); /* HTTP/SSL */ intListCons(873, 873, tunnelAllowedPorts); /* rsync */ intListCons(993, 993, tunnelAllowedPorts); /* IMAP/SSL */ intListCons(995, 995, tunnelAllowedPorts); /* POP/SSL */ intListCons(2401, 2401, tunnelAllowedPorts); /* CVS */ intListCons(5222, 5223, tunnelAllowedPorts); /* Jabber */ intListCons(9418, 9418, tunnelAllowedPorts); /* Git */ } if(proxyName) return; buf = get_chunk(); if(buf == NULL) { do_log(L_ERROR, "Couldn't allocate chunk for host name.\n"); goto fail; } n = gethostname(buf, CHUNK_SIZE); if(n != 0) { do_log_error(L_WARN, errno, "Gethostname"); strcpy(buf, "polipo"); goto success; } /* 
gethostname doesn't necessarily NUL-terminate on overflow */ buf[CHUNK_SIZE - 1] = '\0'; if(strcmp(buf, "(none)") == 0 || strcmp(buf, "localhost") == 0 || strcmp(buf, "localhost.localdomain") == 0) { do_log(L_WARN, "Couldn't determine host name -- using ``polipo''.\n"); strcpy(buf, "polipo"); goto success; } if(strchr(buf, '.') != NULL) goto success; host = gethostbyname(buf); if(host == NULL) { goto success; } if(host->h_addrtype != AF_INET) goto success; host = gethostbyaddr(host->h_addr_list[0], host->h_length, AF_INET); if(!host || !host->h_name || strcmp(host->h_name, "localhost") == 0 || strcmp(host->h_name, "localhost.localdomain") == 0) goto success; namelen = strlen(host->h_name); if(namelen >= CHUNK_SIZE) { do_log(L_ERROR, "Host name too long.\n"); goto success; } memcpy(buf, host->h_name, namelen + 1); success: proxyName = internAtom(buf); if(proxyName == NULL) { do_log(L_ERROR, "Couldn't allocate proxy name.\n"); goto fail; } dispose_chunk(buf); return; fail: if(buf) dispose_chunk(buf); exit(1); return; } int httpSetTimeout(HTTPConnectionPtr connection, int secs) { TimeEventHandlerPtr new; if(connection->timeout) cancelTimeEvent(connection->timeout); connection->timeout = NULL; if(secs > 0) { new = scheduleTimeEvent(secs, httpTimeoutHandler, sizeof(connection), &connection); if(!new) { do_log(L_ERROR, "Couldn't schedule timeout for connection 0x%lx\n", (unsigned long)connection); return -1; } } else { new = NULL; } connection->timeout = new; return 1; } int httpTimeoutHandler(TimeEventHandlerPtr event) { HTTPConnectionPtr connection = *(HTTPConnectionPtr*)event->data; if(connection->fd >= 0) { int rc; rc = shutdown(connection->fd, 2); if(rc < 0 && errno != ENOTCONN) do_log_error(L_ERROR, errno, "Timeout: shutdown failed"); pokeFdEvent(connection->fd, -EDOTIMEOUT, POLLIN | POLLOUT); } connection->timeout = NULL; return 1; } int httpWriteObjectHeaders(char *buf, int offset, int len, ObjectPtr object, int from, int to) { int n = offset; if(from <= 0 && to 
< 0) { if(object->length >= 0) { n = snnprintf(buf, n, len, "\r\nContent-Length: %d", object->length); } } else { if(to >= 0) { n = snnprintf(buf, n, len, "\r\nContent-Length: %d", to - from); } } if(from > 0 || to > 0) { if(object->length >= 0) { if(from >= to) { n = snnprintf(buf, n, len, "\r\nContent-Range: bytes */%d", object->length); } else { n = snnprintf(buf, n, len, "\r\nContent-Range: bytes %d-%d/%d", from, to - 1, object->length); } } else { if(to >= 0) { n = snnprintf(buf, n, len, "\r\nContent-Range: bytes %d-/*", from); } else { n = snnprintf(buf, n, len, "\r\nContent-Range: bytes %d-%d/*", from, to); } } } if(object->etag) { n = snnprintf(buf, n, len, "\r\nETag: \"%s\"", object->etag); } if((object->flags & OBJECT_LOCAL) || object->date >= 0) { n = snnprintf(buf, n, len, "\r\nDate: "); n = format_time(buf, n, len, (object->flags & OBJECT_LOCAL) ? current_time.tv_sec : object->date); if(n < 0) goto fail; } if(object->last_modified >= 0) { n = snnprintf(buf, n, len, "\r\nLast-Modified: "); n = format_time(buf, n, len, object->last_modified); if(n < 0) goto fail; } if(object->expires >= 0) { n = snnprintf(buf, n, len, "\r\nExpires: "); n = format_time(buf, n, len, object->expires); if(n < 0) goto fail; } n = httpPrintCacheControl(buf, n, len, object->cache_control, NULL); if(n < 0) goto fail; if(!disableVia && object->via) n = snnprintf(buf, n, len, "\r\nVia: %s", object->via->string); if(object->headers) n = snnprint_n(buf, n, len, object->headers->string, object->headers->length); if(n < len) return n; else return -1; fail: return -1; } static int cachePrintSeparator(char *buf, int offset, int len, int subsequent) { int n = offset; if(subsequent) n = snnprintf(buf, offset, len, ", "); else n = snnprintf(buf, offset, len, "\r\nCache-Control: "); return n; } int httpPrintCacheControl(char *buf, int offset, int len, int flags, CacheControlPtr cache_control) { int n = offset; int sub = 0; #define PRINT_SEP() \ do {\ n = cachePrintSeparator(buf, n, len, 
sub); \ sub = 1; \ } while(0) if(cache_control) flags |= cache_control->flags; if(flags & CACHE_NO) { PRINT_SEP(); n = snnprintf(buf, n, len, "no-cache"); } if(flags & CACHE_PUBLIC) { PRINT_SEP(); n = snnprintf(buf, n, len, "public"); } if(flags & CACHE_PRIVATE) { PRINT_SEP(); n = snnprintf(buf, n, len, "private"); } if(flags & CACHE_NO_STORE) { PRINT_SEP(); n = snnprintf(buf, n, len, "no-store"); } if(flags & CACHE_NO_TRANSFORM) { PRINT_SEP(); n = snnprintf(buf, n, len, "no-transform"); } if(flags & CACHE_MUST_REVALIDATE) { PRINT_SEP(); n = snnprintf(buf, n, len, "must-revalidate"); } if(flags & CACHE_PROXY_REVALIDATE) { PRINT_SEP(); n = snnprintf(buf, n, len, "proxy-revalidate"); } if(flags & CACHE_ONLY_IF_CACHED) { PRINT_SEP(); n = snnprintf(buf, n, len, "only-if-cached"); } if(cache_control) { if(cache_control->max_age >= 0) { PRINT_SEP(); n = snnprintf(buf, n, len, "max-age=%d", cache_control->max_age); } if(cache_control->s_maxage >= 0) { PRINT_SEP(); n = snnprintf(buf, n, len, "s-maxage=%d", cache_control->s_maxage); } if(cache_control->min_fresh > 0) { PRINT_SEP(); n = snnprintf(buf, n, len, "min-fresh=%d", cache_control->min_fresh); } if(cache_control->max_stale > 0) { PRINT_SEP(); n = snnprintf(buf, n, len, "max-stale=%d", cache_control->min_fresh); } } return n; #undef PRINT_SEP } char * httpMessage(int code) { switch(code) { case 200: return "Okay"; case 206: return "Partial content"; case 300: return "Multiple choices"; case 301: return "Moved permanently"; case 302: return "Found"; case 303: return "See other"; case 304: return "Not changed"; case 307: return "Temporary redirect"; case 401: return "Authentication Required"; case 403: return "Forbidden"; case 404: return "Not found"; case 405: return "Method not allowed"; case 407: return "Proxy authentication required"; default: return "Unknown error code"; } } int htmlString(char *buf, int n, int len, char *s, int slen) { int i = 0; while(i < slen && n + 5 < len) { switch(s[i]) { case '&': buf[n++] = 
'&'; buf[n++] = 'a'; buf[n++] = 'm'; buf[n++] = 'p'; buf[n++] = ';'; break; case '<': buf[n++] = '&'; buf[n++] = 'l'; buf[n++] = 't'; buf[n++] = ';'; break; case '>': buf[n++] = '&'; buf[n++] = 'g'; buf[n++] = 't'; buf[n++] = ';'; break; case '"': buf[n++] = '&'; buf[n++] = 'q'; buf[n++] = 'u'; buf[n++] = 'o'; buf[n++] = 't'; buf[n++] = ';'; break; case '\0': break; default: buf[n++] = s[i]; } i++; } return n; } void htmlPrint(FILE *out, char *s, int slen) { int i; for(i = 0; i < slen; i++) { switch(s[i]) { case '&': fputs("&", out); break; case '<': fputs("<", out); break; case '>': fputs(">", out); break; default: fputc(s[i], out); } } } HTTPConnectionPtr httpMakeConnection() { HTTPConnectionPtr connection; connection = malloc(sizeof(HTTPConnectionRec)); if(connection == NULL) return NULL; connection->flags = 0; connection->fd = -1; connection->buf = NULL; connection->len = 0; connection->offset = 0; connection->request = NULL; connection->request_last = NULL; connection->serviced = 0; connection->version = HTTP_UNKNOWN; connection->time = current_time.tv_sec; connection->timeout = NULL; connection->te = TE_IDENTITY; connection->reqbuf = NULL; connection->reqlen = 0; connection->reqbegin = 0; connection->reqoffset = 0; connection->bodylen = -1; connection->reqte = TE_IDENTITY; connection->chunk_remaining = 0; connection->server = NULL; connection->pipelined = 0; connection->connecting = 0; connection->server = NULL; return connection; } void httpDestroyConnection(HTTPConnectionPtr connection) { assert(connection->flags == 0); httpConnectionDestroyBuf(connection); assert(!connection->request); assert(!connection->request_last); httpConnectionDestroyReqbuf(connection); assert(!connection->timeout); assert(!connection->server); free(connection); } void httpConnectionDestroyBuf(HTTPConnectionPtr connection) { if(connection->buf) { if(connection->flags & CONN_BIGBUF) free(connection->buf); else dispose_chunk(connection->buf); } connection->flags &= ~CONN_BIGBUF; 
connection->buf = NULL; } void httpConnectionDestroyReqbuf(HTTPConnectionPtr connection) { if(connection->reqbuf) { if(connection->flags & CONN_BIGREQBUF) free(connection->reqbuf); else dispose_chunk(connection->reqbuf); } connection->flags &= ~CONN_BIGREQBUF; connection->reqbuf = NULL; } HTTPRequestPtr httpMakeRequest() { HTTPRequestPtr request; request = malloc(sizeof(HTTPRequestRec)); if(request == NULL) return NULL; request->flags = 0; request->connection = NULL; request->object = NULL; request->method = METHOD_UNKNOWN; request->from = 0; request->to = -1; request->cache_control = no_cache_control; request->condition = NULL; request->via = NULL; request->chandler = NULL; request->can_mutate = NULL; request->error_code = 0; request->error_message = NULL; request->error_headers = NULL; request->headers = NULL; request->time0 = null_time; request->time1 = null_time; request->request = NULL; request->next = NULL; return request; } void httpDestroyRequest(HTTPRequestPtr request) { if(request->object) releaseObject(request->object); if(request->condition) httpDestroyCondition(request->condition); releaseAtom(request->via); assert(request->chandler == NULL); releaseAtom(request->error_message); releaseAtom(request->headers); releaseAtom(request->error_headers); assert(request->request == NULL); assert(request->next == NULL); free(request); } void httpQueueRequest(HTTPConnectionPtr connection, HTTPRequestPtr request) { assert(request->next == NULL && request->connection == NULL); request->connection = connection; if(connection->request_last) { assert(connection->request); connection->request_last->next = request; connection->request_last = request; } else { assert(!connection->request_last); connection->request = request; connection->request_last = request; } } HTTPRequestPtr httpDequeueRequest(HTTPConnectionPtr connection) { HTTPRequestPtr request = connection->request; if(request) { assert(connection->request_last); connection->request = request->next; 
if(!connection->request) connection->request_last = NULL; request->next = NULL; } return request; } int httpConnectionBigify(HTTPConnectionPtr connection) { char *bigbuf; assert(!(connection->flags & CONN_BIGBUF)); if(bigBufferSize <= CHUNK_SIZE) return 0; bigbuf = malloc(bigBufferSize); if(bigbuf == NULL) return -1; if(connection->len > 0) memcpy(bigbuf, connection->buf, connection->len); if(connection->buf) dispose_chunk(connection->buf); connection->buf = bigbuf; connection->flags |= CONN_BIGBUF; return 1; } int httpConnectionBigifyReqbuf(HTTPConnectionPtr connection) { char *bigbuf; assert(!(connection->flags & CONN_BIGREQBUF)); if(bigBufferSize <= CHUNK_SIZE) return 0; bigbuf = malloc(bigBufferSize); if(bigbuf == NULL) return -1; if(connection->reqlen > 0) memcpy(bigbuf, connection->reqbuf, connection->reqlen); if(connection->reqbuf) dispose_chunk(connection->reqbuf); connection->reqbuf = bigbuf; connection->flags |= CONN_BIGREQBUF; return 1; } int httpConnectionUnbigify(HTTPConnectionPtr connection) { char *buf; assert(connection->flags & CONN_BIGBUF); assert(connection->len < CHUNK_SIZE); buf = get_chunk(); if(buf == NULL) return -1; if(connection->len > 0) memcpy(buf, connection->buf, connection->len); free(connection->buf); connection->buf = buf; connection->flags &= ~CONN_BIGBUF; return 1; } int httpConnectionUnbigifyReqbuf(HTTPConnectionPtr connection) { char *buf; assert(connection->flags & CONN_BIGREQBUF); assert(connection->reqlen < CHUNK_SIZE); buf = get_chunk(); if(buf == NULL) return -1; if(connection->reqlen > 0) memcpy(buf, connection->reqbuf, connection->reqlen); free(connection->reqbuf); connection->reqbuf = buf; connection->flags &= ~CONN_BIGREQBUF; return 1; } HTTPConditionPtr httpMakeCondition() { HTTPConditionPtr condition; condition = malloc(sizeof(HTTPConditionRec)); if(condition == NULL) return NULL; condition->ims = -1; condition->inms = -1; condition->im = NULL; condition->inm = NULL; condition->ifrange = NULL; return condition; } void 
httpDestroyCondition(HTTPConditionPtr condition) { if(condition->inm) free(condition->inm); if(condition->im) free(condition->im); if(condition->ifrange) free(condition->ifrange); free(condition); } int httpCondition(ObjectPtr object, HTTPConditionPtr condition) { int rc = CONDITION_MATCH; assert(!(object->flags & OBJECT_INITIAL)); if(!condition) return CONDITION_MATCH; if(condition->ims >= 0) { if(object->last_modified < 0 || condition->ims < object->last_modified) return rc; else rc = CONDITION_NOT_MODIFIED; } if(condition->inms >= 0) { if(object->last_modified < 0 || condition->inms >= object->last_modified) return rc; else rc = CONDITION_FAILED; } if(condition->inm) { if(!object->etag || strcmp(object->etag, condition->inm) != 0) return rc; else rc = CONDITION_NOT_MODIFIED; } if(condition->im) { if(!object->etag || strcmp(object->etag, condition->im) != 0) rc = CONDITION_FAILED; else return rc; } return rc; } int httpWriteErrorHeaders(char *buf, int size, int offset, int do_body, int code, AtomPtr message, int close, AtomPtr headers, char *url, int url_len, char *etag) { int n, m, i; char *body; char htmlMessage[100]; char timeStr[100]; assert(code != 0); i = htmlString(htmlMessage, 0, 100, message->string, message->length); if(i < 0) strcpy(htmlMessage, "(Couldn't format message)"); else htmlMessage[MIN(i, 99)] = '\0'; if(code != 304) { body = get_chunk(); if(!body) { do_log(L_ERROR, "Couldn't allocate body buffer.\n"); return -1; } m = snnprintf(body, 0, CHUNK_SIZE, "" "\n" "\nProxy %s: %3d %s." "\n" "\n

%3d %s

" "\n

The following %s", code >= 400 ? "error" : "result", code, htmlMessage, code, htmlMessage, code >= 400 ? "error occurred" : "status was returned"); if(url_len > 0) { m = snnprintf(body, m, CHUNK_SIZE, " while trying to access "); m = htmlString(body, m, CHUNK_SIZE, url, url_len); m = snnprintf(body, m, CHUNK_SIZE, ""); } { /* On BSD systems, tv_sec is a long. */ const time_t ct = current_time.tv_sec; /*Mon, 24 Sep 2004 17:46:35 GMT*/ strftime(timeStr, sizeof(timeStr), "%a, %d %b %Y %H:%M:%S %Z", localtime(&ct)); } m = snnprintf(body, m, CHUNK_SIZE, ":

" "\n%3d %s

" "\n
Generated %s by Polipo on %s:%d." "\n\r\n", code, htmlMessage, timeStr, proxyName->string, proxyPort); if(m <= 0 || m >= CHUNK_SIZE) { do_log(L_ERROR, "Couldn't write error body.\n"); dispose_chunk(body); return -1; } } else { body = NULL; m = 0; } n = snnprintf(buf, 0, size, "HTTP/1.1 %3d %s" "\r\nConnection: %s" "\r\nDate: ", code, atomString(message), close ? "close" : "keep-alive"); n = format_time(buf, n, size, current_time.tv_sec); if(code != 304) { n = snnprintf(buf, n, size, "\r\nContent-Type: text/html" "\r\nContent-Length: %d", m); } else { if(etag) n = snnprintf(buf, n, size, "\r\nETag: \"%s\"", etag); } if(code != 304 && code != 412) { n = snnprintf(buf, n, size, "\r\nExpires: 0" "\r\nCache-Control: no-cache" "\r\nPragma: no-cache"); } if(headers) n = snnprint_n(buf, n, size, headers->string, headers->length); n = snnprintf(buf, n, size, "\r\n\r\n"); if(n < 0 || n >= size) { do_log(L_ERROR, "Couldn't write error.\n"); dispose_chunk(body); return -1; } if(code != 304 && do_body) { if(m > 0) memcpy(buf + n, body, m); n += m; } if(body) dispose_chunk(body); return n; } AtomListPtr urlDecode(char *buf, int n) { char mybuf[500]; int i, j = 0; AtomListPtr list; AtomPtr atom; list = makeAtomList(NULL, 0); if(list == NULL) return NULL; i = 0; while(i < n) { if(buf[i] == '%') { int a, b; if(i + 3 > n) goto fail; a = h2i(buf[i + 1]); b = h2i(buf[i + 2]); if(a < 0 || b < 0) goto fail; mybuf[j++] = (char)((a << 4) | b); i += 3; if(j > 500) goto fail; } else if(buf[i] == '&') { atom = internAtomN(mybuf, j); if(atom == NULL) goto fail; atomListCons(atom, list); j = 0; i++; } else { mybuf[j++] = buf[i++]; if(j > 500) goto fail; } } atom = internAtomN(mybuf, j); if(atom == NULL) goto fail; atomListCons(atom, list); return list; fail: destroyAtomList(list); return NULL; } void httpTweakCachability(ObjectPtr object) { int code = object->code; if((object->cache_control & CACHE_AUTHORIZATION) && !(object->cache_control & CACHE_PUBLIC)) object->cache_control |= 
(CACHE_NO_HIDDEN | OBJECT_LINEAR); /* This is not required by RFC 2616 -- but see RFC 3143 2.1.1. We manically avoid caching replies that we don't know how to handle, even if Expires or Cache-Control says otherwise. As to known uncacheable replies, we obey Cache-Control and default to allowing sharing but not caching. */ if(code != 200 && code != 206 && code != 300 && code != 301 && code != 302 && code != 303 && code != 304 && code != 307 && code != 403 && code != 404 && code != 405 && code != 416) { object->cache_control |= (CACHE_NO_HIDDEN | CACHE_MISMATCH | OBJECT_LINEAR); } else if(code != 200 && code != 206 && code != 300 && code != 301 && code != 304 && code != 410) { if(object->expires < 0 && !(object->cache_control & CACHE_PUBLIC)) { object->cache_control |= CACHE_NO_HIDDEN; } } else if(dontCacheRedirects && (code == 301 || code == 302)) { object->cache_control |= CACHE_NO_HIDDEN; } if(urlIsUncachable(object->key, object->key_size)) { object->cache_control |= CACHE_NO_HIDDEN; } if((object->cache_control & CACHE_NO_STORE) != 0) { object->cache_control |= CACHE_NO_HIDDEN; } if(object->cache_control & CACHE_VARY) { if(!object->etag || dontTrustVaryETag >= 2) { object->cache_control |= CACHE_MISMATCH; } } } int httpHeaderMatch(AtomPtr header, AtomPtr headers1, AtomPtr headers2) { int rc1, b1, e1, rc2, b2, e2; /* Short cut if both sets of headers are identical */ if(headers1 == headers2) return 1; rc1 = httpFindHeader(header, headers1->string, headers1->length, &b1, &e1); rc2 = httpFindHeader(header, headers2->string, headers2->length, &b2, &e2); if(rc1 == 0 && rc2 == 0) return 1; if(rc1 == 0 || rc2 == 0) return 0; if(e1 - b1 != e2 - b2) return 0; if(memcmp(headers1->string + b1, headers2->string + b2, e1 - b1) != 0) return 0; return 1; } polipo-1.0.4.1/ftsimport.h0000644000175000017500000000010711331407220014656 0ustar chrisdchrisd#ifdef HAVE_FTS #include #else #include "fts_compat.h" #endif 
polipo-1.0.4.1/ftsimport.c0000644000175000017500000000024211331407220014651 0ustar chrisdchrisd#ifndef _GNU_SOURCE #define _GNU_SOURCE #endif #include "polipo.h" #ifndef HAVE_FTS #include "fts_compat.c" #else static int dummy ATTRIBUTE((unused)); #endif polipo-1.0.4.1/fts_compat.h0000644000175000017500000000356511331407220015001 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

#ifndef _FTS_COMPAT_H
#define _FTS_COMPAT_H

/* Number of slots in the directory-handle stack of struct _FTS; may be
   overridden before this header is included. */
#ifndef FTS_MAX_DEPTH
#define FTS_MAX_DEPTH 4
#endif

/* The only `options' value fts_open accepts. */
#define FTS_LOGICAL 1

/* Values stored in FTSENT.fts_info. */
#define FTS_F 1
#define FTS_D 2
#define FTS_DP 3
#define FTS_DC 4
#define FTS_NS 5
#define FTS_NSOK 6
#define FTS_DNR 7
#define FTS_SLNONE 8
#define FTS_DEFAULT 9
#define FTS_ERR 10

/* One entry as returned by fts_read. */
struct _FTSENT {
    unsigned short fts_info;    /* one of the FTS_* entry codes above */
    char *fts_path;             /* path of the entry; freed by the next
                                   fts_read or by fts_close */
    char *fts_accpath;          /* name to use for accessing the entry */
    struct stat *fts_statp;     /* points at the `stat' member of the
                                   owning struct _FTS */
    int fts_errno;              /* errno saved when an error is reported */
};

typedef struct _FTSENT FTSENT;

/* State of one traversal. */
struct _FTS {
    int depth;                  /* index of the innermost open directory
                                   in dir[]; -1 once all are closed */
    DIR *dir[FTS_MAX_DEPTH];    /* stack of open directory handles */
    char *cwd0, *cwd;           /* working directory saved by fts_open,
                                   and path of the directory being read */
    struct _FTSENT ftsent;      /* the entry handed out by fts_read */
    struct stat stat;           /* stat buffer filled in by fts_read */
    char *dname;                /* name of the directory just left */
};

typedef struct _FTS FTS;

FTS* fts_open(char * const *path_argv, int options,
              int (*compar)(const FTSENT **, const FTSENT **));
int fts_close(FTS *fts);
FTSENT * fts_read(FTS *fts);

#endif
polipo-1.0.4.1/fts_compat.c0000644000175000017500000001734711331407220014777 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ /* This file implements just enough of the fts family of functions to make Polipo happy. */ #include #include #include #include #include #include #include #include #include "fts_compat.h" static char * getcwd_a() { char buf[256]; char *ret; ret = getcwd(buf, 256); if(ret == NULL) return NULL; return strdup(buf); } static char * mkfilename(const char *path, char *filename) { int n = strlen(path); char *buf = malloc(n + 1 + strlen(filename) + 1); if(buf == NULL) return NULL; memcpy(buf, path, n); if(buf[n - 1] != '/') buf[n++] = '/'; strcpy(buf + n, filename); return buf; } static int split(const char *path, int *slash_return, int *dlen, int *blen) { int len; int slash; len = strlen(path); while(len > 0 && path[len - 1] == '/') len--; if(len == 0) return -1; slash = len - 1; while(slash >= 0 && path[slash] != '/') slash--; if(slash_return) *slash_return = slash; if(dlen) *dlen = slash + 1; if(blen) *blen = len - slash - 1; return 1; } static char * basename_a(const char *path) { int blen, slash; char *b; int rc; rc = split(path, &slash, NULL, &blen); if(rc < 0 || blen == 0) return NULL; b = malloc(blen + 1); if(b == NULL) return NULL; memcpy(b, path + slash + 1, blen); b[blen] = '\0'; return b; } static char * dirname_a(const char *path) { int dlen; int rc; char *d; rc = split(path, NULL, &dlen, NULL); if(rc < 0) return NULL; d = malloc(dlen + 1); if(d == NULL) return NULL; memcpy(d, path, dlen); d[dlen] = '\0'; return d; } #if defined(__svr4__) || defined(SVR4) static int dirfd(DIR *dir) { return dir->dd_fd; } #endif /* * Make the directory identified by the argument the current directory. 
*/ #ifdef MINGW int change_to_dir(DIR *dir) { errno = ENOSYS; return -1; } #else int change_to_dir(DIR *dir) { return fchdir(dirfd(dir)); } #endif FTS* fts_open(char * const *path_argv, int options, int (*compar)(const FTSENT **, const FTSENT **)) { FTS *fts; DIR *dir; char *cwd; int rc; if(options != FTS_LOGICAL || compar != NULL || path_argv[1] != NULL) { errno = ENOSYS; return NULL; } dir = opendir(path_argv[0]); if(dir == NULL) return NULL; fts = calloc(sizeof(FTS), 1); if(fts == NULL) { int save = errno; closedir(dir); errno = save; return NULL; } cwd = getcwd_a(); if(cwd == NULL) { int save = errno; free(fts); closedir(dir); errno = save; return NULL; } rc = change_to_dir(dir); if(rc < 0) { int save = errno; free(cwd); free(fts); closedir(dir); errno = save; return NULL; } fts->depth = 0; fts->dir[0] = dir; fts->cwd0 = cwd; fts->cwd = strdup(path_argv[0]); return fts; } int fts_close(FTS *fts) { int save = 0; int rc; if(fts->ftsent.fts_path) { free(fts->ftsent.fts_path); fts->ftsent.fts_path = NULL; } if(fts->dname) { free(fts->dname); fts->dname = NULL; } rc = chdir(fts->cwd0); if(rc < 0) save = errno; while(fts->depth >= 0) { closedir(fts->dir[fts->depth]); fts->depth--; } free(fts->cwd0); if(fts->cwd) free(fts->cwd); free(fts); if(rc < 0) { errno = save; return -1; } return 0; } FTSENT * fts_read(FTS *fts) { struct dirent *dirent; int rc; char *name; char buf[1024]; if(fts->ftsent.fts_path) { free(fts->ftsent.fts_path); fts->ftsent.fts_path = NULL; } if(fts->dname) { free(fts->dname); fts->dname = NULL; } again: dirent = readdir(fts->dir[fts->depth]); if(dirent == NULL) { char *newcwd = NULL; closedir(fts->dir[fts->depth]); fts->dir[fts->depth] = NULL; fts->depth--; if(fts->depth >= 0) { fts->dname = basename_a(fts->cwd); if(fts->dname == NULL) goto error; newcwd = dirname_a(fts->cwd); if(newcwd == NULL) goto error; } if(fts->cwd) free(fts->cwd); fts->cwd = NULL; if(fts->depth < 0) return NULL; rc = change_to_dir(fts->dir[fts->depth]); if(rc < 0) { 
free(newcwd); goto error; } fts->cwd = newcwd; name = fts->dname; fts->ftsent.fts_info = FTS_DP; goto done; } name = dirent->d_name; again2: rc = stat(name, &fts->stat); if(rc < 0) { fts->ftsent.fts_info = FTS_NS; goto error2; } if(S_ISDIR(fts->stat.st_mode)) { char *newcwd; DIR *dir; if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0) goto again; if(fts->depth >= FTS_MAX_DEPTH) { errno = ENFILE; goto error; } dir = opendir(dirent->d_name); if(dir == NULL) { if(errno == EACCES) { fts->ftsent.fts_info = FTS_DNR; goto error2; } else goto error; } newcwd = mkfilename(fts->cwd, dirent->d_name); rc = change_to_dir(dir); if(rc < 0) { free(newcwd); goto error; } free(fts->cwd); fts->cwd = newcwd; fts->ftsent.fts_info = FTS_D; fts->depth++; fts->dir[fts->depth] = dir; goto done; } else if(S_ISREG(fts->stat.st_mode)) { fts->ftsent.fts_info = FTS_F; goto done; #ifdef S_ISLNK } else if(S_ISLNK(fts->stat.st_mode)) { int rc; rc = readlink(name, buf, 1024); if(rc < 0) goto error; if(rc >= 1023) { errno = ENAMETOOLONG; goto error; } buf[rc] = '\0'; name = buf; if(access(buf, F_OK) >= 0) goto again2; fts->ftsent.fts_info = FTS_SLNONE; goto done; #endif } else { fts->ftsent.fts_info = FTS_DEFAULT; goto done; } done: if(fts->cwd == NULL) fts->cwd = getcwd_a(); if(fts->cwd == NULL) goto error; fts->ftsent.fts_path = mkfilename(fts->cwd, name); if(fts->ftsent.fts_path == NULL) goto error; fts->ftsent.fts_accpath = name; fts->ftsent.fts_statp = &fts->stat; return &fts->ftsent; error: fts->ftsent.fts_info = FTS_ERR; error2: fts->ftsent.fts_errno = errno; return &fts->ftsent; } polipo-1.0.4.1/forbidden.sample0000644000175000017500000000070211331407220015616 0ustar chrisdchrisd# Sample forbidden URLs file for polipo. -*-sh-*- # Put this in /etc/polipo/forbidden or in ~/.polipo-forbidden. # Forbid all hosts belonging to a given domain name: #counter.com #hitbox.com #doubleclick.net #www.cashcount.com # Forbid all hosts contaning a string matching a given regex. 
Note # that you need to quote dots, so that a regex is not misinterpreted # as a domain name. #^http://[^/]*counter\.com #/ads/ #/phpAdsNew #counting\.php polipo-1.0.4.1/forbidden.h0000644000175000017500000000375011331407220014572 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/* Configuration variables defined in forbidden.c. */
extern AtomPtr forbiddenUrl;
extern int forbiddenRedirectCode;

/* One entry of a singly linked list of redirect requests (forbidden.c
   keeps first/last pointers to such a list).
   NOTE(review): the meaning of the handler's arguments is not visible
   in this header -- see forbidden.c. */
typedef struct _RedirectRequest {
    AtomPtr url;
    struct _RedirectRequest *next;
    int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*);
    void *data;
} RedirectRequestRec, *RedirectRequestPtr;

/* Registers the forbidden/redirector/uncachable configuration
   variables. */
void preinitForbidden(void);
void initForbidden(void);
int urlIsUncachable(char *url, int length);
/* `handler' has the same signature as RedirectRequestRec.handler;
   `closure' is presumably handed back to it -- confirm in
   forbidden.c. */
int urlForbidden(AtomPtr url,
                 int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*),
                 void *closure);
void redirectorKill(void);
int redirectorStreamHandler1(int status,
                             FdEventHandlerPtr event,
                             StreamRequestPtr srequest);
int redirectorStreamHandler2(int status,
                             FdEventHandlerPtr event,
                             StreamRequestPtr srequest);
void redirectorTrigger(void);
int
runRedirector(pid_t *pid_return, int *read_fd_return, int *write_fd_return);
polipo-1.0.4.1/forbidden.c0000644000175000017500000005123511331407220014566 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #include "polipo.h" #ifndef NO_FORBIDDEN #include typedef struct _Domain { int length; char domain[1]; } DomainRec, *DomainPtr; AtomPtr forbiddenFile = NULL; AtomPtr forbiddenUrl = NULL; int forbiddenRedirectCode = 302; AtomPtr redirector = NULL; int redirectorRedirectCode = 302; DomainPtr *forbiddenDomains = NULL; regex_t *forbiddenRegex = NULL; AtomPtr uncachableFile = NULL; DomainPtr *uncachableDomains = NULL; regex_t *uncachableRegex = NULL; /* these three are only used internally by {parse,read}DomainFile */ /* to avoid having to pass it all as parameters */ static DomainPtr *domains; static char *regexbuf; static int rlen, rsize, dlen, dsize; #ifndef NO_REDIRECTOR static pid_t redirector_pid = 0; static int redirector_read_fd = -1, redirector_write_fd = -1; #define REDIRECTOR_BUFFER_SIZE 1024 static char *redirector_buffer = NULL; RedirectRequestPtr redirector_request_first = NULL, redirector_request_last = NULL; #endif static int atomSetterForbidden(ConfigVariablePtr, void*); void preinitForbidden(void) { CONFIG_VARIABLE_SETTABLE(forbiddenUrl, CONFIG_ATOM, configAtomSetter, "URL to which forbidden requests " "should be redirected."); CONFIG_VARIABLE_SETTABLE(forbiddenRedirectCode, CONFIG_INT, configIntSetter, "Redirect code, 301 or 302."); CONFIG_VARIABLE_SETTABLE(forbiddenFile, CONFIG_ATOM, atomSetterForbidden, "File specifying forbidden URLs."); #ifndef NO_REDIRECTOR CONFIG_VARIABLE_SETTABLE(redirector, CONFIG_ATOM, atomSetterForbidden, "Squid-style redirector."); CONFIG_VARIABLE_SETTABLE(redirectorRedirectCode, CONFIG_INT, configIntSetter, "Redirect code to use with redirector."); #endif CONFIG_VARIABLE_SETTABLE(uncachableFile, CONFIG_ATOM, atomSetterForbidden, "File specifying uncachable URLs."); } static int atomSetterForbidden(ConfigVariablePtr var, void *value) { initForbidden(); return configAtomSetter(var, value); } int readDomainFile(char *filename) { FILE *in; char buf[512]; char *rs; int i, j, is_regex, start; in = fopen(filename, "r"); if(in 
== NULL) { if(errno != ENOENT) do_log_error(L_ERROR, errno, "Couldn't open file %s", filename); return -1; } while(1) { rs = fgets(buf, 512, in); if(rs == NULL) break; for(i = 0; i < 512; i++) { if(buf[i] != ' ' && buf[i] != '\t') break; } start = i; for(i = start; i < 512; i++) { if(buf[i] == '#' || buf[i] == '\r' || buf[i] == '\n') break; } while(i > start) { if(buf[i - 1] != ' ' && buf[i - 1] != '\t') break; i--; } if(i <= start) continue; /* The significant part of the line is now between start and i */ is_regex = 0; for(j = start; j < i; j++) { if(buf[j] == '\\' || buf[j] == '*' || buf[j] == '/') { is_regex = 1; break; } } if(is_regex) { while(rlen + i - start + 8 >= rsize) { char *new_regexbuf; new_regexbuf = realloc(regexbuf, rsize * 2 + 1); if(new_regexbuf == NULL) { do_log(L_ERROR, "Couldn't reallocate regex.\n"); fclose(in); return -1; } regexbuf = new_regexbuf; rsize = rsize * 2 + 1; } if(rlen != 0) rlen = snnprintf(regexbuf, rlen, rsize, "|"); rlen = snnprintf(regexbuf, rlen, rsize, "("); rlen = snnprint_n(regexbuf, rlen, rsize, buf + start, i - start); rlen = snnprintf(regexbuf, rlen, rsize, ")"); } else { DomainPtr new_domain; if(dlen >= dsize - 1) { DomainPtr *new_domains; new_domains = realloc(domains, (dsize * 2 + 1) * sizeof(DomainPtr)); if(new_domains == NULL) { do_log(L_ERROR, "Couldn't reallocate domain list.\n"); fclose(in); return -1; } domains = new_domains; dsize = dsize * 2 + 1; } new_domain = malloc(sizeof(DomainRec) - 1 + i - start); if(new_domain == NULL) { do_log(L_ERROR, "Couldn't allocate domain.\n"); fclose(in); return -1; } new_domain->length = i - start; memcpy(new_domain->domain, buf + start, i - start); domains[dlen++] = new_domain; } } fclose(in); return 1; } void parseDomainFile(AtomPtr file, DomainPtr **domains_return, regex_t **regex_return) { struct stat ss; int rc; if(*domains_return) { DomainPtr *domain = *domains_return; while(*domain) { free(*domain); domain++; } free(*domains_return); *domains_return = NULL; } 
if(*regex_return) { regfree(*regex_return); *regex_return = NULL; } if(!file || file->length == 0) return; domains = malloc(64 * sizeof(DomainPtr)); if(domains == NULL) { do_log(L_ERROR, "Couldn't allocate domain list.\n"); return; } dlen = 0; dsize = 64; regexbuf = malloc(512); if(regexbuf == NULL) { do_log(L_ERROR, "Couldn't allocate regex.\n"); free(domains); return; } rlen = 0; rsize = 512; rc = stat(file->string, &ss); if(rc < 0) { if(errno != ENOENT) do_log_error(L_WARN, errno, "Couldn't stat file %s", file->string); } else { if(!S_ISDIR(ss.st_mode)) readDomainFile(file->string); else { char *fts_argv[2]; FTS *fts; FTSENT *fe; fts_argv[0] = file->string; fts_argv[1] = NULL; fts = fts_open(fts_argv, FTS_LOGICAL, NULL); if(fts) { while(1) { fe = fts_read(fts); if(!fe) break; if(fe->fts_info != FTS_D && fe->fts_info != FTS_DP && fe->fts_info != FTS_DC && fe->fts_info != FTS_DNR) readDomainFile(fe->fts_accpath); } fts_close(fts); } else { do_log_error(L_ERROR, errno, "Couldn't scan directory %s", file->string); } } } if(dlen > 0) { domains[dlen] = NULL; } else { free(domains); domains = NULL; } regex_t *regex; if(rlen > 0) { regex = malloc(sizeof(regex_t)); rc = regcomp(regex, regexbuf, REG_EXTENDED | REG_NOSUB); if(rc != 0) { do_log(L_ERROR, "Couldn't compile regex: %d.\n", rc); free(regex); regex = NULL; } } else { regex = NULL; } free(regexbuf); *domains_return = domains; *regex_return = regex; return; } void initForbidden(void) { redirectorKill(); if(forbiddenFile) forbiddenFile = expandTilde(forbiddenFile); if(forbiddenFile == NULL) { forbiddenFile = expandTilde(internAtom("~/.polipo-forbidden")); if(forbiddenFile) { if(access(forbiddenFile->string, F_OK) < 0) { releaseAtom(forbiddenFile); forbiddenFile = NULL; } } } if(forbiddenFile == NULL) { if(access("/etc/polipo/forbidden", F_OK) >= 0) forbiddenFile = internAtom("/etc/polipo/forbidden"); } parseDomainFile(forbiddenFile, &forbiddenDomains, &forbiddenRegex); if(uncachableFile) uncachableFile = 
expandTilde(uncachableFile); if(uncachableFile == NULL) { uncachableFile = expandTilde(internAtom("~/.polipo-uncachable")); if(uncachableFile) { if(access(uncachableFile->string, F_OK) < 0) { releaseAtom(uncachableFile); uncachableFile = NULL; } } } if(uncachableFile == NULL) { if(access("/etc/polipo/uncachable", F_OK) >= 0) uncachableFile = internAtom("/etc/polipo/uncachable"); } parseDomainFile(uncachableFile, &uncachableDomains, &uncachableRegex); return; } int urlIsMatched(char *url, int length, DomainPtr *domains, regex_t *regex) { if(length < 8) return 0; if(memcmp(url, "http://", 7) != 0) return 0; if(domains) { int i; DomainPtr *domain; for(i = 8; i < length; i++) { if(url[i] == '/') break; } domain = domains; while(*domain) { if((*domain)->length <= (i - 7) && (url[i - (*domain)->length - 1] == '.' || url[i - (*domain)->length - 1] == '/') && memcmp(url + i - (*domain)->length, (*domain)->domain, (*domain)->length) == 0) return 1; domain++; } } if(regex) { /* url is not necessarily 0-terminated */ char smallcopy[50]; char *urlcopy; int rc; if(length < 50) { urlcopy = smallcopy; } else { urlcopy = malloc(length + 1); if(urlcopy == NULL) return 0; } memcpy(urlcopy, url, length); urlcopy[length] = '\0'; rc = regexec(regex, urlcopy, 0, NULL, 0); if(urlcopy != smallcopy) free(urlcopy); return !rc; } return 0; } int urlIsUncachable(char *url, int length) { return urlIsMatched(url, length, uncachableDomains, uncachableRegex); } int urlForbidden(AtomPtr url, int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*), void *closure) { int forbidden = urlIsMatched(url->string, url->length, forbiddenDomains, forbiddenRegex); int code = 0; AtomPtr message = NULL, headers = NULL; if(forbidden) { message = internAtomF("Forbidden URL %s", url->string); if(forbiddenUrl) { code = forbiddenRedirectCode; headers = internAtomF("\r\nLocation: %s", forbiddenUrl->string); } else { code = 403; } } #ifndef NO_REDIRECTOR if(code == 0 && redirector) { RedirectRequestPtr request; request 
= malloc(sizeof(RedirectRequestRec)); if(request == NULL) { do_log(L_ERROR, "Couldn't allocate redirect request.\n"); goto done; } request->url = url; request->handler = handler; request->data = closure; if(redirector_request_first == NULL) redirector_request_first = request; else redirector_request_last->next = request; redirector_request_last = request; request->next = NULL; if(request == redirector_request_first) redirectorTrigger(); return 1; } #endif done: handler(code, url, message, headers, closure); return 1; } #ifndef NO_REDIRECTOR static void logExitStatus(int status) { if(WIFEXITED(status) && WEXITSTATUS(status) == 142) /* See child code in runRedirector */ do_log(L_ERROR, "Couldn't start redirector.\n"); else { char *reason = WIFEXITED(status) ? "with status" : WIFSIGNALED(status) ? "on signal" : "with unknown status"; int value = WIFEXITED(status) ? WEXITSTATUS(status) : WIFSIGNALED(status) ? WTERMSIG(status) : status; do_log(L_ERROR, "Redirector exited %s %d.\n", reason, value); } } void redirectorKill(void) { int rc, status, dead; if(redirector_read_fd >= 0) { rc = waitpid(redirector_pid, &status, WNOHANG); dead = (rc > 0); close(redirector_read_fd); redirector_read_fd = -1; close(redirector_write_fd); redirector_write_fd = -1; if(!dead) { rc = kill(redirector_pid, SIGTERM); if(rc < 0 && errno != ESRCH) { do_log_error(L_ERROR, errno, "Couldn't kill redirector"); redirector_pid = -1; return; } do { rc = waitpid(redirector_pid, &status, 0); } while(rc < 0 && errno == EINTR); if(rc < 0) do_log_error(L_ERROR, errno, "Couldn't wait for redirector's death"); } else logExitStatus(status); redirector_pid = -1; } } static void redirectorDestroyRequest(RedirectRequestPtr request) { assert(redirector_request_first == request); redirector_request_first = request->next; if(redirector_request_first == NULL) redirector_request_last = NULL; free(request); } void redirectorTrigger(void) { RedirectRequestPtr request = redirector_request_first; int rc; if(!request) 
return; if(redirector_read_fd < 0) { rc = runRedirector(&redirector_pid, &redirector_read_fd, &redirector_write_fd); if(rc < 0) { request->handler(rc, request->url, NULL, NULL, request->data); redirectorDestroyRequest(request); return; } } do_stream_2(IO_WRITE, redirector_write_fd, 0, request->url->string, request->url->length, "\n", 1, redirectorStreamHandler1, request); } int redirectorStreamHandler1(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { RedirectRequestPtr request = (RedirectRequestPtr)srequest->data; if(status) { if(status >= 0) status = -EPIPE; do_log_error(L_ERROR, -status, "Write to redirector failed"); goto fail; } if(!streamRequestDone(srequest)) return 0; do_stream(IO_READ, redirector_read_fd, 0, redirector_buffer, REDIRECTOR_BUFFER_SIZE, redirectorStreamHandler2, request); return 1; fail: request->handler(status < 0 ? status : -EPIPE, request->url, NULL, NULL, request->data); redirectorDestroyRequest(request); redirectorKill(); return 1; } int redirectorStreamHandler2(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { RedirectRequestPtr request = (RedirectRequestPtr)srequest->data; char *c; AtomPtr message; AtomPtr headers; int code; if(status < 0) { do_log_error(L_ERROR, -status, "Read from redirector failed"); request->handler(status, request->url, NULL, NULL, request->data); goto kill; } c = memchr(redirector_buffer, '\n', srequest->offset); if(!c) { if(!status && srequest->offset < REDIRECTOR_BUFFER_SIZE) return 0; do_log(L_ERROR, "Redirector returned incomplete reply.\n"); request->handler(-EREDIRECTOR, request->url, NULL, NULL, request->data); goto kill; } *c = '\0'; if(srequest->offset > c + 1 - redirector_buffer) do_log(L_WARN, "Stray bytes in redirector output.\n"); if(c > redirector_buffer + 1 && (c - redirector_buffer != request->url->length || memcmp(redirector_buffer, request->url->string, request->url->length) != 0)) { code = redirectorRedirectCode; message = internAtom("Redirected by external 
redirector"); if(message == NULL) { request->handler(-ENOMEM, request->url, NULL, NULL, request->data); goto kill; } headers = internAtomF("\r\nLocation: %s", redirector_buffer); if(headers == NULL) { releaseAtom(message); request->handler(-ENOMEM, request->url, NULL, NULL, request->data); goto kill; } } else { code = 0; message = NULL; headers = NULL; } request->handler(code, request->url, message, headers, request->data); goto cont; cont: redirectorDestroyRequest(request); redirectorTrigger(); return 1; kill: redirectorKill(); goto cont; } int runRedirector(pid_t *pid_return, int *read_fd_return, int *write_fd_return) { int rc, rc2, status; pid_t pid; int filedes1[2], filedes2[2]; sigset_t ss, old_mask; assert(redirector); if(redirector_buffer == NULL) { redirector_buffer = malloc(REDIRECTOR_BUFFER_SIZE); if(redirector_buffer == NULL) return -errno; } rc = pipe(filedes1); if(rc < 0) { rc = -errno; goto fail1; } rc = pipe(filedes2); if(rc < 0) { rc = -errno; goto fail2; } fflush(stdout); fflush(stderr); flushLog(); interestingSignals(&ss); do { rc = sigprocmask(SIG_BLOCK, &ss, &old_mask); } while (rc < 0 && errno == EINTR); if(rc < 0) { rc = -errno; goto fail3; } pid = fork(); if(pid < 0) { rc = -errno; goto fail4; } if(pid > 0) { do { rc = sigprocmask(SIG_SETMASK, &old_mask, NULL); } while(rc < 0 && errno == EINTR); if(rc < 0) { rc = -errno; goto fail4; } rc = setNonblocking(filedes1[1], 1); if(rc >= 0) rc = setNonblocking(filedes2[0], 1); if(rc < 0) { rc = -errno; goto fail4; } /* This is completely unnecesary -- if the redirector cannot be started, redirectorStreamHandler1 will get EPIPE straight away --, but it improves error messages somewhat. 
*/ rc = waitpid(pid, &status, WNOHANG); if(rc > 0) { logExitStatus(status); rc = -EREDIRECTOR; goto fail4; } else if(rc < 0) { rc = -errno; goto fail4; } *read_fd_return = filedes2[0]; *write_fd_return = filedes1[1]; *pid_return = pid; /* This comes at the end so that the fail* labels can work */ close(filedes1[0]); close(filedes2[1]); } else { close(filedes1[1]); close(filedes2[0]); uninitEvents(); do { rc = sigprocmask(SIG_SETMASK, &old_mask, NULL); } while (rc < 0 && errno == EINTR); if(rc < 0) exit(142); if(filedes1[0] != 0) dup2(filedes1[0], 0); if(filedes2[1] != 1) dup2(filedes2[1], 1); execlp(redirector->string, redirector->string, NULL); exit(142); /* NOTREACHED */ } return 1; fail4: do { rc2 = sigprocmask(SIG_SETMASK, &old_mask, NULL); } while(rc2 < 0 && errno == EINTR); fail3: close(filedes2[0]); close(filedes2[1]); fail2: close(filedes1[0]); close(filedes1[1]); fail1: free(redirector_buffer); redirector_buffer = NULL; return rc; } #else void redirectorKill(void) { return; } #endif #else void preinitForbidden() { return; } void initForbidden() { return; } int urlIsUncachable(char *url, int length) { return 0; } int urlForbidden(AtomPtr url, int (*handler)(int, AtomPtr, AtomPtr, AtomPtr, void*), void *closure) { handler(0, url, NULL, NULL, closure); return 1; } #endif polipo-1.0.4.1/event.h0000644000175000017500000000660611331407220013762 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/* Public interface of event.c: the poll()-based event loop, timed events,
   per-descriptor handlers and condition variables. */

/* Time of day as last sampled by the event loop with gettimeofday(). */
extern struct timeval current_time;
/* The zero timeval. */
extern struct timeval null_time;
/* Cleared by the event loop whenever descriptor activity is seen. */
extern int diskIsClean;

/* A timed event, kept in a doubly-linked queue ordered by `time`.  The
   record is over-allocated so that `data` holds the caller's payload. */
typedef struct _TimeEventHandler {
    struct timeval time;
    struct _TimeEventHandler *previous, *next;
    int (*handler)(struct _TimeEventHandler*);
    char data[1];
} TimeEventHandlerRec, *TimeEventHandlerPtr;

/* A handler waiting for `poll_events` on `fd`.  Handlers for the same
   descriptor form a doubly-linked list; `handler` returns non-zero when
   it is finished and should be unregistered.
   NOTE(review): `fd` is a short while makeFdEvent() takes an int, so a
   descriptor above SHRT_MAX would be truncated -- confirm descriptor
   numbers are bounded elsewhere. */
typedef struct _FdEventHandler {
    short fd;
    short poll_events;
    struct _FdEventHandler *previous, *next;
    int (*handler)(int, struct _FdEventHandler*);
    char data[1];
} FdEventHandlerRec, *FdEventHandlerPtr;

/* A handler waiting on a condition; `data` holds the caller's payload. */
typedef struct _ConditionHandler {
    struct _Condition *condition;
    struct _ConditionHandler *previous, *next;
    int (*handler)(int, struct _ConditionHandler*);
    char data[1];
} ConditionHandlerRec, *ConditionHandlerPtr;

/* A condition: the head of the list of handlers waiting on it. */
typedef struct _Condition {
    ConditionHandlerPtr handlers;
} ConditionRec, *ConditionPtr;

/* Installs signal handlers (when built with HAVE_FORK) and resets the
   event queues. */
void initEvents(void);
/* Restores default signal dispositions; used in forked children. */
void uninitEvents(void);
/* Fills `ss` with the signals the event loop handles. */
void interestingSignals(sigset_t *ss);

/* Schedules `handler` to run `seconds` from current_time (immediately if
   negative), copying `dsize` bytes of `data` into the event.  Returns
   NULL on allocation failure. */
TimeEventHandlerPtr scheduleTimeEvent(int seconds,
                                      int (*handler)(TimeEventHandlerPtr),
                                      int dsize, void *data);

/* Returns s1 - s2 in microseconds. */
int timeval_minus_usec(const struct timeval *s1, const struct timeval *s2)
    ATTRIBUTE((pure));
/* Removes a pending timed event from the queue and frees it. */
void cancelTimeEvent(TimeEventHandlerPtr);
/* Reserves a poll slot for `fd`; returns its index or -1. */
int allocateFdEventNum(int fd);
/* Releases poll slot `i`, shifting later slots down. */
void deallocateFdEventNum(int i);
/* Stores the time of the earliest pending timed event, or all-ones fields
   when the queue is empty. */
void timeToSleep(struct timeval *);
/* Runs and frees every timed event that is due. */
void runTimeEventQueue(void);

/* Allocates an fd handler without registering it. */
FdEventHandlerPtr makeFdEvent(int fd, int poll_events,
                              int (*handler)(int, FdEventHandlerPtr),
                              int dsize, void *data);
/* Allocates and registers an fd handler; NULL on failure. */
FdEventHandlerPtr registerFdEvent(int fd, int poll_events,
                                  int (*handler)(int, FdEventHandlerPtr),
                                  int dsize, void *data);
/* Registers a handler built by makeFdEvent(); frees it and returns NULL
   if no poll slot can be allocated. */
FdEventHandlerPtr registerFdEventHelper(FdEventHandlerPtr event);
/* Unlinks and frees a registered handler; aborts if its descriptor has
   no poll slot. */
void unregisterFdEvent(FdEventHandlerPtr event);
/* Schedules a call of the handlers on `fd` interested in `what`, passing
   them `status`. */
void pokeFdEvent(int fd, int status, int what);

/* Returns non-zero if an exit is pending, a timed event is due or a
   descriptor is ready. */
int workToDo(void);
/* Runs the main loop until polipoExit() or a fatal poll() error. */
void eventLoop(void);

/* Allocates and initialises a condition; NULL on failure. */
ConditionPtr makeCondition(void);
/* Initialises a caller-allocated condition. */
void initCondition(ConditionPtr);
/* Calls every handler waiting on `condition` with status 0, removing
   those that return non-zero. */
void signalCondition(ConditionPtr condition);
/* Registers `handler` on `condition`; NULL on allocation failure. */
ConditionHandlerPtr
conditionWait(ConditionPtr condition,
              int (*handler)(int, ConditionHandlerPtr),
              int dsize, void *data);
/* Removes a handler from its condition and frees it, without calling
   it. */
void unregisterConditionHandler(ConditionHandlerPtr);
/* Calls the handler with status -1, then unregisters it. */
void abortConditionHandler(ConditionHandlerPtr);
/* Asks the event loop to terminate. */
void polipoExit(void);
polipo-1.0.4.1/event.c0000644000175000017500000005034011331407220013747 0ustar  chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" #ifdef HAVE_FORK static volatile sig_atomic_t exitFlag = 0; #else static int exitFlag = 0; #endif static int in_signalCondition = 0; static TimeEventHandlerPtr timeEventQueue; static TimeEventHandlerPtr timeEventQueueLast; struct timeval current_time; struct timeval null_time = {0,0}; static int fdEventSize = 0; static int fdEventNum = 0; static struct pollfd *poll_fds = NULL; static FdEventHandlerPtr *fdEvents = NULL, *fdEventsLast = NULL; int diskIsClean = 1; static int fds_invalid = 0; static inline int timeval_cmp(struct timeval *t1, struct timeval *t2) { if(t1->tv_sec < t2->tv_sec) return -1; else if(t1->tv_sec > t2->tv_sec) return +1; else if(t1->tv_usec < t2->tv_usec) return -1; else if(t1->tv_usec > t2->tv_usec) return +1; else return 0; } static inline void timeval_minus(struct timeval *d, const struct timeval *s1, const struct timeval *s2) { if(s1->tv_usec > s2->tv_usec) { d->tv_usec = s1->tv_usec - s2->tv_usec; d->tv_sec = s1->tv_sec - s2->tv_sec; } else { d->tv_usec = s1->tv_usec + 1000000 - s2->tv_usec; d->tv_sec = s1->tv_sec - s2->tv_sec - 1; } } int timeval_minus_usec(const struct timeval *s1, const struct timeval *s2) { return (s1->tv_sec - s2->tv_sec) * 1000000 + s1->tv_usec - s2->tv_usec; } #ifdef HAVE_FORK static void sigexit(int signo) { if(signo == SIGUSR1) exitFlag = 1; else if(signo == SIGUSR2) exitFlag = 2; else exitFlag = 3; } #endif void initEvents() { #ifdef HAVE_FORK struct sigaction sa; sigset_t ss; sigemptyset(&ss); sa.sa_handler = SIG_IGN; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGPIPE, &sa, NULL); sigemptyset(&ss); sa.sa_handler = sigexit; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGTERM, &sa, NULL); sigemptyset(&ss); sa.sa_handler = sigexit; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGHUP, &sa, NULL); sigemptyset(&ss); sa.sa_handler = sigexit; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGINT, &sa, NULL); sigemptyset(&ss); sa.sa_handler = sigexit; sa.sa_mask = ss; sa.sa_flags = 0; 
sigaction(SIGUSR1, &sa, NULL); sigemptyset(&ss); sa.sa_handler = sigexit; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGUSR2, &sa, NULL); #endif timeEventQueue = NULL; timeEventQueueLast = NULL; fdEventSize = 0; fdEventNum = 0; poll_fds = NULL; fdEvents = NULL; fdEventsLast = NULL; } void uninitEvents(void) { #ifdef HAVE_FORK struct sigaction sa; sigset_t ss; sigemptyset(&ss); sa.sa_handler = SIG_DFL; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGTERM, &sa, NULL); sigemptyset(&ss); sa.sa_handler = SIG_DFL; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGHUP, &sa, NULL); sigemptyset(&ss); sa.sa_handler = SIG_DFL; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGINT, &sa, NULL); sigemptyset(&ss); sa.sa_handler = SIG_DFL; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGUSR1, &sa, NULL); sigemptyset(&ss); sa.sa_handler = SIG_DFL; sa.sa_mask = ss; sa.sa_flags = 0; sigaction(SIGUSR2, &sa, NULL); #endif } #ifdef HAVE_FORK void interestingSignals(sigset_t *ss) { sigemptyset(ss); sigaddset(ss, SIGTERM); sigaddset(ss, SIGHUP); sigaddset(ss, SIGINT); sigaddset(ss, SIGUSR1); sigaddset(ss, SIGUSR2); } #endif void timeToSleep(struct timeval *time) { if(!timeEventQueue) { time->tv_sec = ~0L; time->tv_usec = ~0L; } else { *time = timeEventQueue->time; } } static TimeEventHandlerPtr enqueueTimeEvent(TimeEventHandlerPtr event) { TimeEventHandlerPtr otherevent; /* We try to optimise two cases -- the event happens very soon, or it happens after most of the other events. 
*/ if(timeEventQueue == NULL || timeval_cmp(&event->time, &timeEventQueue->time) < 0) { /* It's the first event */ event->next = timeEventQueue; event->previous = NULL; if(timeEventQueue) { timeEventQueue->previous = event; } else { timeEventQueueLast = event; } timeEventQueue = event; } else if(timeval_cmp(&event->time, &timeEventQueueLast->time) >= 0) { /* It's the last one */ event->next = NULL; event->previous = timeEventQueueLast; timeEventQueueLast->next = event; timeEventQueueLast = event; } else { /* Walk from the end */ otherevent = timeEventQueueLast; while(otherevent->previous && timeval_cmp(&event->time, &otherevent->previous->time) < 0) { otherevent = otherevent->previous; } event->next = otherevent; event->previous = otherevent->previous; if(otherevent->previous) { otherevent->previous->next = event; } else { timeEventQueue = event; } otherevent->previous = event; } return event; } TimeEventHandlerPtr scheduleTimeEvent(int seconds, int (*handler)(TimeEventHandlerPtr), int dsize, void *data) { struct timeval when; TimeEventHandlerPtr event; if(seconds >= 0) { when = current_time; when.tv_sec += seconds; } else { when.tv_sec = 0; when.tv_usec = 0; } event = malloc(sizeof(TimeEventHandlerRec) - 1 + dsize); if(event == NULL) { do_log(L_ERROR, "Couldn't allocate time event handler -- " "discarding all objects.\n"); exitFlag = 2; return NULL; } event->time = when; event->handler = handler; /* Let the compiler optimise the common case */ if(dsize == sizeof(void*)) memcpy(event->data, data, sizeof(void*)); else if(dsize > 0) memcpy(event->data, data, dsize); return enqueueTimeEvent(event); } void cancelTimeEvent(TimeEventHandlerPtr event) { if(event == timeEventQueue) timeEventQueue = event->next; if(event == timeEventQueueLast) timeEventQueueLast = event->previous; if(event->next) event->next->previous = event->previous; if(event->previous) event->previous->next = event->next; free(event); } int allocateFdEventNum(int fd) { int i; if(fdEventNum < 
fdEventSize) { i = fdEventNum; fdEventNum++; } else { struct pollfd *new_poll_fds; FdEventHandlerPtr *new_fdEvents, *new_fdEventsLast; int new_size = 3 * fdEventSize / 2 + 1; new_poll_fds = realloc(poll_fds, new_size * sizeof(struct pollfd)); if(!new_poll_fds) return -1; new_fdEvents = realloc(fdEvents, new_size * sizeof(FdEventHandlerPtr)); if(!new_fdEvents) return -1; new_fdEventsLast = realloc(fdEventsLast, new_size * sizeof(FdEventHandlerPtr)); if(!new_fdEventsLast) return -1; poll_fds = new_poll_fds; fdEvents = new_fdEvents; fdEventsLast = new_fdEventsLast; fdEventSize = new_size; i = fdEventNum; fdEventNum++; } poll_fds[i].fd = fd; poll_fds[i].events = POLLERR | POLLHUP | POLLNVAL; poll_fds[i].revents = 0; fdEvents[i] = NULL; fdEventsLast[i] = NULL; fds_invalid = 1; return i; } void deallocateFdEventNum(int i) { if(i < fdEventNum - 1) { memmove(&poll_fds[i], &poll_fds[i + 1], (fdEventNum - i - 1) * sizeof(struct pollfd)); memmove(&fdEvents[i], &fdEvents[i + 1], (fdEventNum - i - 1) * sizeof(FdEventHandlerPtr)); memmove(&fdEventsLast[i], &fdEventsLast[i + 1], (fdEventNum - i - 1) * sizeof(FdEventHandlerPtr)); } fdEventNum--; fds_invalid = 1; } FdEventHandlerPtr makeFdEvent(int fd, int poll_events, int (*handler)(int, FdEventHandlerPtr), int dsize, void *data) { FdEventHandlerPtr event; event = malloc(sizeof(FdEventHandlerRec) - 1 + dsize); if(event == NULL) { do_log(L_ERROR, "Couldn't allocate fd event handler -- " "discarding all objects.\n"); exitFlag = 2; return NULL; } event->fd = fd; event->poll_events = poll_events; event->handler = handler; /* Let the compiler optimise the common cases */ if(dsize == sizeof(void*)) memcpy(event->data, data, sizeof(void*)); else if(dsize == sizeof(StreamRequestRec)) memcpy(event->data, data, sizeof(StreamRequestRec)); else if(dsize > 0) memcpy(event->data, data, dsize); return event; } FdEventHandlerPtr registerFdEventHelper(FdEventHandlerPtr event) { int i; int fd = event->fd; for(i = 0; i < fdEventNum; i++) 
if(poll_fds[i].fd == fd) break; if(i >= fdEventNum) i = allocateFdEventNum(fd); if(i < 0) { free(event); return NULL; } event->next = NULL; event->previous = fdEventsLast[i]; if(fdEvents[i] == NULL) { fdEvents[i] = event; } else { fdEventsLast[i]->next = event; } fdEventsLast[i] = event; poll_fds[i].events |= event->poll_events; return event; } FdEventHandlerPtr registerFdEvent(int fd, int poll_events, int (*handler)(int, FdEventHandlerPtr), int dsize, void *data) { FdEventHandlerPtr event; event = makeFdEvent(fd, poll_events, handler, dsize, data); if(event == NULL) return NULL; return registerFdEventHelper(event); } static int recomputePollEvents(FdEventHandlerPtr event) { int pe = 0; while(event) { pe |= event->poll_events; event = event->next; } return pe | POLLERR | POLLHUP | POLLNVAL; } static void unregisterFdEventI(FdEventHandlerPtr event, int i) { assert(i < fdEventNum && poll_fds[i].fd == event->fd); if(fdEvents[i] == event) { assert(!event->previous); fdEvents[i] = event->next; } else { event->previous->next = event->next; } if(fdEventsLast[i] == event) { assert(!event->next); fdEventsLast[i] = event->previous; } else { event->next->previous = event->previous; } free(event); if(fdEvents[i] == NULL) { deallocateFdEventNum(i); } else { poll_fds[i].events = recomputePollEvents(fdEvents[i]) | POLLERR | POLLHUP | POLLNVAL; } } void unregisterFdEvent(FdEventHandlerPtr event) { int i; for(i = 0; i < fdEventNum; i++) { if(poll_fds[i].fd == event->fd) { unregisterFdEventI(event, i); return; } } abort(); } void runTimeEventQueue() { TimeEventHandlerPtr event; int done; while(timeEventQueue && timeval_cmp(&timeEventQueue->time, ¤t_time) <= 0) { event = timeEventQueue; timeEventQueue = event->next; if(timeEventQueue) timeEventQueue->previous = NULL; else timeEventQueueLast = NULL; done = event->handler(event); assert(done); free(event); } } static FdEventHandlerPtr findEventHelper(int revents, FdEventHandlerPtr events) { FdEventHandlerPtr event = events; 
while(event) { if(revents & event->poll_events) return event; event = event->next; } return NULL; } static FdEventHandlerPtr findEvent(int revents, FdEventHandlerPtr events) { FdEventHandlerPtr event; assert(!(revents & POLLNVAL)); if((revents & POLLHUP) || (revents & POLLERR)) { event = findEventHelper(POLLOUT, events); if(event) return event; event = findEventHelper(POLLIN, events); if(event) return event; return NULL; } if(revents & POLLOUT) { event = findEventHelper(POLLOUT, events); if(event) return event; } if(revents & POLLIN) { event = findEventHelper(POLLIN, events); if(event) return event; } return NULL; } typedef struct _FdEventHandlerPoke { int fd; int what; int status; } FdEventHandlerPokeRec, *FdEventHandlerPokePtr; static int pokeFdEventHandler(TimeEventHandlerPtr tevent) { FdEventHandlerPokePtr poke = (FdEventHandlerPokePtr)tevent->data; int fd = poke->fd; int what = poke->what; int status = poke->status; int done; FdEventHandlerPtr event, next; int i; for(i = 0; i < fdEventNum; i++) { if(poll_fds[i].fd == fd) break; } if(i >= fdEventNum) return 1; event = fdEvents[i]; while(event) { next = event->next; if(event->poll_events & what) { done = event->handler(status, event); if(done) { if(fds_invalid) unregisterFdEvent(event); else unregisterFdEventI(event, i); } if(fds_invalid) break; } event = next; } return 1; } void pokeFdEvent(int fd, int status, int what) { TimeEventHandlerPtr handler; FdEventHandlerPokeRec poke; poke.fd = fd; poke.status = status; poke.what = what; handler = scheduleTimeEvent(0, pokeFdEventHandler, sizeof(poke), &poke); if(!handler) { do_log(L_ERROR, "Couldn't allocate handler.\n"); } } int workToDo() { struct timeval sleep_time; int rc; if(exitFlag) return 1; timeToSleep(&sleep_time); gettimeofday(¤t_time, NULL); if(timeval_cmp(&sleep_time, ¤t_time) <= 0) return 1; rc = poll(poll_fds, fdEventNum, 0); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't poll"); return 1; } return(rc >= 1); } void eventLoop() { struct timeval 
sleep_time, timeout; int rc, i, done, n; FdEventHandlerPtr event; int fd0; gettimeofday(¤t_time, NULL); while(1) { again: if(exitFlag) { if(exitFlag < 3) reopenLog(); if(exitFlag >= 2) { discardObjects(1, 0); if(exitFlag >= 3) return; free_chunk_arenas(); } else { writeoutObjects(1); } initForbidden(); exitFlag = 0; } timeToSleep(&sleep_time); if(sleep_time.tv_sec == -1) { rc = poll(poll_fds, fdEventNum, diskIsClean ? -1 : idleTime * 1000); } else if(timeval_cmp(&sleep_time, ¤t_time) <= 0) { runTimeEventQueue(); continue; } else { gettimeofday(¤t_time, NULL); if(timeval_cmp(&sleep_time, ¤t_time) <= 0) { runTimeEventQueue(); continue; } else { int t; timeval_minus(&timeout, &sleep_time, ¤t_time); t = timeout.tv_sec * 1000 + (timeout.tv_usec + 999) / 1000; rc = poll(poll_fds, fdEventNum, diskIsClean ? t : MIN(idleTime * 1000, t)); } } gettimeofday(¤t_time, NULL); if(rc < 0) { if(errno == EINTR) { continue; } else if(errno == ENOMEM) { free_chunk_arenas(); do_log(L_ERROR, "Couldn't poll: out of memory. " "Sleeping for one second.\n"); sleep(1); } else { do_log_error(L_ERROR, errno, "Couldn't poll"); exitFlag = 3; } continue; } if(rc == 0) { if(!diskIsClean) { timeToSleep(&sleep_time); if(timeval_cmp(&sleep_time, ¤t_time) > 0) writeoutObjects(0); } continue; } /* Rather than tracking all changes to the in-memory cache, we assume that something changed whenever we see any activity. 
*/ diskIsClean = 0; fd0 = (current_time.tv_usec ^ (current_time.tv_usec >> 16)) % fdEventNum; n = rc; for(i = 0; i < fdEventNum; i++) { int j = (i + fd0) % fdEventNum; if(n <= 0) break; if(poll_fds[j].revents) { n--; event = findEvent(poll_fds[j].revents, fdEvents[j]); if(!event) continue; done = event->handler(0, event); if(done) { if(fds_invalid) unregisterFdEvent(event); else unregisterFdEventI(event, j); } if(fds_invalid) { fds_invalid = 0; goto again; } } } } } void initCondition(ConditionPtr condition) { condition->handlers = NULL; } ConditionPtr makeCondition(void) { ConditionPtr condition; condition = malloc(sizeof(ConditionRec)); if(condition == NULL) return NULL; initCondition(condition); return condition; } ConditionHandlerPtr conditionWait(ConditionPtr condition, int (*handler)(int, ConditionHandlerPtr), int dsize, void *data) { ConditionHandlerPtr chandler; assert(!in_signalCondition); chandler = malloc(sizeof(ConditionHandlerRec) - 1 + dsize); if(!chandler) return NULL; chandler->condition = condition; chandler->handler = handler; /* Let the compiler optimise the common case */ if(dsize == sizeof(void*)) memcpy(chandler->data, data, sizeof(void*)); else if(dsize > 0) memcpy(chandler->data, data, dsize); if(condition->handlers) condition->handlers->previous = chandler; chandler->next = condition->handlers; chandler->previous = NULL; condition->handlers = chandler; return chandler; } void unregisterConditionHandler(ConditionHandlerPtr handler) { ConditionPtr condition = handler->condition; assert(!in_signalCondition); if(condition->handlers == handler) condition->handlers = condition->handlers->next; if(handler->next) handler->next->previous = handler->previous; if(handler->previous) handler->previous->next = handler->next; free(handler); } void abortConditionHandler(ConditionHandlerPtr handler) { int done; done = handler->handler(-1, handler); assert(done); unregisterConditionHandler(handler); } void signalCondition(ConditionPtr condition) { 
ConditionHandlerPtr handler; int done; assert(!in_signalCondition); in_signalCondition++; handler = condition->handlers; while(handler) { ConditionHandlerPtr next = handler->next; done = handler->handler(0, handler); if(done) { if(handler == condition->handlers) condition->handlers = next; if(next) next->previous = handler->previous; if(handler->previous) handler->previous->next = next; else condition->handlers = next; free(handler); } handler = next; } in_signalCondition--; } void polipoExit() { exitFlag = 3; } polipo-1.0.4.1/dns.h0000644000175000017500000000336011331407220013417 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ extern char *nameServer; extern int useGethostbyname; #define DNS_A 0 #define DNS_CNAME 1 typedef struct _GethostbynameRequest { AtomPtr name; AtomPtr addr; AtomPtr error_message; int count; ObjectPtr object; int (*handler)(int, struct _GethostbynameRequest*); void *data; } GethostbynameRequestRec, *GethostbynameRequestPtr; /* Note that this requires no alignment */ typedef struct _HostAddress { char af; /* 4 or 6 */ char data[16]; } HostAddressRec, *HostAddressPtr; void preinitDns(void); void initDns(void); int do_gethostbyname(char *name, int count, int (*handler)(int, GethostbynameRequestPtr), void *data); polipo-1.0.4.1/dns.c0000644000175000017500000014230611331407220013416 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" #ifndef NO_STANDARD_RESOLVER #ifndef NO_FANCY_RESOLVER int dnsUseGethostbyname = 1; #else const int dnsUseGethostbyname = 3; #endif #else #ifndef NO_FANCY_RESOLVER const int dnsUseGethostbyname = 0; #else #error use no resolver at all? #endif #endif #ifndef NO_FANCY_RESOLVER AtomPtr dnsNameServer = NULL; int dnsMaxTimeout = 60; #endif #ifndef NO_STANDARD_RESOLVER int dnsGethostbynameTtl = 1200; #endif int dnsNegativeTtl = 120; #ifdef HAVE_IPv6 int dnsQueryIPv6 = 2; #else const int dnsQueryIPv6 = 0; #endif typedef struct _DnsQuery { unsigned id; AtomPtr name; ObjectPtr object; AtomPtr inet4, inet6; time_t ttl4, ttl6; time_t time; int timeout; TimeEventHandlerPtr timeout_handler; struct _DnsQuery *next; } DnsQueryRec, *DnsQueryPtr; union { struct sockaddr sa; struct sockaddr_in sin; #ifdef HAVE_IPv6 struct sockaddr_in6 sin6; #endif } nameserverAddress_storage; #ifndef NO_FANCY_RESOLVER static AtomPtr atomLocalhost, atomLocalhostDot; #define nameserverAddress nameserverAddress_storage.sa static DnsQueryPtr inFlightDnsQueries; static DnsQueryPtr inFlightDnsQueriesLast; #endif static int really_do_gethostbyname(AtomPtr name, ObjectPtr object); static int really_do_dns(AtomPtr name, ObjectPtr object); #ifndef NO_FANCY_RESOLVER static int stringToLabels(char *buf, int offset, int n, char *string); static int labelsToString(char *buf, int offset, int n, char *d, int m, int *j_return); static int dnsBuildQuery(int id, char *buf, int offset, int n, AtomPtr name, int af); static int dnsReplyHandler(int abort, FdEventHandlerPtr event); static int dnsReplyId(char *buf, int offset, int n, int *id_return); static int dnsDecodeReply(char *buf, int offset, int n, int *id_return, AtomPtr *name_return, AtomPtr *value_return, int *af_return, unsigned *ttl_return); static int dnsHandler(int status, ConditionHandlerPtr chandler); static int dnsGethostbynameFallback(int id, AtomPtr message); static int sendQuery(DnsQueryPtr query); static int idSeed; #endif #ifndef 
NO_FANCY_RESOLVER static int parseResolvConf(char *filename) { FILE *f; char buf[512]; char *p, *q; int n; AtomPtr nameserver = NULL; f = fopen(filename, "r"); if(f == NULL) { do_log_error(L_ERROR, errno, "DNS: couldn't open %s", filename); return 0; } while(1) { p = fgets(buf, 512, f); if(p == NULL) break; n = strlen(buf); if(buf[n - 1] != '\n') { int c; do_log(L_WARN, "DNS: overly long line in %s -- skipping.\n", filename); do { c = fgetc(f); if(c == EOF) break; } while(c != '\n'); if(c == EOF) break; } while(*p == ' ' || *p == '\t') p++; if(strcasecmp_n("nameserver", p, 10) != 0) continue; p += 10; while(*p == ' ' || *p == '\t') p++; q = p; while(*q == '.' || *q == ':' || digit(*q) || letter(*q)) q++; if(*q != ' ' && *q != '\t' && *q != '\r' && *q != '\n') { do_log(L_WARN, "DNS: couldn't parse line in %s -- skipping.\n", filename); continue; } nameserver = internAtomLowerN(p, q - p); break; } fclose(f); if(nameserver) { dnsNameServer = nameserver; return 1; } else { return 0; } } #endif void preinitDns() { #ifdef HAVE_IPv6 int fd; #endif assert(sizeof(struct in_addr) == 4); #ifdef HAVE_IPv6 assert(sizeof(struct in6_addr) == 16); #endif #ifndef NO_STANDARD_RESOLVER CONFIG_VARIABLE(dnsGethostbynameTtl, CONFIG_TIME, "TTL for gethostbyname addresses."); #endif #ifdef HAVE_IPv6 fd = socket(PF_INET6, SOCK_STREAM, 0); if(fd < 0) { if(errno == EPROTONOSUPPORT || errno == EAFNOSUPPORT) { dnsQueryIPv6 = 0; } else { do_log_error(L_WARN, errno, "DNS: couldn't create socket"); } } else { close(fd); } #endif #ifndef NO_FANCY_RESOLVER parseResolvConf("/etc/resolv.conf"); if(dnsNameServer == NULL || dnsNameServer->string[0] == '\0') dnsNameServer = internAtom("127.0.0.1"); CONFIG_VARIABLE(dnsMaxTimeout, CONFIG_TIME, "Max timeout for DNS queries."); CONFIG_VARIABLE(dnsNegativeTtl, CONFIG_TIME, "TTL for negative DNS replies with no TTL."); CONFIG_VARIABLE(dnsNameServer, CONFIG_ATOM_LOWER, "The name server to use."); #ifndef NO_STANDARD_RESOLVER 
CONFIG_VARIABLE(dnsUseGethostbyname, CONFIG_TETRASTATE, "Use the system resolver."); #endif #endif #ifdef HAVE_IPv6 CONFIG_VARIABLE(dnsQueryIPv6, CONFIG_TETRASTATE, "Query for IPv6 addresses."); #endif } void initDns() { #ifndef NO_FANCY_RESOLVER int rc; struct timeval t; struct sockaddr_in *sin = (struct sockaddr_in*)&nameserverAddress; #ifdef HAVE_IPv6 struct sockaddr_in6 *sin6 = (struct sockaddr_in6*)&nameserverAddress; #endif atomLocalhost = internAtom("localhost"); atomLocalhostDot = internAtom("localhost."); inFlightDnsQueries = NULL; inFlightDnsQueriesLast = NULL; gettimeofday(&t, NULL); idSeed = t.tv_usec & 0xFFFF; sin->sin_family = AF_INET; sin->sin_port = htons(53); rc = inet_aton(dnsNameServer->string, &sin->sin_addr); #ifdef HAVE_IPv6 if(rc != 1) { sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(53); rc = inet_pton(AF_INET6, dnsNameServer->string, &sin6->sin6_addr); } #endif if(rc != 1) { do_log(L_ERROR, "DNS: couldn't parse name server %s.\n", dnsNameServer->string); exit(1); } #endif } int do_gethostbyname(char *origname, int count, int (*handler)(int, GethostbynameRequestPtr), void *data) { ObjectPtr object; int n = strlen(origname); AtomPtr name; GethostbynameRequestRec request; int done, rc; memset(&request, 0, sizeof(request)); request.name = NULL; request.addr = NULL; request.error_message = NULL; request.count = count; request.handler = handler; request.data = data; if(n <= 0 || n > 131) { if(n <= 0) { request.error_message = internAtom("empty name"); do_log(L_ERROR, "Empty DNS name.\n"); done = handler(-EINVAL, &request); } else { request.error_message = internAtom("name too long"); do_log(L_ERROR, "DNS name too long.\n"); done = handler(-ENAMETOOLONG, &request); } assert(done); releaseAtom(request.error_message); return 1; } if(origname[n - 1] == '.') n--; name = internAtomLowerN(origname, n); if(name == NULL) { request.error_message = internAtom("couldn't allocate name"); do_log(L_ERROR, "Couldn't allocate DNS name.\n"); done = 
handler(-ENOMEM, &request); assert(done); releaseAtom(request.error_message); return 1; } request.name = name; request.addr = NULL; request.error_message = NULL; request.count = count; request.object = NULL; request.handler = handler; request.data = data; object = findObject(OBJECT_DNS, name->string, name->length); if(object == NULL || objectMustRevalidate(object, NULL)) { if(object) { privatiseObject(object, 0); releaseObject(object); } object = makeObject(OBJECT_DNS, name->string, name->length, 1, 0, NULL, NULL); if(object == NULL) { request.error_message = internAtom("Couldn't allocate object"); do_log(L_ERROR, "Couldn't allocate DNS object.\n"); done = handler(-ENOMEM, &request); assert(done); releaseAtom(name); releaseAtom(request.error_message); return 1; } } if((object->flags & (OBJECT_INITIAL | OBJECT_INPROGRESS)) == OBJECT_INITIAL) { if(dnsUseGethostbyname >= 3) rc = really_do_gethostbyname(name, object); else rc = really_do_dns(name, object); if(rc < 0) { assert(!(object->flags & (OBJECT_INITIAL | OBJECT_INPROGRESS))); goto fail; } } if(dnsUseGethostbyname >= 3) assert(!(object->flags & OBJECT_INITIAL)); #ifndef NO_FANCY_RESOLVER if(object->flags & OBJECT_INITIAL) { ConditionHandlerPtr chandler; assert(object->flags & OBJECT_INPROGRESS); request.object = object; chandler = conditionWait(&object->condition, dnsHandler, sizeof(request), &request); if(chandler == NULL) { rc = ENOMEM; goto fail; } return 1; } #endif if(object->headers && object->headers->length > 0) { if(object->headers->string[0] == DNS_A) assert(((object->headers->length - 1) % sizeof(HostAddressRec)) == 0); else assert(object->headers->string[0] == DNS_CNAME); request.addr = retainAtom(object->headers); } else if(object->message) { request.error_message = retainAtom(object->message); } releaseObject(object); if(request.addr && request.addr->length > 0) done = handler(1, &request); else done = handler(-EDNS_HOST_NOT_FOUND, &request); assert(done); releaseAtom(request.addr); request.addr = 
NULL; releaseAtom(request.name); request.name = NULL; releaseAtom(request.error_message); request.error_message = NULL; return 1; fail: releaseNotifyObject(object); done = handler(-errno, &request); assert(done); releaseAtom(name); return 1; } static int dnsDelayedErrorNotifyHandler(TimeEventHandlerPtr event) { int done; GethostbynameRequestRec request = *(GethostbynameRequestPtr)event->data; done = request.handler(-EDNS_HOST_NOT_FOUND, &request); assert(done); releaseAtom(request.name); request.name = NULL; releaseAtom(request.addr); request.addr = NULL; releaseAtom(request.error_message); request.error_message = NULL; return 1; } static int dnsDelayedDoneNotifyHandler(TimeEventHandlerPtr event) { int done; GethostbynameRequestRec request = *(GethostbynameRequestPtr)event->data; done = request.handler(1, &request); assert(done); releaseAtom(request.name); request.name = NULL; releaseAtom(request.addr); request.addr = NULL; releaseAtom(request.error_message); request.error_message = NULL; return 1; } static int dnsDelayedNotify(int error, GethostbynameRequestPtr request) { TimeEventHandlerPtr handler; if(error) handler = scheduleTimeEvent(0, dnsDelayedErrorNotifyHandler, sizeof(*request), request); else handler = scheduleTimeEvent(0, dnsDelayedDoneNotifyHandler, sizeof(*request), request); if(handler == NULL) { do_log(L_ERROR, "Couldn't schedule DNS notification.\n"); return -1; } return 1; } #ifdef HAVE_IPv6 AtomPtr rfc2732(AtomPtr name) { char buf[38]; int rc; AtomPtr a = NULL; if(name->length < 38 && name->string[0] == '[' && name->string[name->length - 1] == ']') { struct in6_addr in6a; memcpy(buf, name->string + 1, name->length - 2); buf[name->length - 2] = '\0'; rc = inet_pton(AF_INET6, buf, &in6a); if(rc == 1) { char s[1 + sizeof(HostAddressRec)]; memset(s, 0, sizeof(s)); s[0] = DNS_A; s[1] = 6; memcpy(s + 2, &in6a, 16); a = internAtomN(s, 1 + sizeof(HostAddressRec)); if(a == NULL) return NULL; } } return a; } /* Used for sorting host addresses depending on 
the value of dnsQueryIPv6 */ int compare_hostaddr(const void *av, const void *bv) { const HostAddressRec *a = av, *b = bv; int r; if(a->af == 4) { if(b->af == 4) r = 0; else r = -1; } else { if(b->af == 6) r = 0; else r = 1; } if(dnsQueryIPv6 >= 2) return -r; else return r; } #ifndef NO_STANDARD_RESOLVER static int really_do_gethostbyname(AtomPtr name, ObjectPtr object) { struct addrinfo *ai, *entry, hints; int rc; int error, i; char buf[1024]; AtomPtr a; a = rfc2732(name); if(a) { object->headers = a; object->age = current_time.tv_sec; object->expires = current_time.tv_sec + 240; object->flags &= ~(OBJECT_INITIAL | OBJECT_INPROGRESS); notifyObject(object); return 0; } memset(&hints, 0, sizeof(hints)); hints.ai_protocol = IPPROTO_TCP; if(dnsQueryIPv6 <= 0) hints.ai_family = AF_INET; else if(dnsQueryIPv6 >= 3) hints.ai_family = AF_INET6; rc = getaddrinfo(name->string, NULL, &hints, &ai); switch(rc) { case 0: error = 0; break; case EAI_FAMILY: #ifdef EAI_ADDRFAMILY case EAI_ADDRFAMILY: #endif case EAI_SOCKTYPE: error = EAFNOSUPPORT; break; case EAI_BADFLAGS: error = EINVAL; break; case EAI_SERVICE: error = EDNS_NO_RECOVERY; break; #ifdef EAI_NONAME case EAI_NONAME: #endif #ifdef EAI_NODATA case EAI_NODATA: #endif error = EDNS_NO_ADDRESS; break; case EAI_FAIL: error = EDNS_NO_RECOVERY; break; case EAI_AGAIN: error = EDNS_TRY_AGAIN; break; #ifdef EAI_MEMORY case EAI_MEMORY: error = ENOMEM; break; #endif case EAI_SYSTEM: error = errno; break; default: error = EUNKNOWN; } if(error == EDNS_NO_ADDRESS) { object->headers = NULL; object->age = current_time.tv_sec; object->expires = current_time.tv_sec + dnsNegativeTtl; object->flags &= ~(OBJECT_INITIAL | OBJECT_INPROGRESS); notifyObject(object); return 0; } else if(error) { do_log_error(L_ERROR, error, "Getaddrinfo failed"); object->flags &= ~OBJECT_INPROGRESS; abortObject(object, 404, internAtomError(error, "Getaddrinfo failed")); notifyObject(object); return 0; } entry = ai; buf[0] = DNS_A; i = 0; while(entry) { 
HostAddressRec host; int host_valid = 0; if(entry->ai_family == AF_INET && entry->ai_protocol == IPPROTO_TCP) { if(dnsQueryIPv6 < 3) { host.af = 4; memset(host.data, 0, sizeof(host.data)); memcpy(&host.data, &((struct sockaddr_in*)entry->ai_addr)->sin_addr, 4); host_valid = 1; } } else if(entry->ai_family == AF_INET6 && entry->ai_protocol == IPPROTO_TCP) { if(dnsQueryIPv6 > 0) { host.af = 6; memset(&host.data, 0, sizeof(host.data)); memcpy(&host.data, &((struct sockaddr_in6*)entry->ai_addr)->sin6_addr, 16); host_valid = 1; } } if(host_valid) { if(i >= 1024 / sizeof(HostAddressRec) - 2) { do_log(L_ERROR, "Too many addresses for host %s\n", name->string); break; } memcpy(buf + 1 + i * sizeof(HostAddressRec), &host, sizeof(HostAddressRec)); i++; } entry = entry->ai_next; } freeaddrinfo(ai); if(i == 0) { do_log(L_ERROR, "Getaddrinfo returned no useful addresses\n"); object->flags &= ~OBJECT_INPROGRESS; abortObject(object, 404, internAtom("Getaddrinfo returned no useful addresses")); notifyObject(object); return 0; } if(1 <= dnsQueryIPv6 && dnsQueryIPv6 <= 2) qsort(buf + 1, i, sizeof(HostAddressRec), compare_hostaddr); a = internAtomN(buf, 1 + i * sizeof(HostAddressRec)); if(a == NULL) { object->flags &= ~OBJECT_INPROGRESS; abortObject(object, 501, internAtom("Couldn't allocate address")); notifyObject(object); return 0; } object->headers = a; object->age = current_time.tv_sec; object->expires = current_time.tv_sec + dnsGethostbynameTtl; object->flags &= ~(OBJECT_INITIAL | OBJECT_INPROGRESS); notifyObject(object); return 0; } #endif #else #ifndef NO_STANDARD_RESOLVER static int really_do_gethostbyname(AtomPtr name, ObjectPtr object) { struct hostent *host; char *s; AtomPtr a; int i, j; int error; host = gethostbyname(name->string); if(host == NULL) { switch(h_errno) { case HOST_NOT_FOUND: error = EDNS_HOST_NOT_FOUND; break; case NO_ADDRESS: error = EDNS_NO_ADDRESS; break; case NO_RECOVERY: error = EDNS_NO_RECOVERY; break; case TRY_AGAIN: error = EDNS_TRY_AGAIN; break; 
default: error = EUNKNOWN; break; } if(error == EDNS_HOST_NOT_FOUND) { object->headers = NULL; object->age = current_time.tv_sec; object->expires = current_time.tv_sec + dnsNegativeTtl; object->flags &= ~(OBJECT_INITIAL | OBJECT_INPROGRESS); object->flags &= ~OBJECT_INPROGRESS; notifyObject(object); return 0; } else { do_log_error(L_ERROR, error, "Gethostbyname failed"); abortObject(object, 404, internAtomError(error, "Gethostbyname failed")); object->flags &= ~OBJECT_INPROGRESS; notifyObject(object); return 0; } } if(host->h_addrtype != AF_INET) { do_log(L_ERROR, "Address is not AF_INET.\n"); object->flags &= ~OBJECT_INPROGRESS; abortObject(object, 404, internAtom("Address is not AF_INET")); notifyObject(object); return -1; } if(host->h_length != sizeof(struct in_addr)) { do_log(L_ERROR, "Address size inconsistent.\n"); object->flags &= ~OBJECT_INPROGRESS; abortObject(object, 404, internAtom("Address size inconsistent")); notifyObject(object); return 0; } i = 0; while(host->h_addr_list[i] != NULL) i++; s = malloc(1 + i * sizeof(HostAddressRec)); if(s == NULL) { a = NULL; } else { memset(s, 0, 1 + i * sizeof(HostAddressRec)); s[0] = DNS_A; for(j = 0; j < i; j++) { s[j * sizeof(HostAddressRec) + 1] = 4; memcpy(&s[j * sizeof(HostAddressRec) + 2], host->h_addr_list[j], sizeof(struct in_addr)); } a = internAtomN(s, i * sizeof(HostAddressRec) + 1); free(s); } if(!a) { object->flags &= ~OBJECT_INPROGRESS; abortObject(object, 501, internAtom("Couldn't allocate address")); notifyObject(object); return 0; } object->headers = a; object->age = current_time.tv_sec; object->expires = current_time.tv_sec + dnsGethostbynameTtl; object->flags &= ~(OBJECT_INITIAL | OBJECT_INPROGRESS); notifyObject(object); return 0; } #endif #endif #ifdef NO_STANDARD_RESOLVER static int really_do_gethostbyname(AtomPtr name, ObjectPtr object) { abort(); } #endif #ifndef NO_FANCY_RESOLVER static int dnsSocket = -1; static FdEventHandlerPtr dnsSocketHandler = NULL; static int dnsHandler(int status, 
ConditionHandlerPtr chandler) { GethostbynameRequestRec request = *(GethostbynameRequestPtr)chandler->data; ObjectPtr object = request.object; assert(!(object->flags & OBJECT_INPROGRESS)); if(object->headers) { request.addr = retainAtom(object->headers); dnsDelayedNotify(0, &request); } else { if(object->message) request.error_message = retainAtom(object->message); dnsDelayedNotify(1, &request); } releaseObject(object); return 1; } static int queryInFlight(DnsQueryPtr query) { DnsQueryPtr other; other = inFlightDnsQueries; while(other) { if(other == query) return 1; other = other->next; } return 0; } static void removeQuery(DnsQueryPtr query) { DnsQueryPtr previous; if(query == inFlightDnsQueries) { inFlightDnsQueries = query->next; if(inFlightDnsQueries == NULL) inFlightDnsQueriesLast = NULL; } else { previous = inFlightDnsQueries; while(previous->next) { if(previous->next == query) break; previous = previous->next; } assert(previous->next != NULL); previous->next = query->next; if(previous->next == NULL) inFlightDnsQueriesLast = previous; } } static void insertQuery(DnsQueryPtr query) { if(inFlightDnsQueriesLast) inFlightDnsQueriesLast->next = query; else inFlightDnsQueries = query; inFlightDnsQueriesLast = query; } static DnsQueryPtr findQuery(int id, AtomPtr name) { DnsQueryPtr query; query = inFlightDnsQueries; while(query) { if(query->id == id && (name == NULL || query->name == name)) return query; query = query->next; } return NULL; } static int dnsTimeoutHandler(TimeEventHandlerPtr event) { DnsQueryPtr query = *(DnsQueryPtr*)event->data; ObjectPtr object = query->object; int rc; /* People are reporting that this does happen. And I have no idea why. 
*/ if(!queryInFlight(query)) { do_log(L_ERROR, "BUG: timing out martian query (%s, flags: 0x%x).\n", query->name->string, (unsigned)object->flags); return 1; } query->timeout = MAX(10, query->timeout * 2); if(query->timeout > dnsMaxTimeout) { abortObject(object, 501, internAtom("Timeout")); goto fail; } else { rc = sendQuery(query); if(rc < 0) { if(rc != -EWOULDBLOCK && rc != -EAGAIN && rc != -ENOBUFS) { abortObject(object, 501, internAtomError(-rc, "Couldn't send DNS query")); goto fail; } /* else let it timeout */ } query->timeout_handler = scheduleTimeEvent(query->timeout, dnsTimeoutHandler, sizeof(query), &query); if(query->timeout_handler == NULL) { do_log(L_ERROR, "Couldn't schedule DNS timeout handler.\n"); abortObject(object, 501, internAtom("Couldn't schedule DNS timeout handler")); goto fail; } return 1; } fail: removeQuery(query); object->flags &= ~OBJECT_INPROGRESS; if(query->inet4) releaseAtom(query->inet4); if(query->inet6) releaseAtom(query->inet6); free(query); releaseNotifyObject(object); return 1; } static int establishDnsSocket() { int rc; #ifdef HAVE_IPv6 int inet6 = (nameserverAddress.sa_family == AF_INET6); int pf = inet6 ? PF_INET6 : PF_INET; int sa_size = inet6 ? 
sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in); #else int pf = PF_INET; int sa_size = sizeof(struct sockaddr_in); #endif if(dnsSocket < 0) { assert(!dnsSocketHandler); dnsSocket = socket(pf, SOCK_DGRAM, 0); if(dnsSocket < 0) { do_log_error(L_ERROR, errno, "Couldn't create DNS socket"); return -errno; } rc = connect(dnsSocket, &nameserverAddress, sa_size); if(rc < 0) { CLOSE(dnsSocket); dnsSocket = -1; do_log_error(L_ERROR, errno, "Couldn't create DNS \"connection\""); return -errno; } } if(!dnsSocketHandler) { dnsSocketHandler = registerFdEvent(dnsSocket, POLLIN, dnsReplyHandler, 0, NULL); if(dnsSocketHandler == NULL) { do_log(L_ERROR, "Couldn't register DNS socket handler.\n"); CLOSE(dnsSocket); dnsSocket = -1; return -ENOMEM; } } return 1; } static int sendQuery(DnsQueryPtr query) { char buf[512]; int buflen; int rc; int af[2]; int i; if(dnsSocket < 0) return -1; if(dnsQueryIPv6 <= 0) { af[0] = 4; af[1] = 0; } else if(dnsQueryIPv6 <= 2) { af[0] = 4; af[1] = 6; } else { af[0] = 6; af[1] = 0; } for(i = 0; i < 2; i++) { if(af[i] == 0) continue; if(af[i] == 4 && query->inet4) continue; else if(af[i] == 6 && query->inet6) continue; buflen = dnsBuildQuery(query->id, buf, 0, 512, query->name, af[i]); if(buflen <= 0) { do_log(L_ERROR, "Couldn't build DNS query.\n"); return buflen; } rc = send(dnsSocket, buf, buflen, 0); if(rc < buflen) { if(rc >= 0) { do_log(L_ERROR, "Couldn't send DNS query: partial send.\n"); return -EAGAIN; } else { do_log_error(L_ERROR, errno, "Couldn't send DNS query"); return -errno; } } } return 1; } static int really_do_dns(AtomPtr name, ObjectPtr object) { int rc; DnsQueryPtr query; AtomPtr message = NULL; int id; AtomPtr a = NULL; if(a == NULL) { if(name == atomLocalhost || name == atomLocalhostDot) { char s[1 + sizeof(HostAddressRec)]; memset(s, 0, sizeof(s)); s[0] = DNS_A; s[1] = 4; s[2] = 127; s[3] = 0; s[4] = 0; s[5] = 1; a = internAtomN(s, 1 + sizeof(HostAddressRec)); if(a == NULL) { abortObject(object, 501, internAtom("Couldn't 
allocate address")); notifyObject(object); errno = ENOMEM; return -1; } } } if(a == NULL) { struct in_addr ina; rc = inet_aton(name->string, &ina); if(rc == 1) { char s[1 + sizeof(HostAddressRec)]; memset(s, 0, sizeof(s)); s[0] = DNS_A; s[1] = 4; memcpy(s + 2, &ina, 4); a = internAtomN(s, 1 + sizeof(HostAddressRec)); if(a == NULL) { abortObject(object, 501, internAtom("Couldn't allocate address")); notifyObject(object); errno = ENOMEM; return -1; } } } #ifdef HAVE_IPv6 if(a == NULL) a = rfc2732(name); #endif if(a) { object->headers = a; object->age = current_time.tv_sec; object->expires = current_time.tv_sec + 240; object->flags &= ~(OBJECT_INITIAL | OBJECT_INPROGRESS); notifyObject(object); return 0; } rc = establishDnsSocket(); if(rc < 0) { do_log_error(L_ERROR, -rc, "Couldn't establish DNS socket.\n"); message = internAtomError(-rc, "Couldn't establish DNS socket"); goto fallback; } /* The id is used to speed up detecting replies to queries that are no longer current -- see dnsReplyHandler. 
*/ id = (idSeed++) & 0xFFFF; query = malloc(sizeof(DnsQueryRec)); if(query == NULL) { do_log(L_ERROR, "Couldn't allocate DNS query.\n"); message = internAtom("Couldn't allocate DNS query"); goto fallback; } query->id = id; query->inet4 = NULL; query->inet6 = NULL; query->name = name; query->time = current_time.tv_sec; query->object = retainObject(object); query->timeout = 4; query->timeout_handler = NULL; query->next = NULL; query->timeout_handler = scheduleTimeEvent(query->timeout, dnsTimeoutHandler, sizeof(query), &query); if(query->timeout_handler == NULL) { do_log(L_ERROR, "Couldn't schedule DNS timeout handler.\n"); message = internAtom("Couldn't schedule DNS timeout handler"); goto free_fallback; } insertQuery(query); object->flags |= OBJECT_INPROGRESS; rc = sendQuery(query); if(rc < 0) { if(rc != -EWOULDBLOCK && rc != -EAGAIN && rc != -ENOBUFS) { object->flags &= ~OBJECT_INPROGRESS; message = internAtomError(-rc, "Couldn't send DNS query"); goto remove_fallback; } /* else let it timeout */ } releaseAtom(message); return 1; remove_fallback: removeQuery(query); free_fallback: releaseObject(query->object); cancelTimeEvent(query->timeout_handler); free(query); fallback: if(dnsUseGethostbyname >= 1) { releaseAtom(message); do_log(L_WARN, "Falling back on gethostbyname.\n"); return really_do_gethostbyname(name, object); } else { abortObject(object, 501, message); notifyObject(object); return 1; } } static int dnsReplyHandler(int abort, FdEventHandlerPtr event) { int fd = event->fd; char buf[2048]; int len, rc; ObjectPtr object; unsigned ttl = 0; AtomPtr name, value, message = NULL; int id; int af; DnsQueryPtr query; AtomPtr cname = NULL; if(abort) { dnsSocketHandler = NULL; rc = establishDnsSocket(); if(rc < 0) { do_log(L_ERROR, "Couldn't reestablish DNS socket.\n"); /* At this point, we should abort all in-flight DNS requests. Oh, well, they'll timeout anyway. 
*/ } return 1; } len = recv(fd, buf, 2048, 0); if(len <= 0) { if(errno == EINTR || errno == EAGAIN) return 0; /* This is where we get ECONNREFUSED for an ICMP port unreachable */ do_log_error(L_ERROR, errno, "DNS: recv failed"); dnsGethostbynameFallback(-1, message); return 0; } /* This could be a late reply to a query that timed out and was resent, a reply to a query that timed out, or a reply to an AAAA query when we already got a CNAME reply to the associated A. We filter such replies straight away, without trying to parse them. */ rc = dnsReplyId(buf, 0, len, &id); if(rc < 0) { do_log(L_WARN, "Short DNS reply.\n"); return 0; } if(!findQuery(id, NULL)) { return 0; } rc = dnsDecodeReply(buf, 0, len, &id, &name, &value, &af, &ttl); if(rc < 0) { assert(value == NULL); /* We only want to fallback on gethostbyname if we received a reply that we could not understand. What about truncated replies? */ if(rc < 0) { do_log_error(L_WARN, -rc, "DNS"); if(dnsUseGethostbyname >= 2 || (dnsUseGethostbyname && (rc != -EDNS_HOST_NOT_FOUND && rc != -EDNS_NO_RECOVERY && rc != -EDNS_FORMAT))) { dnsGethostbynameFallback(id, message); return 0; } else { message = internAtomError(-rc, NULL); } } else { assert(name != NULL && id >= 0 && af >= 0); } } query = findQuery(id, name); if(query == NULL) { /* Duplicate id ? */ releaseAtom(value); releaseAtom(name); return 0; } /* We're going to use the information in this reply. If it was an error, construct an empty atom to distinguish it from information we're still waiting for. 
*/ if(value == NULL) value = internAtom(""); again: if(af == 4) { if(query->inet4 == NULL) { query->inet4 = value; query->ttl4 = current_time.tv_sec + ttl; } else releaseAtom(value); } else if(af == 6) { if(query->inet6 == NULL) { query->inet6 = value; query->ttl6 = current_time.tv_sec + ttl; } else releaseAtom(value); } else if(af == 0) { if(query->inet4 || query->inet6) { do_log(L_WARN, "Host %s has both %s and CNAME -- " "ignoring CNAME.\n", query->name->string, query->inet4 ? "A" : "AAAA"); releaseAtom(value); value = internAtom(""); af = query->inet4 ? 4 : 6; goto again; } else { cname = value; } } if(rc >= 0 && !cname && ((dnsQueryIPv6 < 3 && query->inet4 == NULL) || (dnsQueryIPv6 > 0 && query->inet6 == NULL))) return 0; /* This query is complete */ cancelTimeEvent(query->timeout_handler); object = query->object; if(object->flags & OBJECT_INITIAL) { assert(!object->headers); if(cname) { assert(query->inet4 == NULL && query->inet6 == NULL); object->headers = cname; object->expires = current_time.tv_sec + ttl; } else if((!query->inet4 || query->inet4->length == 0) && (!query->inet6 || query->inet6->length == 0)) { releaseAtom(query->inet4); releaseAtom(query->inet6); object->expires = current_time.tv_sec + dnsNegativeTtl; abortObject(object, 500, retainAtom(message)); } else if(!query->inet4 || query->inet4->length == 0) { object->headers = query->inet6; object->expires = query->ttl6; releaseAtom(query->inet4); } else if(!query->inet6 || query->inet6->length == 0) { object->headers = query->inet4; object->expires = query->ttl4; releaseAtom(query->inet6); } else { /* need to merge results */ char buf[1024]; if(query->inet4->length + query->inet6->length > 1024) { releaseAtom(query->inet4); releaseAtom(query->inet6); abortObject(object, 500, internAtom("DNS reply too long")); } else { if(dnsQueryIPv6 <= 1) { memcpy(buf, query->inet4->string, query->inet4->length); memcpy(buf + query->inet4->length, query->inet6->string + 1, query->inet6->length - 1); } else { 
memcpy(buf, query->inet6->string, query->inet6->length); memcpy(buf + query->inet6->length, query->inet4->string + 1, query->inet4->length - 1); } object->headers = internAtomN(buf, query->inet4->length + query->inet6->length - 1); if(object->headers == NULL) abortObject(object, 500, internAtom("Couldn't allocate DNS atom")); } object->expires = MIN(query->ttl4, query->ttl6); } object->age = current_time.tv_sec; object->flags &= ~(OBJECT_INITIAL | OBJECT_INPROGRESS); } else { do_log(L_WARN, "DNS object ex nihilo for %s.\n", query->name->string); } removeQuery(query); free(query); releaseAtom(name); releaseAtom(message); releaseNotifyObject(object); return 0; } static int dnsGethostbynameFallback(int id, AtomPtr message) { DnsQueryPtr query, previous; ObjectPtr object; if(inFlightDnsQueries == NULL) { releaseAtom(message); return 1; } query = NULL; if(id < 0 || inFlightDnsQueries->id == id) { previous = NULL; query = inFlightDnsQueries; } else { previous = inFlightDnsQueries; while(previous->next) { if(previous->next->id == id) { query = previous->next; break; } previous = previous->next; } if(!query) { previous = NULL; query = inFlightDnsQueries; } } if(previous == NULL) { inFlightDnsQueries = query->next; if(inFlightDnsQueries == NULL) inFlightDnsQueriesLast = NULL; } else { previous->next = query->next; if(query->next == NULL) inFlightDnsQueriesLast = NULL; } object = makeObject(OBJECT_DNS, query->name->string, query->name->length, 1, 0, NULL, NULL); if(!object) { do_log(L_ERROR, "Couldn't make DNS object.\n"); releaseAtom(query->name); releaseAtom(message); releaseObject(query->object); cancelTimeEvent(query->timeout_handler); free(query); return -1; } if(dnsUseGethostbyname >= 1) { releaseAtom(message); do_log(L_WARN, "Falling back to using system resolver.\n"); really_do_gethostbyname(retainAtom(query->name), object); } else { releaseAtom(object->message); object->message = message; object->flags &= ~OBJECT_INPROGRESS; releaseNotifyObject(object); } 
cancelTimeEvent(query->timeout_handler); releaseAtom(query->name); if(query->inet4) releaseAtom(query->inet4); if(query->inet6) releaseAtom(query->inet6); releaseObject(query->object); free(query); return 1; } static int stringToLabels(char *buf, int offset, int n, char *string) { int i = offset; int j = 0, k = 0; while(1) { while(string[k] != '.' && string[k] != '\0') k++; if(k >= j + 256) return -1; buf[i] = (unsigned char)(k - j); i++; if(i >= n) return -1; while(j < k) { buf[i] = string[j]; i++; j++; if(i >= n) return -1; } if(string[j] == '\0') { buf[i] = '\0'; i++; if(i >= n) return -1; break; } j++; k++; } return i; } #ifdef UNALIGNED_ACCESS #define DO_NTOHS(_d, _s) _d = ntohs(*(short*)(_s)); #define DO_NTOHL(_d, _s) _d = ntohl(*(unsigned*)(_s)) #define DO_HTONS(_d, _s) *(short*)(_d) = htons(_s); #define DO_HTONL(_d, _s) *(unsigned*)(_d) = htonl(_s) #else #define DO_NTOHS(_d, _s) \ do { short _dd; \ memcpy(&(_dd), (_s), sizeof(short)); \ _d = ntohs(_dd); } while(0) #define DO_NTOHL(_d, _s) \ do { unsigned _dd; \ memcpy(&(_dd), (_s), sizeof(unsigned)); \ _d = ntohl(_dd); } while(0) #define DO_HTONS(_d, _s) \ do { unsigned short _dd; \ _dd = htons(_s); \ memcpy((_d), &(_dd), sizeof(unsigned short)); } while(0); #define DO_HTONL(_d, _s) \ do { unsigned _dd; \ _dd = htonl(_s); \ memcpy((_d), &(_dd), sizeof(unsigned)); } while(0); #endif static int labelsToString(char *buf, int offset, int n, char *d, int m, int *j_return) { int i = offset, j, k; int ll; j = 0; while(1) { if(i >= n) return -1; ll = *(unsigned char*)&buf[i]; i++; if(ll == 0) { break; } if((ll & (3 << 6)) == (3 << 6)) { /* RFC 1035, 4.1.4 */ int o; if(i >= n) return -1; o = (ll & ~(3 << 6)) << 8 | *(unsigned char*)&buf[i]; i++; labelsToString(buf, o, n, &d[j], m - j, &k); j += k; break; } else if((ll & (3 << 6)) == 0) { for(k = 0; k < ll; k++) { if(i >= n || j >= m) return -1; d[j++] = buf[i++]; } if(i >= n) return -1; if(buf[i] != '\0') { if(j >= m) return -1; d[j++] = '.'; } } else { return -1; } 
} *j_return = j; return i; } static int dnsBuildQuery(int id, char *buf, int offset, int n, AtomPtr name, int af) { int i = offset; int type; switch(af) { case 4: type = 1; break; case 6: type = 28; break; default: return -EINVAL; } if(i + 12 >= n) return -1; DO_HTONS(&buf[i], id); i += 2; DO_HTONS(&buf[i], 1<<8); i += 2; DO_HTONS(&buf[i], 1); i += 2; DO_HTONS(&buf[i], 0); i += 2; DO_HTONS(&buf[i], 0); i += 2; DO_HTONS(&buf[i], 0); i += 2; i = stringToLabels(buf, i, n, name->string); if(i < 0) return -ENAMETOOLONG; if(i + 4 >= n) return -ENAMETOOLONG; DO_HTONS(&buf[i], type); i += 2; DO_HTONS(&buf[i], 1); i += 2; return i; } static int dnsReplyId(char *buf, int offset, int n, int *id_return) { if(n - offset < 12) return -1; DO_NTOHS(*id_return, &buf[offset]); return 1; } static int dnsDecodeReply(char *buf, int offset, int n, int *id_return, AtomPtr *name_return, AtomPtr *value_return, int *af_return, unsigned *ttl_return) { int i = offset, j, m; int id = -1, b23, qdcount, ancount, nscount, arcount, rdlength; int class, type; unsigned int ttl; char b[2048]; int af = -1; AtomPtr name = NULL, value; char addresses[1024]; int addr_index = 0; int error = EDNS_NO_ADDRESS; unsigned final_ttl = 7 * 24 * 3600; int dnserror; if(n - i < 12) { error = EDNS_INVALID; goto fail; } DO_NTOHS(id, &buf[i]); i += 2; DO_NTOHS(b23, &buf[i]); i += 2; DO_NTOHS(qdcount, &buf[i]); i += 2; DO_NTOHS(ancount, &buf[i]); i += 2; DO_NTOHS(nscount, &buf[i]); i += 2; DO_NTOHS(arcount, &buf[i]); i += 2; do_log(D_DNS, "DNS id %d, b23 0x%x, qdcount %d, ancount %d, " "nscount %d, arcount %d\n", id, b23, qdcount, ancount, nscount, arcount); if((b23 & (0xF870)) != 0x8000) { do_log(L_ERROR, "Incorrect DNS reply (b23 = 0x%x).\n", b23); error = EDNS_INVALID; goto fail; } dnserror = b23 & 0xF; if(b23 & 0x200) { do_log(L_WARN, "Truncated DNS reply (b23 = 0x%x).\n", b23); } if(dnserror || qdcount != 1) { if(!dnserror) do_log(L_ERROR, "Unexpected number %d of DNS questions.\n", qdcount); if(dnserror == 1) 
error = EDNS_FORMAT; else if(dnserror == 2) error = EDNS_NO_RECOVERY; else if(dnserror == 3) error = EDNS_HOST_NOT_FOUND; else if(dnserror == 4 || dnserror == 5) error = EDNS_REFUSED; else if(dnserror == 0) error = EDNS_INVALID; else error = EUNKNOWN; goto fail; } /* We do this early, so that we can return the address family to the caller in case of error. */ i = labelsToString(buf, i, n, b, 2048, &m); if(i < 0) { error = EDNS_FORMAT; goto fail; } DO_NTOHS(type, &buf[i]); i += 2; DO_NTOHS(class, &buf[i]); i += 2; if(type == 1) af = 4; else if(type == 28) af = 6; else { error = EDNS_FORMAT; goto fail; } do_log(D_DNS, "DNS q: "); do_log_n(D_DNS, b, m); do_log(D_DNS, " (%d, %d)\n", type, class); name = internAtomLowerN(b, m); if(name == NULL) { error = ENOMEM; goto fail; } if(class != 1) { error = EDNS_FORMAT; goto fail; } #define PARSE_ANSWER(kind, label) \ do { \ i = labelsToString(buf, i, 1024, b, 2048, &m); \ if(i < 0) goto label; \ DO_NTOHS(type, &buf[i]); i += 2; if(i > 1024) goto label; \ DO_NTOHS(class, &buf[i]); i += 2; if(i > 1024) goto label; \ DO_NTOHL(ttl, &buf[i]); i += 4; if(i > 1024) goto label; \ DO_NTOHS(rdlength, &buf[i]); i += 2; if(i > 1024) goto label; \ do_log(D_DNS, "DNS " kind ": "); \ do_log_n(D_DNS, b, m); \ do_log(D_DNS, " (%d, %d): %d bytes, ttl %u\n", \ type, class, rdlength, ttl); \ } while(0) for(j = 0; j < ancount; j++) { PARSE_ANSWER("an", fail); if(strcasecmp_n(name->string, b, m) == 0) { if(class != 1) { do_log(D_DNS, "DNS: %s: unknown class %d.\n", name->string, class); error = EDNS_UNSUPPORTED; goto cont; } if(type == 1 || type == 28) { if((type == 1 && rdlength != 4) || (type == 28 && rdlength != 16)) { do_log(L_ERROR, "DNS: %s: unexpected length %d of %s record.\n", name->string, rdlength, type == 1 ? 
"A" : "AAAA"); error = EDNS_INVALID; if(rdlength <= 0 || rdlength >= 32) goto fail; goto cont; } if(af == 0) { do_log(L_WARN, "DNS: %s: host has both A and CNAME -- " "ignoring CNAME.\n", name->string); addr_index = 0; af = -1; } if(type == 1) { if(af < 0) af = 4; else if(af == 6) { do_log(L_WARN, "Unexpected AAAA reply.\n"); goto cont; } } else { if(af < 0) af = 6; else if(af == 4) { do_log(L_WARN, "Unexpected A reply.\n"); goto cont; } } if(addr_index == 0) { addresses[0] = DNS_A; addr_index++; } else { if(addr_index > 1000) { error = EDNS_INVALID; goto fail; } } assert(addresses[0] == DNS_A); if(final_ttl > ttl) final_ttl = ttl; memset(&addresses[addr_index], 0, sizeof(HostAddressRec)); if(type == 1) { addresses[addr_index] = 4; memcpy(addresses + addr_index + 1, buf + i, 4); } else { addresses[addr_index] = 6; memcpy(addresses + addr_index + 1, buf + i, 16); } addr_index += sizeof(HostAddressRec); } else if(type == 5) { int j, k; if(af != 0 && addr_index > 0) { do_log(L_WARN, "DNS: host has both CNAME and A -- " "ignoring CNAME.\n"); goto cont; } af = 0; if(addr_index != 0) { /* Only warn if the CNAMEs are not identical */ char tmp[512]; int jj, kk; assert(addresses[0] == DNS_CNAME); jj = labelsToString(buf, i, n, tmp, 512, &kk); if(jj < 0 || kk != strlen(addresses + 1) || memcmp(addresses + 1, tmp, kk) != 0) { do_log(L_WARN, "DNS: " "%s: host has multiple CNAMEs -- " "ignoring subsequent.\n", name->string); } goto cont; } addresses[0] = DNS_CNAME; addr_index++; j = labelsToString(buf, i, n, addresses + 1, 1020, &k); if(j < 0) { addr_index = 0; error = ENAMETOOLONG; continue; } addr_index = k + 1; } else { error = EDNS_NO_ADDRESS; i += rdlength; continue; } } cont: i += rdlength; } #if (LOGGING_MAX & D_DNS) for(j = 0; j < nscount; j++) { PARSE_ANSWER("ns", nofail); i += rdlength; } for(j = 0; j < arcount; j++) { PARSE_ANSWER("ar", nofail); i += rdlength; } nofail: #endif #undef PARSE_ANSWER do_log(D_DNS, "DNS: %d bytes\n", addr_index); if(af < 0) goto fail; 
value = internAtomN(addresses, addr_index); if(value == NULL) { error = ENOMEM; goto fail; } assert(af >= 0); *id_return = id; *name_return = name; *value_return = value; *af_return = af; *ttl_return = final_ttl; return 1; fail: *id_return = id; *name_return = name; *value_return = NULL; *af_return = af; return -error; } #else static int really_do_dns(AtomPtr name, ObjectPtr object) { abort(); } #endif polipo-1.0.4.1/diskcache.h0000644000175000017500000000454111331407220014553 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ extern int maxDiskEntries; extern AtomPtr diskCacheRoot; extern AtomPtr additionalDiskCacheRoot; typedef struct _DiskCacheEntry { char *filename; ObjectPtr object; int fd; off_t offset; off_t size; int body_offset; short local; short writeable; short metadataDirty; struct _DiskCacheEntry *next; struct _DiskCacheEntry *previous; } *DiskCacheEntryPtr, DiskCacheEntryRec; typedef struct _DiskObject { char *location; char *filename; int body_offset; int length; int size; time_t age; time_t access; time_t date; time_t last_modified; time_t expires; struct _DiskObject *next; } DiskObjectRec, *DiskObjectPtr; struct stat; extern int maxDiskCacheEntrySize; void preinitDiskcache(void); void initDiskcache(void); int destroyDiskEntry(ObjectPtr object, int); int diskEntrySize(ObjectPtr object); ObjectPtr objectGetFromDisk(ObjectPtr); int objectFillFromDisk(ObjectPtr object, int offset, int chunks); int writeoutMetadata(ObjectPtr object); int writeoutToDisk(ObjectPtr object, int upto, int max); void dirtyDiskEntry(ObjectPtr object); int revalidateDiskEntry(ObjectPtr object); DiskObjectPtr readDiskObject(char *filename, struct stat *sb); void indexDiskObjects(FILE *out, const char *root, int r); void expireDiskObjects(void); polipo-1.0.4.1/diskcache.c0000644000175000017500000021171411331407220014550 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "polipo.h" #ifndef NO_DISK_CACHE #include "md5import.h" int maxDiskEntries = 32; /* Because the functions in this file can be called during object expiry, we cannot use get_chunk. */ AtomPtr diskCacheRoot; AtomPtr localDocumentRoot; DiskCacheEntryPtr diskEntries = NULL, diskEntriesLast = NULL; int numDiskEntries = 0; int diskCacheDirectoryPermissions = 0700; int diskCacheFilePermissions = 0600; int diskCacheWriteoutOnClose = (32 * 1024); int maxDiskCacheEntrySize = -1; int diskCacheUnlinkTime = 32 * 24 * 60 * 60; int diskCacheTruncateTime = 4 * 24 * 60 * 60 + 12 * 60 * 60; int diskCacheTruncateSize = 1024 * 1024; int preciseExpiry = 0; static DiskCacheEntryRec negativeEntry = { NULL, NULL, -1, -1, -1, -1, 0, 0, 0, NULL, NULL }; #ifndef LOCAL_ROOT #define LOCAL_ROOT "/usr/share/polipo/www/" #endif #ifndef DISK_CACHE_ROOT #define DISK_CACHE_ROOT "/var/cache/polipo/" #endif static int maxDiskEntriesSetter(ConfigVariablePtr, void*); static int atomSetterFlush(ConfigVariablePtr, void*); static int reallyWriteoutToDisk(ObjectPtr object, int upto, int max); void preinitDiskcache() { diskCacheRoot = internAtom(DISK_CACHE_ROOT); localDocumentRoot = internAtom(LOCAL_ROOT); CONFIG_VARIABLE_SETTABLE(diskCacheDirectoryPermissions, CONFIG_OCTAL, configIntSetter, "Access rights for new directories."); CONFIG_VARIABLE_SETTABLE(diskCacheFilePermissions, CONFIG_OCTAL, configIntSetter, "Access rights for new cache files."); CONFIG_VARIABLE_SETTABLE(diskCacheWriteoutOnClose, CONFIG_INT, configIntSetter, "Number of bytes 
to write out eagerly."); CONFIG_VARIABLE_SETTABLE(diskCacheRoot, CONFIG_ATOM, atomSetterFlush, "Root of the disk cache."); CONFIG_VARIABLE_SETTABLE(localDocumentRoot, CONFIG_ATOM, atomSetterFlush, "Root of the local tree."); CONFIG_VARIABLE_SETTABLE(maxDiskEntries, CONFIG_INT, maxDiskEntriesSetter, "File descriptors used by the on-disk cache."); CONFIG_VARIABLE(diskCacheUnlinkTime, CONFIG_TIME, "Time after which on-disk objects are removed."); CONFIG_VARIABLE(diskCacheTruncateTime, CONFIG_TIME, "Time after which on-disk objects are truncated."); CONFIG_VARIABLE(diskCacheTruncateSize, CONFIG_INT, "Size to which on-disk objects are truncated."); CONFIG_VARIABLE(preciseExpiry, CONFIG_BOOLEAN, "Whether to consider all files for purging."); CONFIG_VARIABLE_SETTABLE(maxDiskCacheEntrySize, CONFIG_INT, configIntSetter, "Maximum size of objects cached on disk."); } static int maxDiskEntriesSetter(ConfigVariablePtr var, void *value) { int i; assert(var->type == CONFIG_INT && var->value.i == &maxDiskEntries); i = *(int*)value; if(i < 0 || i > 1000000) return -3; maxDiskEntries = i; while(numDiskEntries > maxDiskEntries) destroyDiskEntry(diskEntriesLast->object, 0); return 1; } static int atomSetterFlush(ConfigVariablePtr var, void *value) { discardObjects(1, 0); return configAtomSetter(var, value); } static int checkRoot(AtomPtr root) { struct stat ss; int rc; if(!root || root->length == 0) return 0; if(root->string[0] != '/') { return -2; } rc = stat(root->string, &ss); if(rc < 0) return -1; else if(!S_ISDIR(ss.st_mode)) { errno = ENOTDIR; return -1; } return 1; } static AtomPtr maybeAddSlash(AtomPtr atom) { AtomPtr newAtom = NULL; if(!atom) return NULL; if(atom->length > 0 && atom->string[atom->length - 1] != '/') { newAtom = atomCat(atom, "/"); releaseAtom(atom); return newAtom; } return atom; } void initDiskcache() { int rc; diskCacheRoot = expandTilde(maybeAddSlash(diskCacheRoot)); rc = checkRoot(diskCacheRoot); if(rc <= 0) { switch(rc) { case 0: break; case -1: 
do_log_error(L_WARN, errno, "Disabling disk cache"); break; case -2: do_log(L_WARN, "Disabling disk cache: path %s is not absolute.\n", diskCacheRoot->string); break; default: abort(); } releaseAtom(diskCacheRoot); diskCacheRoot = NULL; } localDocumentRoot = expandTilde(maybeAddSlash(localDocumentRoot)); rc = checkRoot(localDocumentRoot); if(rc <= 0) { switch(rc) { case 0: break; case -1: do_log_error(L_WARN, errno, "Disabling local tree"); break; case -2: do_log(L_WARN, "Disabling local tree: path is not absolute.\n"); break; default: abort(); } releaseAtom(localDocumentRoot); localDocumentRoot = NULL; } } #ifdef DEBUG_DISK_CACHE #define CHECK_ENTRY(entry) check_entry((entry)) static void check_entry(DiskCacheEntryPtr entry) { if(entry && entry->fd < 0) assert(entry == &negativeEntry); if(entry && entry->fd >= 0) { assert((!entry->previous) == (entry == diskEntries)); assert((!entry->next) == (entry == diskEntriesLast)); if(entry->size >= 0) assert(entry->size + entry->body_offset >= entry->offset); assert(entry->body_offset >= 0); if(entry->offset >= 0) { off_t offset; offset = lseek(entry->fd, 0, SEEK_CUR); assert(offset == entry->offset); } if(entry->size >= 0) { int rc; struct stat ss; rc = fstat(entry->fd, &ss); assert(rc >= 0); assert(ss.st_size == entry->size + entry->body_offset); } } } #else #define CHECK_ENTRY(entry) do {} while(0) #endif int diskEntrySize(ObjectPtr object) { struct stat buf; int rc; DiskCacheEntryPtr entry = object->disk_entry; if(!entry || entry == &negativeEntry) return -1; if(entry->size >= 0) return entry->size; rc = fstat(entry->fd, &buf); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't stat"); return -1; } if(buf.st_size <= entry->body_offset) entry->size = 0; else entry->size = buf.st_size - entry->body_offset; CHECK_ENTRY(entry); if(object->length >= 0 && entry->size == object->length) object->flags |= OBJECT_DISK_ENTRY_COMPLETE; return entry->size; } static int entrySeek(DiskCacheEntryPtr entry, off_t offset) { off_t rc; 
CHECK_ENTRY(entry); assert(entry != &negativeEntry); if(entry->offset == offset) return 1; if(offset > entry->body_offset) { /* Avoid extending the file by mistake */ if(entry->size < 0) diskEntrySize(entry->object); if(entry->size < 0) return -1; if(entry->size + entry->body_offset < offset) return -1; } rc = lseek(entry->fd, offset, SEEK_SET); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't seek"); entry->offset = -1; return -1; } entry->offset = offset; return 1; } /* Given a local URL, constructs the filename where it can be found. */ int localFilename(char *buf, int n, char *key, int len) { int i, j; if(len <= 0 || key[0] != '/') return -1; if(urlIsSpecial(key, len)) return -1; if(localDocumentRoot == NULL || localDocumentRoot->length <= 0 || localDocumentRoot->string[0] != '/') return -1; if(n <= localDocumentRoot->length) return -1; i = 0; if(key[i] != '/') return -1; memcpy(buf, localDocumentRoot->string, localDocumentRoot->length); j = localDocumentRoot->length; if(buf[j - 1] == '/') j--; while(i < len) { if(j >= n - 1) return -1; if(key[i] == '/' && i < len - 2) if(key[i + 1] == '.' && (key[i + 2] == '.' || key[i + 2] == '/')) return -1; buf[j++] = key[i++]; } if(buf[j - 1] == '/') { if(j >= n - 11) return -1; memcpy(buf + j, "index.html", 10); j += 10; } buf[j] = '\0'; return j; } static void md5(unsigned char *restrict key, int len, unsigned char *restrict dst) { static MD5_CTX ctx; MD5Init(&ctx); MD5Update(&ctx, key, len); MD5Final(&ctx); memcpy(dst, ctx.digest, 16); } /* Check whether a character can be stored in a filename. This is needed since we want to support deficient file systems. */ static int fssafe(char c) { if(c <= 31 || c >= 127) return 0; if((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '.' || c == '-' || c == '_') return 1; return 0; } /* Given a URL, returns the directory name within which all files starting with this URL can be found. 
*/ static int urlDirname(char *buf, int n, const char *url, int len) { int i, j; if(len < 8) return -1; if(memcmp(url, "http://", 7) != 0) return -1; if(diskCacheRoot == NULL || diskCacheRoot->length <= 0 || diskCacheRoot->string[0] != '/') return -1; if(n <= diskCacheRoot->length) return -1; memcpy(buf, diskCacheRoot->string, diskCacheRoot->length); j = diskCacheRoot->length; if(buf[j - 1] != '/') buf[j++] = '/'; for(i = 7; i < len; i++) { if(i >= len || url[i] == '/') break; if(url[i] == '.' && i != len - 1 && url[i + 1] == '.') return -1; if(url[i] == '%' || !fssafe(url[i])) { if(j + 3 >= n) return -1; buf[j++] = '%'; buf[j++] = i2h((url[i] & 0xF0) >> 4); buf[j++] = i2h(url[i] & 0x0F); } else { buf[j++] = url[i]; if(j >= n) return -1; } } buf[j++] = '/'; if(j >= n) return -1; buf[j] = '\0'; return j; } /* Given a URL, returns the filename where the cached data can be found. */ static int urlFilename(char *restrict buf, int n, const char *url, int len) { int j; unsigned char md5buf[18]; j = urlDirname(buf, n, url, len); if(j < 0 || j + 24 >= n) return -1; md5((unsigned char*)url, len, md5buf); b64cpy(buf + j, (char*)md5buf, 16, 1); buf[j + 24] = '\0'; return j + 24; } static char * dirnameUrl(char *url, int n, char *name, int len) { int i, j, k, c1, c2; k = diskCacheRoot->length; if(len < k) return NULL; if(memcmp(name, diskCacheRoot->string, k) != 0) return NULL; if(n < 8) return NULL; memcpy(url, "http://", 7); if(name[len - 1] == '/') len --; j = 7; for(i = k; i < len; i++) { if(name[i] == '%') { if(i >= len - 2) return NULL; c1 = h2i(name[i + 1]); c2 = h2i(name[i + 2]); if(c1 < 0 || c2 < 0) return NULL; url[j++] = c1 * 16 + c2; if(j >= n) goto fail; i += 2; /* skip extra digits */ } else if(i < len - 1 && name[i] == '.' 
&& name[i + 1] == '/') { return NULL; } else if(i == len - 1 && name[i] == '.') { return NULL; } else { url[j++] = name[i]; if(j >= n) goto fail; } } url[j++] = '/'; if(j >= n) goto fail; url[j] = '\0'; return url; fail: return NULL; } /* Create a file and all intermediate directories. */ static int createFile(const char *name, int path_start) { int fd; char buf[1024]; int n; int rc; if(name[path_start] == '/') path_start++; if(path_start < 2 || name[path_start - 1] != '/' ) { do_log(L_ERROR, "Incorrect name %s (%d).\n", name, path_start); return -1; } fd = open(name, O_RDWR | O_CREAT | O_EXCL | O_BINARY, diskCacheFilePermissions); if(fd >= 0) return fd; if(errno != ENOENT) { do_log_error(L_ERROR, errno, "Couldn't create disk file %s", name); return -1; } n = path_start; while(name[n] != '\0' && n < 1024) { while(name[n] != '/' && name[n] != '\0' && n < 512) n++; if(name[n] != '/' || n >= 1024) break; memcpy(buf, name, n + 1); buf[n + 1] = '\0'; rc = mkdir(buf, diskCacheDirectoryPermissions); if(rc < 0 && errno != EEXIST) { do_log_error(L_ERROR, errno, "Couldn't create directory %s", buf); return -1; } n++; } fd = open(name, O_RDWR | O_CREAT | O_EXCL | O_BINARY, diskCacheFilePermissions); if(fd < 0) { do_log_error(L_ERROR, errno, "Couldn't create file %s", name); return -1; } return fd; } static int chooseBodyOffset(int n, ObjectPtr object) { int length = MAX(object->size, object->length); int body_offset; if(object->length >= 0 && object->length + n < 4096 - 4) return -1; /* no gap for small objects */ if(n <= 128) body_offset = 256; else if(n <= 192) body_offset = 384; else if(n <= 256) body_offset = 512; else if(n <= 384) body_offset = 768; else if(n <= 512) body_offset = 1024; else if(n <= 1024) body_offset = 2048; else if(n < 2048) body_offset = 4096; else body_offset = ((n + 32 + 4095) / 4096 + 1) * 4096; /* Tweak the gap so that we don't use up a full disk block for a small tail */ if(object->length >= 0 && object->length < 64 * 1024) { int last = 
(body_offset + object->length) % 4096; int gap = body_offset - n - 32; if(last < gap / 2) body_offset -= last; } /* Rewriting large objects is expensive -- don't use small gaps. This has the additional benefit of block-aligning large bodies. */ if(length >= 64 * 1024) { int min_gap, min_offset; if(length >= 512 * 1024) min_gap = 4096; else if(length >= 256 * 1024) min_gap = 2048; else min_gap = 1024; min_offset = ((n + 32 + min_gap - 1) / min_gap + 1) * min_gap; body_offset = MAX(body_offset, min_offset); } return body_offset; } /* Assumes the file descriptor is at offset 0. Returns -1 on failure, otherwise the offset at which the file descriptor is left. */ /* If chunk is not null, it should be the first chunk of the object, and will be written out in the same operation if possible. */ static int writeHeaders(int fd, int *body_offset_return, ObjectPtr object, char *chunk, int chunk_len) { int n; int rc; int body_offset = *body_offset_return; char *buf = NULL; int buf_is_chunk = 0; int bufsize = 0; if(object->flags & OBJECT_LOCAL) return -1; if(body_offset > CHUNK_SIZE) goto overflow; /* get_chunk might trigger object expiry */ bufsize = CHUNK_SIZE; buf_is_chunk = 1; buf = maybe_get_chunk(); if(!buf) { bufsize = 2048; buf_is_chunk = 0; buf = malloc(2048); if(buf == NULL) { do_log(L_ERROR, "Couldn't allocate buffer.\n"); return -1; } } format_again: n = snnprintf(buf, 0, bufsize, "HTTP/1.1 %3d %s", object->code, object->message->string); n = httpWriteObjectHeaders(buf, n, bufsize, object, 0, -1); if(n < 0) goto overflow; n = snnprintf(buf, n, bufsize, "\r\nX-Polipo-Location: "); n = snnprint_n(buf, n, bufsize, object->key, object->key_size); if(object->age >= 0 && object->age != object->date) { n = snnprintf(buf, n, bufsize, "\r\nX-Polipo-Date: "); n = format_time(buf, n, bufsize, object->age); } if(object->atime >= 0) { n = snnprintf(buf, n, bufsize, "\r\nX-Polipo-Access: "); n = format_time(buf, n, bufsize, object->atime); } if(n < 0) goto overflow; if(body_offset 
< 0) body_offset = chooseBodyOffset(n, object); if(body_offset > bufsize) goto overflow; if(body_offset > 0 && body_offset != n + 4) n = snnprintf(buf, n, bufsize, "\r\nX-Polipo-Body-Offset: %d", body_offset); n = snnprintf(buf, n, bufsize, "\r\n\r\n"); if(n < 0) goto overflow; if(body_offset < 0) body_offset = n; if(n > body_offset) goto fail; if(n < body_offset) memset(buf + n, 0, body_offset - n); again: #ifdef HAVE_READV_WRITEV if(chunk_len > 0) { struct iovec iov[2]; iov[0].iov_base = buf; iov[0].iov_len = body_offset; iov[1].iov_base = chunk; iov[1].iov_len = chunk_len; rc = writev(fd, iov, 2); } else #endif rc = write(fd, buf, body_offset); if(rc < 0 && errno == EINTR) goto again; if(rc < body_offset) goto fail; if(object->length >= 0 && rc - body_offset >= object->length) object->flags |= OBJECT_DISK_ENTRY_COMPLETE; *body_offset_return = body_offset; if(buf_is_chunk) dispose_chunk(buf); else free(buf); return rc; overflow: if(bufsize < bigBufferSize) { char *oldbuf = buf; buf = malloc(bigBufferSize); if(!buf) { do_log(L_ERROR, "Couldn't allocate big buffer.\n"); goto fail; } bufsize = bigBufferSize; if(oldbuf) { if(buf_is_chunk) dispose_chunk(oldbuf); else free(oldbuf); } buf_is_chunk = 0; goto format_again; } /* fall through */ fail: if(buf_is_chunk) dispose_chunk(buf); else free(buf); return -1; } typedef struct _MimeEntry { char *extension; char *mime; } MimeEntryRec; static const MimeEntryRec mimeEntries[] = { { "html", "text/html" }, { "htm", "text/html" }, { "text", "text/plain" }, { "txt", "text/plain" }, { "png", "image/png" }, { "gif", "image/gif" }, { "jpeg", "image/jpeg" }, { "jpg", "image/jpeg" }, { "ico", "image/x-icon" }, { "pdf", "application/pdf" }, { "ps", "application/postscript" }, { "tar", "application/x-tar" }, { "pac", "application/x-ns-proxy-autoconfig" }, { "css", "text/css" }, { "js", "application/x-javascript" }, { "xml", "text/xml" }, { "swf", "application/x-shockwave-flash" }, }; static char* localObjectMimeType(ObjectPtr object, 
char **encoding_return) { char *name = object->key; int nlen = object->key_size; int i; assert(nlen >= 1); if(name[nlen - 1] == '/') { *encoding_return = NULL; return "text/html"; } if(nlen < 3) { *encoding_return = NULL; return "application/octet-stream"; } if(memcmp(name + nlen - 3, ".gz", 3) == 0) { *encoding_return = "x-gzip"; nlen -= 3; } else if(memcmp(name + nlen - 2, ".Z", 2) == 0) { *encoding_return = "x-compress"; nlen -= 2; } else { *encoding_return = NULL; } for(i = 0; i < sizeof(mimeEntries) / sizeof(mimeEntries[0]); i++) { int len = strlen(mimeEntries[i].extension); if(nlen > len && name[nlen - len - 1] == '.' && memcmp(name + nlen - len, mimeEntries[i].extension, len) == 0) return mimeEntries[i].mime; } return "application/octet-stream"; } /* Same interface as validateEntry -- see below */ int validateLocalEntry(ObjectPtr object, int fd, int *body_offset_return, off_t *offset_return) { struct stat ss; char buf[512]; int n, rc; char *encoding; rc = fstat(fd, &ss); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't stat"); return -1; } if(S_ISREG(ss.st_mode)) { if(!(ss.st_mode & S_IROTH) || (object->length >= 0 && object->length != ss.st_size) || (object->last_modified >= 0 && object->last_modified != ss.st_mtime)) return -1; } else { notifyObject(object); return -1; } n = snnprintf(buf, 0, 512, "%lx-%lx-%lx", (unsigned long)ss.st_ino, (unsigned long)ss.st_size, (unsigned long)ss.st_mtime); if(n >= 512) n = -1; if(n > 0 && object->etag) { if(strlen(object->etag) != n || memcmp(object->etag, buf, n) != 0) return -1; } if(!(object->flags & OBJECT_INITIAL)) { if(!object->last_modified && !object->etag) return -1; } if(object->flags & OBJECT_INITIAL) { object->length = ss.st_size; object->last_modified = ss.st_mtime; object->date = current_time.tv_sec; object->age = current_time.tv_sec; object->code = 200; if(n > 0) object->etag = strdup(buf); /* okay if fails */ object->message = internAtom("Okay"); n = snnprintf(buf, 0, 512, "\r\nServer: Polipo" 
"\r\nContent-Type: %s", localObjectMimeType(object, &encoding)); if(encoding != NULL) n = snnprintf(buf, n, 512, "\r\nContent-Encoding: %s", encoding); if(n < 0) return -1; object->headers = internAtomN(buf, n); if(object->headers == NULL) return -1; object->flags &= ~OBJECT_INITIAL; } if(body_offset_return) *body_offset_return = 0; if(offset_return) *offset_return = 0; return 0; } /* Assumes fd is at offset 0. Returns -1 if not valid, 1 if metadata should be written out, 0 otherwise. */ int validateEntry(ObjectPtr object, int fd, int *body_offset_return, off_t *offset_return) { char *buf; int buf_is_chunk, bufsize; int rc, n; int dummy; int code; AtomPtr headers; time_t date, last_modified, expires, polipo_age, polipo_access; int length; off_t offset = -1; int body_offset; char *etag; AtomPtr via; CacheControlRec cache_control; char *location; AtomPtr message; int dirty = 0; if(object->flags & OBJECT_LOCAL) return validateLocalEntry(object, fd, body_offset_return, offset_return); if(!(object->flags & OBJECT_PUBLIC) && (object->flags & OBJECT_INITIAL)) return 0; /* get_chunk might trigger object expiry */ bufsize = CHUNK_SIZE; buf_is_chunk = 1; buf = maybe_get_chunk(); if(!buf) { bufsize = 2048; buf_is_chunk = 0; buf = malloc(2048); if(buf == NULL) { do_log(L_ERROR, "Couldn't allocate buffer.\n"); return -1; } } again: rc = read(fd, buf, bufsize); if(rc < 0) { if(errno == EINTR) goto again; do_log_error(L_ERROR, errno, "Couldn't read disk entry"); goto fail; } offset = rc; parse_again: n = findEndOfHeaders(buf, 0, rc, &dummy); if(n < 0) { char *oldbuf = buf; if(bufsize < bigBufferSize) { buf = malloc(bigBufferSize); if(!buf) { do_log(L_ERROR, "Couldn't allocate big buffer.\n"); goto fail; } bufsize = bigBufferSize; memcpy(buf, oldbuf, offset); if(buf_is_chunk) dispose_chunk(oldbuf); else free(oldbuf); buf_is_chunk = 0; again2: rc = read(fd, buf + offset, bufsize - offset); if(rc < 0) { if(errno == EINTR) goto again2; do_log_error(L_ERROR, errno, "Couldn't read disk 
entry"); goto fail; } offset += rc; goto parse_again; } do_log(L_ERROR, "Couldn't parse disk entry.\n"); goto fail; } rc = httpParseServerFirstLine(buf, &code, &dummy, &message); if(rc < 0) { do_log(L_ERROR, "Couldn't parse disk entry.\n"); goto fail; } if(object->code != 0 && object->code != code) { releaseAtom(message); goto fail; } rc = httpParseHeaders(0, NULL, buf, rc, NULL, &headers, &length, &cache_control, NULL, NULL, &date, &last_modified, &expires, &polipo_age, &polipo_access, &body_offset, NULL, &etag, NULL, NULL, NULL, &location, &via, NULL); if(rc < 0) { releaseAtom(message); goto fail; } if(body_offset < 0) body_offset = n; if(!location || strlen(location) != object->key_size || memcmp(location, object->key, object->key_size) != 0) { do_log(L_ERROR, "Inconsistent cache file for %s.\n", location); goto invalid; } if(polipo_age < 0) polipo_age = date; if(polipo_age < 0) { do_log(L_ERROR, "Undated disk entry for %s.\n", location); goto invalid; } if(!(object->flags & OBJECT_INITIAL)) { if((last_modified >= 0) != (object->last_modified >= 0)) goto invalid; if((object->cache_control & CACHE_MISMATCH) || (cache_control.flags & CACHE_MISMATCH)) goto invalid; if(last_modified >= 0 && object->last_modified >= 0 && last_modified != object->last_modified) goto invalid; if(length >= 0 && object->length >= 0) if(length != object->length) goto invalid; if(!!etag != !!object->etag) goto invalid; if(etag && object->etag && strcmp(etag, object->etag) != 0) goto invalid; /* If we don't have a usable ETag, and either CACHE_VARY or we don't have a last-modified date, we validate disk entries by using their date. 
*/ if(!(etag && object->etag) && (!(last_modified >= 0 && object->last_modified >= 0) || ((cache_control.flags & CACHE_VARY) || (object->cache_control & CACHE_VARY)))) { if(date >= 0 && date != object->date) goto invalid; if(polipo_age >= 0 && polipo_age != object->age) goto invalid; } if((object->cache_control & CACHE_VARY) && dontTrustVaryETag >= 1) { /* Check content-type to work around mod_gzip bugs */ if(!httpHeaderMatch(atomContentType, object->headers, headers) || !httpHeaderMatch(atomContentEncoding, object->headers, headers)) goto invalid; } } if(location) free(location); if(headers) { if(!object->headers) object->headers = headers; else releaseAtom(headers); } if(object->code == 0) { object->code = code; object->message = retainAtom(message); } if(object->date <= date) object->date = date; else dirty = 1; if(object->last_modified < 0) object->last_modified = last_modified; if(object->expires < 0) object->expires = expires; else if(object->expires > expires) dirty = 1; if(object->age < 0) object->age = polipo_age; else if(object->age > polipo_age) dirty = 1; if(object->atime <= polipo_access) object->atime = polipo_access; else dirty = 1; object->cache_control |= cache_control.flags; if(object->age < 0) object->age = object->date; if(object->age < 0) object->age = 0; /* a long time ago */ if(object->length < 0) object->length = length; if(!object->etag) object->etag = etag; else { if(etag) free(etag); } releaseAtom(message); if(object->flags & OBJECT_INITIAL) object->via = via; object->flags &= ~OBJECT_INITIAL; if(offset > body_offset) { /* We need to make sure we don't invoke object expiry recursively */ objectSetChunks(object, 1); if(object->numchunks >= 1) { if(object->chunks[0].data == NULL) object->chunks[0].data = maybe_get_chunk(); if(object->chunks[0].data) objectAddData(object, buf + body_offset, 0, MIN(offset - body_offset, CHUNK_SIZE)); } } httpTweakCachability(object); if(buf_is_chunk) dispose_chunk(buf); else free(buf); if(body_offset_return) 
*body_offset_return = body_offset; if(offset_return) *offset_return = offset; return dirty; invalid: releaseAtom(message); if(etag) free(etag); if(location) free(location); if(via) releaseAtom(via); /* fall through */ fail: if(buf_is_chunk) dispose_chunk(buf); else free(buf); return -1; } void dirtyDiskEntry(ObjectPtr object) { DiskCacheEntryPtr entry = object->disk_entry; if(entry && entry != &negativeEntry) entry->metadataDirty = 1; } int revalidateDiskEntry(ObjectPtr object) { DiskCacheEntryPtr entry = object->disk_entry; int rc; int body_offset; if(!entry || entry == &negativeEntry) return 1; CHECK_ENTRY(entry); rc = entrySeek(entry, 0); if(rc < 0) return 0; rc = validateEntry(object, entry->fd, &body_offset, &entry->offset); if(rc < 0) { destroyDiskEntry(object, 0); return 0; } if(body_offset != entry->body_offset) { do_log(L_WARN, "Inconsistent body offset (%d != %d).\n", body_offset, entry->body_offset); destroyDiskEntry(object, 0); return 0; } entry->metadataDirty |= !!rc; CHECK_ENTRY(entry); return 1; } static inline int objectHasDiskEntry(ObjectPtr object) { return object->disk_entry && object->disk_entry != &negativeEntry; } static DiskCacheEntryPtr makeDiskEntry(ObjectPtr object, int writeable, int create) { DiskCacheEntryPtr entry = NULL; char buf[1024]; int fd = -1; int negative = 0, isWriteable = 0, size = -1, name_len = -1; char *name = NULL; off_t offset = -1; int body_offset = -1; int rc; int local = (object->flags & OBJECT_LOCAL) != 0; int dirty = 0; if(local && (writeable || create)) return NULL; if(!local && !(object->flags & OBJECT_PUBLIC)) return NULL; if(maxDiskCacheEntrySize >= 0) { if(object->length > 0) { if(object->length > maxDiskCacheEntrySize) return NULL; } else { if(object->size > maxDiskCacheEntrySize) return NULL; } } if(object->disk_entry) { entry = object->disk_entry; CHECK_ENTRY(entry); if(entry != &negativeEntry && (!writeable || entry->writeable)) { /* We'll keep the entry -- put it at the front. 
*/ if(entry != diskEntries && entry != &negativeEntry) { entry->previous->next = entry->next; if(entry->next) entry->next->previous = entry->previous; else diskEntriesLast = entry->previous; entry->next = diskEntries; diskEntries->previous = entry; entry->previous = NULL; diskEntries = entry; } return entry; } else { if(entry == &negativeEntry) { negative = 1; if(!create) return NULL; object->disk_entry = NULL; } entry = NULL; destroyDiskEntry(object, 0); } } if(numDiskEntries > maxDiskEntries) destroyDiskEntry(diskEntriesLast->object, 0); if(!local) { if(diskCacheRoot == NULL || diskCacheRoot->length <= 0) return NULL; name_len = urlFilename(buf, 1024, object->key, object->key_size); if(name_len < 0) return NULL; if(!negative) { isWriteable = 1; fd = open(buf, O_RDWR | O_BINARY); if(fd < 0 && !writeable && errno == EACCES) { writeable = 0; fd = open(buf, O_RDONLY | O_BINARY); } } if(fd >= 0) { rc = validateEntry(object, fd, &body_offset, &offset); if(rc >= 0) { dirty = rc; } else { close(fd); fd = -1; rc = unlink(buf); if(rc < 0 && errno != ENOENT) { do_log_error(L_WARN, errno, "Couldn't unlink stale disk entry %s", buf); /* But continue -- it's okay to have stale entries. 
*/ } } } if(fd < 0 && create && name_len > 0 && !(object->flags & OBJECT_INITIAL)) { isWriteable = 1; fd = createFile(buf, diskCacheRoot->length); if(fd < 0) return NULL; if(fd >= 0) { char *data = NULL; int dsize = 0; if(object->numchunks > 0) { data = object->chunks[0].data; dsize = object->chunks[0].size; } rc = writeHeaders(fd, &body_offset, object, data, dsize); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't write headers"); rc = unlink(buf); if(rc < 0 && errno != ENOENT) do_log_error(L_ERROR, errno, "Couldn't unlink truncated entry %s", buf); close(fd); return NULL; } assert(rc >= body_offset); size = rc - body_offset; offset = rc; dirty = 0; } } } else { /* local */ if(localDocumentRoot == NULL || localDocumentRoot->length == 0) return NULL; name_len = localFilename(buf, 1024, object->key, object->key_size); if(name_len < 0) return NULL; isWriteable = 0; fd = open(buf, O_RDONLY | O_BINARY); if(fd >= 0) { if(validateEntry(object, fd, &body_offset, NULL) < 0) { close(fd); fd = -1; } } offset = 0; } if(fd < 0) { object->disk_entry = &negativeEntry; return NULL; } assert(body_offset >= 0); name = strdup_n(buf, name_len); if(name == NULL) { do_log(L_ERROR, "Couldn't allocate name.\n"); close(fd); fd = -1; return NULL; } entry = malloc(sizeof(DiskCacheEntryRec)); if(entry == NULL) { do_log(L_ERROR, "Couldn't allocate entry.\n"); free(name); close(fd); return NULL; } entry->filename = name; entry->object = object; entry->fd = fd; entry->body_offset = body_offset; entry->local = local; entry->offset = offset; entry->size = size; entry->metadataDirty = dirty; entry->writeable = isWriteable; entry->next = diskEntries; if(diskEntries) diskEntries->previous = entry; diskEntries = entry; if(diskEntriesLast == NULL) diskEntriesLast = entry; entry->previous = NULL; numDiskEntries++; object->disk_entry = entry; CHECK_ENTRY(entry); return entry; } /* Rewrite a disk cache entry, used when the body offset needs to change. 
*/ static int rewriteEntry(ObjectPtr object) { int old_body_offset = object->disk_entry->body_offset; int fd, rc, n; DiskCacheEntryPtr entry; char* buf; int buf_is_chunk, bufsize; int offset; fd = dup(object->disk_entry->fd); if(fd < 0) { do_log_error(L_ERROR, errno, "Couldn't duplicate file descriptor"); return -1; } rc = destroyDiskEntry(object, 1); if(rc < 0) { close(fd); return -1; } entry = makeDiskEntry(object, 1, 1); if(!entry) { close(fd); return -1; } offset = diskEntrySize(object); if(offset < 0) { close(fd); return -1; } bufsize = CHUNK_SIZE; buf_is_chunk = 1; buf = maybe_get_chunk(); if(!buf) { bufsize = 2048; buf_is_chunk = 0; buf = malloc(2048); if(buf == NULL) { do_log(L_ERROR, "Couldn't allocate buffer.\n"); close(fd); return -1; } } rc = lseek(fd, old_body_offset + offset, SEEK_SET); if(rc < 0) goto done; while(1) { CHECK_ENTRY(entry); n = read(fd, buf, bufsize); if(n <= 0) goto done; rc = entrySeek(entry, entry->body_offset + offset); if(rc < 0) goto done; rc = write(entry->fd, buf, n); if(rc >= 0) { entry->offset += rc; entry->size += rc; } if(rc < n) goto done; } done: CHECK_ENTRY(entry); if(object->length >= 0 && entry->size == object->length) object->flags |= OBJECT_DISK_ENTRY_COMPLETE; close(fd); if(buf_is_chunk) dispose_chunk(buf); else free(buf); return 1; } int destroyDiskEntry(ObjectPtr object, int d) { DiskCacheEntryPtr entry = object->disk_entry; int rc, urc = 1; assert(!entry || !entry->local || !d); if(d && !entry) entry = makeDiskEntry(object, 1, 0); CHECK_ENTRY(entry); if(!entry || entry == &negativeEntry) { return 1; } assert(entry->object == object); if(maxDiskCacheEntrySize >= 0 && object->size > maxDiskCacheEntrySize) { /* See writeoutToDisk */ d = 1; } if(d) { entry->object->flags &= ~OBJECT_DISK_ENTRY_COMPLETE; if(entry->filename) { urc = unlink(entry->filename); if(urc < 0) do_log_error(L_WARN, errno, "Couldn't unlink %s", entry->filename); } } else { if(entry && entry->metadataDirty) writeoutMetadata(object); 
makeDiskEntry(object, 1, 0); /* rewriteDiskEntry may change the disk entry */ entry = object->disk_entry; if(entry == NULL || entry == &negativeEntry) return 0; if(entry->writeable && diskCacheWriteoutOnClose > 0) reallyWriteoutToDisk(object, -1, diskCacheWriteoutOnClose); } again: rc = close(entry->fd); if(rc < 0 && errno == EINTR) goto again; entry->fd = -1; if(entry->filename) free(entry->filename); entry->filename = NULL; if(entry->previous) entry->previous->next = entry->next; else diskEntries = entry->next; if(entry->next) entry->next->previous = entry->previous; else diskEntriesLast = entry->previous; numDiskEntries--; assert(numDiskEntries >= 0); free(entry); object->disk_entry = NULL; if(urc < 0) return -1; else return 1; } ObjectPtr objectGetFromDisk(ObjectPtr object) { DiskCacheEntryPtr entry = makeDiskEntry(object, 0, 0); if(!entry) return NULL; return object; } int objectFillFromDisk(ObjectPtr object, int offset, int chunks) { DiskCacheEntryPtr entry; int rc, result; int i, j, k; int complete; if(object->type != OBJECT_HTTP) return 0; if(object->flags & OBJECT_LINEAR) return 0; if(object->length >= 0) { chunks = MIN(chunks, (object->length - offset + CHUNK_SIZE - 1) / CHUNK_SIZE); } rc = objectSetChunks(object, offset / CHUNK_SIZE + chunks); if(rc < 0) return 0; complete = 1; if(object->flags & OBJECT_INITIAL) { complete = 0; } else if((object->length < 0 || object->size < object->length) && object->size < (offset / CHUNK_SIZE + chunks) * CHUNK_SIZE) { complete = 0; } else { for(k = 0; k < chunks; k++) { int s; i = offset / CHUNK_SIZE + k; s = MIN(CHUNK_SIZE, object->size - i * CHUNK_SIZE); if(object->chunks[i].size < s) { complete = 0; break; } } } if(complete) return 1; /* This has the side-effect of revalidating the entry, which is what makes HEAD requests work. 
*/ entry = makeDiskEntry(object, 0, 0); if(!entry) return 0; for(k = 0; k < chunks; k++) { i = offset / CHUNK_SIZE + k; if(!object->chunks[i].data) object->chunks[i].data = get_chunk(); if(!object->chunks[i].data) { chunks = k; break; } lockChunk(object, i); } result = 0; for(k = 0; k < chunks; k++) { int o; i = offset / CHUNK_SIZE + k; j = object->chunks[i].size; o = i * CHUNK_SIZE + j; if(object->chunks[i].size == CHUNK_SIZE) continue; if(entry->size >= 0 && entry->size <= o) break; if(entry->offset != entry->body_offset + o) { rc = entrySeek(entry, entry->body_offset + o); if(rc < 0) { result = 0; break; } } CHECK_ENTRY(entry); again: rc = read(entry->fd, object->chunks[i].data + j, CHUNK_SIZE - j); if(rc < 0) { if(errno == EINTR) goto again; entry->offset = -1; do_log_error(L_ERROR, errno, "Couldn't read"); break; } entry->offset += rc; object->chunks[i].size += rc; if(object->size < o + rc) object->size = o + rc; if(entry->object->length >= 0 && entry->size < 0 && entry->offset - entry->body_offset == entry->object->length) entry->size = entry->object->length; if(rc < CHUNK_SIZE - j) { /* Paranoia: the read may have been interrupted half-way. 
*/ if(entry->size < 0) { if(rc == 0 || (entry->object->length >= 0 && entry->object->length == entry->offset - entry->body_offset)) entry->size = entry->offset - entry->body_offset; break; } else if(entry->size != entry->offset - entry->body_offset) { if(rc == 0 || entry->size < entry->offset - entry->body_offset) { do_log(L_WARN, "Disk entry size changed behind our back: " "%ld -> %ld (%d).\n", (long)entry->size, (long)entry->offset - entry->body_offset, object->size); entry->size = -1; } } break; } CHECK_ENTRY(entry); result = 1; } CHECK_ENTRY(object->disk_entry); for(k = 0; k < chunks; k++) { i = offset / CHUNK_SIZE + k; unlockChunk(object, i); } if(result > 0) { notifyObject(object); return 1; } else { return 0; } } int writeoutToDisk(ObjectPtr object, int upto, int max) { if(maxDiskCacheEntrySize >= 0 && object->size > maxDiskCacheEntrySize) { /* An object was created with an unknown length, and then grew beyond maxDiskCacheEntrySize. Destroy the disk entry. */ destroyDiskEntry(object, 1); return 0; } return reallyWriteoutToDisk(object, upto, max); } static int reallyWriteoutToDisk(ObjectPtr object, int upto, int max) { DiskCacheEntryPtr entry; int rc; int i, j; int offset; int bytes = 0; if(upto < 0) upto = object->size; if((object->cache_control & CACHE_NO_STORE) || (object->flags & OBJECT_LOCAL)) return 0; if((object->flags & OBJECT_DISK_ENTRY_COMPLETE) && !object->disk_entry) return 0; entry = makeDiskEntry(object, 1, 1); if(!entry) return 0; assert(!entry->local); if(object->flags & OBJECT_DISK_ENTRY_COMPLETE) goto done; diskEntrySize(object); if(entry->size < 0) return 0; if(object->length >= 0 && entry->size >= object->length) { object->flags |= OBJECT_DISK_ENTRY_COMPLETE; goto done; } if(entry->size >= upto) goto done; if(!entry->writeable) { entry = makeDiskEntry(object, 1, 1); if(!entry) return 0; if(!entry->writeable) return 0; diskEntrySize(object); if(entry->size < 0) return 0; } offset = entry->size; /* Avoid a seek in case we start writing at 
the beginning */ if(offset == 0 && entry->metadataDirty) { writeoutMetadata(object); /* rewriteDiskEntry may change the entry */ entry = makeDiskEntry(object, 1, 0); if(entry == NULL || !entry->writeable) return 0; } rc = entrySeek(entry, offset + entry->body_offset); if(rc < 0) return 0; do { if(max >= 0 && bytes >= max) break; CHECK_ENTRY(entry); assert(entry->offset == offset + entry->body_offset); i = offset / CHUNK_SIZE; j = offset % CHUNK_SIZE; if(i >= object->numchunks) break; if(object->chunks[i].size <= j) break; again: rc = write(entry->fd, object->chunks[i].data + j, object->chunks[i].size - j); if(rc < 0) { if(errno == EINTR) goto again; do_log_error(L_ERROR, errno, "Couldn't write disk entry"); break; } entry->offset += rc; offset += rc; bytes += rc; if(entry->size < offset) entry->size = offset; } while(j + rc >= CHUNK_SIZE); done: CHECK_ENTRY(entry); if(entry->metadataDirty) writeoutMetadata(object); return bytes; } int writeoutMetadata(ObjectPtr object) { DiskCacheEntryPtr entry; int rc; if((object->cache_control & CACHE_NO_STORE) || (object->flags & OBJECT_LOCAL)) return 0; entry = makeDiskEntry(object, 1, 0); if(entry == NULL || entry == &negativeEntry) goto fail; assert(!entry->local); rc = entrySeek(entry, 0); if(rc < 0) goto fail; rc = writeHeaders(entry->fd, &entry->body_offset, object, NULL, 0); if(rc == -2) { rc = rewriteEntry(object); if(rc < 0) return 0; return 1; } if(rc < 0) goto fail; entry->offset = rc; entry->metadataDirty = 0; return 1; fail: /* We need this in order to avoid trying to write this entry out multiple times. 
*/ if(entry && entry != &negativeEntry) entry->metadataDirty = 0; return 0; } static void mergeDobjects(DiskObjectPtr dst, DiskObjectPtr src) { if(dst->filename == NULL) { dst->filename = src->filename; dst->body_offset = src->body_offset; } else free(src->filename); free(src->location); if(dst->length < 0) dst->length = src->length; if(dst->size < 0) dst->size = src->size; if(dst->age < 0) dst->age = src->age; if(dst->date < 0) dst->date = src->date; if(dst->last_modified < 0) dst->last_modified = src->last_modified; free(src); } DiskObjectPtr readDiskObject(char *filename, struct stat *sb) { int fd, rc, n, dummy, code; int length, size; time_t date, last_modified, age, atime, expires; char *location = NULL, *fn = NULL; DiskObjectPtr dobject; char *buf; int buf_is_chunk, bufsize; int body_offset; struct stat ss; fd = -1; if(sb == NULL) { rc = stat(filename, &ss); if(rc < 0) { do_log_error(L_WARN, errno, "Couldn't stat %s", filename); return NULL; } sb = &ss; } buf_is_chunk = 1; bufsize = CHUNK_SIZE; buf = get_chunk(); if(buf == NULL) { do_log(L_ERROR, "Couldn't allocate buffer.\n"); return NULL; } if(S_ISREG(sb->st_mode)) { fd = open(filename, O_RDONLY | O_BINARY); if(fd < 0) goto fail; again: rc = read(fd, buf, bufsize); if(rc < 0) goto fail; n = findEndOfHeaders(buf, 0, rc, &dummy); if(n < 0) { long lrc; if(buf_is_chunk) { dispose_chunk(buf); buf_is_chunk = 0; bufsize = bigBufferSize; buf = malloc(bigBufferSize); if(buf == NULL) goto fail2; lrc = lseek(fd, 0, SEEK_SET); if(lrc < 0) goto fail; goto again; } goto fail; } rc = httpParseServerFirstLine(buf, &code, &dummy, NULL); if(rc < 0) goto fail; rc = httpParseHeaders(0, NULL, buf, rc, NULL, NULL, &length, NULL, NULL, NULL, &date, &last_modified, &expires, &age, &atime, &body_offset, NULL, NULL, NULL, NULL, NULL, &location, NULL, NULL); if(rc < 0 || location == NULL) goto fail; if(body_offset < 0) body_offset = n; size = sb->st_size - body_offset; if(size < 0) size = 0; } else if(S_ISDIR(sb->st_mode)) { char *n; 
n = dirnameUrl(buf, 512, (char*)filename, strlen(filename)); if(n == NULL) goto fail; location = strdup(n); if(location == NULL) goto fail; length = -1; size = -1; body_offset = -1; age = -1; atime = -1; date = -1; last_modified = -1; } else { goto fail; } dobject = malloc(sizeof(DiskObjectRec)); if(!dobject) goto fail; fn = strdup(filename); if(!fn) goto fail; if(buf_is_chunk) dispose_chunk(buf); else free(buf); dobject->location = location; dobject->filename = fn; dobject->length = length; dobject->body_offset = body_offset; dobject->size = size; dobject->age = age; dobject->access = atime; dobject->date = date; dobject->last_modified = last_modified; dobject->expires = expires; if(fd >= 0) close(fd); return dobject; fail: if(buf_is_chunk) dispose_chunk(buf); else free(buf); fail2: if(fd >= 0) close(fd); if(location) free(location); return NULL; } DiskObjectPtr processObject(DiskObjectPtr dobjects, char *filename, struct stat *sb) { DiskObjectPtr dobject = NULL; int c = 0; dobject = readDiskObject((char*)filename, sb); if(dobject == NULL) return 0; if(!dobjects || (c = strcmp(dobject->location, dobjects->location)) <= 0) { if(dobjects && c == 0) { mergeDobjects(dobjects, dobject); } else { dobject->next = dobjects; dobjects = dobject; } } else { DiskObjectPtr other = dobjects; while(other->next) { c = strcmp(dobject->location, other->next->location); if(c < 0) break; other = other->next; } if(strcmp(dobject->location, other->location) == 0) { mergeDobjects(other, dobject); } else { dobject->next = other->next; other->next = dobject; } } return dobjects; } /* Determine whether p is below root */ static int filter(DiskObjectPtr p, const char *root, int n, int recursive) { char *cp; int m = strlen(p->location); if(m < n) return 0; if(memcmp(root, p->location, n) != 0) return 0; if(recursive) return 1; if(m == 0 || p->location[m - 1] == '/') return 1; cp = strchr(p->location + n, '/'); if(cp && cp - p->location != m - 1) return 0; return 1; } /* Filter out all disk 
objects that are not under root */ DiskObjectPtr filterDiskObjects(DiskObjectPtr from, const char *root, int recursive) { int n = strlen(root); DiskObjectPtr p, q; while(from && !filter(from, root, n, recursive)) { p = from; from = p->next; free(p->location); free(p); } p = from; while(p && p->next) { if(!filter(p->next, root, n, recursive)) { q = p->next; p->next = q->next; free(q->location); free(q); } else { p = p->next; } } return from; } DiskObjectPtr insertRoot(DiskObjectPtr from, const char *root) { DiskObjectPtr p; p = from; while(p) { if(strcmp(root, p->location) == 0) return from; p = p->next; } p = malloc(sizeof(DiskObjectRec)); if(!p) return from; p->location = strdup(root); if(p->location == NULL) { free(p); return from; } p->filename = NULL; p->length = -1; p->size = -1; p->age = -1; p->access = -1; p->last_modified = -1; p->expires = -1; p->next = from; return p; } /* Insert all missing directories in a sorted list of dobjects */ DiskObjectPtr insertDirs(DiskObjectPtr from) { DiskObjectPtr p, q, new; int n, m; char *cp; p = NULL; q = from; while(q) { n = strlen(q->location); if(n > 0 && q->location[n - 1] != '/') { cp = strrchr(q->location, '/'); m = cp - q->location + 1; if(cp && (!p || strlen(p->location) < m || memcmp(p->location, q->location, m) != 0)) { new = malloc(sizeof(DiskObjectRec)); if(!new) break; new->location = strdup_n(q->location, m); if(new->location == NULL) { free(new); break; } new->filename = NULL; new->length = -1; new->size = -1; new->age = -1; new->access = -1; new->last_modified = -1; new->expires = -1; new->next = q; if(p) p->next = new; else from = new; } } p = q; q = q->next; } return from; } void indexDiskObjects(FILE *out, const char *root, int recursive) { int n, i, isdir; DIR *dir; struct dirent *dirent; char buf[1024]; char *fts_argv[2]; FTS *fts; FTSENT *fe; DiskObjectPtr dobjects = NULL; char *of = root[0] == '\0' ? "" : " of "; fprintf(out, "\n" "\n" "%s%s%s\n" "\n" "

%s%s%s

\n", recursive ? "Recursive index" : "Index", of, root, recursive ? "Recursive index" : "Index", of, root); if(diskCacheRoot == NULL || diskCacheRoot->length <= 0) { fprintf(out, "

No diskCacheRoot.

\n"); goto trailer; } if(diskCacheRoot->length >= 1024) { fprintf(out, "

The value of diskCacheRoot is " "too long (%d).

\n", diskCacheRoot->length); goto trailer; } if(strlen(root) < 8) { memcpy(buf, diskCacheRoot->string, diskCacheRoot->length); buf[diskCacheRoot->length] = '\0'; n = diskCacheRoot->length; } else { n = urlDirname(buf, 1024, root, strlen(root)); } if(n > 0) { if(recursive) { dir = NULL; fts_argv[0] = buf; fts_argv[1] = NULL; fts = fts_open(fts_argv, FTS_LOGICAL, NULL); if(fts) { while(1) { fe = fts_read(fts); if(!fe) break; if(fe->fts_info != FTS_DP) dobjects = processObject(dobjects, fe->fts_path, fe->fts_info == FTS_NS || fe->fts_info == FTS_NSOK ? fe->fts_statp : NULL); } fts_close(fts); } } else { dir = opendir(buf); if(dir) { while(1) { dirent = readdir(dir); if(!dirent) break; if(n + strlen(dirent->d_name) < 1024) { strcpy(buf + n, dirent->d_name); } else { continue; } dobjects = processObject(dobjects, buf, NULL); } closedir(dir); } else { fprintf(out, "

Couldn't open directory: %s (%d).

\n", strerror(errno), errno); goto trailer; } } } if(dobjects) { DiskObjectPtr dobject; int entryno; dobjects = insertRoot(dobjects, root); dobjects = insertDirs(dobjects); dobjects = filterDiskObjects(dobjects, root, recursive); dobject = dobjects; buf[0] = '\0'; alternatingHttpStyle(out, "diskcachelist"); fprintf(out, "\n"); fprintf(out, "\n"); entryno = 0; while(dobjects) { dobject = dobjects; i = strlen(dobject->location); isdir = (i == 0 || dobject->location[i - 1] == '/'); if(entryno % 2) fprintf(out, ""); else fprintf(out, ""); if(dobject->size >= 0) { fprintf(out, " "); if(dobject->length >= 0) { if(dobject->size == dobject->length) fprintf(out, " ", dobject->length); else fprintf(out, " ", dobject->size, dobject->length); } else { /* Avoid a trigraph. */ fprintf(out, " ", dobject->size); } if(dobject->last_modified >= 0) { struct tm *tm = gmtime(&dobject->last_modified); if(tm == NULL) n = -1; else n = strftime(buf, 1024, "%d.%m.%Y", tm); } else n = -1; if(n > 0) { buf[n] = '\0'; fprintf(out, " ", buf); } else { fprintf(out, ""); } if(dobject->date >= 0) { struct tm *tm = gmtime(&dobject->date); if(tm == NULL) n = -1; else n = strftime(buf, 1024, "%d.%m.%Y", tm); } else n = -1; if(n > 0) { buf[n] = '\0'; fprintf(out, "", buf); } else { fprintf(out, ""); } } else { fprintf(out, ""); } if(isdir) { fprintf(out, "" "", dobject->location, dobject->location); } fprintf(out, "\n"); entryno++; dobjects = dobject->next; free(dobject->location); free(dobject->filename); free(dobject); } fprintf(out, "\n"); fprintf(out, "
", dobject->location); htmlPrint(out, dobject->location, strlen(dobject->location)); fprintf(out, "%d%d/%d%d/??" "?%s%s"); htmlPrint(out, dobject->location, strlen(dobject->location)); fprintf(out, "plain" "recursive
\n"); } trailer: fprintf(out, "

back

\n"); fprintf(out, "\n"); return; } static int checkForZeroes(char *buf, int n) { int i, j; unsigned long *lbuf = (unsigned long *)buf; assert(n % sizeof(unsigned long) == 0); for(i = 0; i * sizeof(unsigned long) < n; i++) { if(lbuf[i] != 0L) return i * sizeof(unsigned long); } for(j = 0; i * sizeof(unsigned long) + j < n; j++) { if(buf[i * sizeof(unsigned long) + j] != 0) break; } return i * sizeof(unsigned long) + j; } static int copyFile(int from, char *filename, int n) { char *buf; int to, offset, nread, nzeroes, rc; buf = malloc(CHUNK_SIZE); if(buf == NULL) return -1; to = open(filename, O_RDWR | O_CREAT | O_EXCL | O_BINARY, diskCacheFilePermissions); if(to < 0) { free(buf); return -1; } offset = 0; while(offset < n) { nread = read(from, buf, MIN(CHUNK_SIZE, n - offset)); if(nread <= 0) break; nzeroes = checkForZeroes(buf, nread & -8); if(nzeroes > 0) { /* I like holes */ rc = lseek(to, nzeroes, SEEK_CUR); if(rc != offset + nzeroes) { if(rc < 0) do_log_error(L_ERROR, errno, "Couldn't extend file"); else do_log(L_ERROR, "Couldn't extend file: " "unexpected offset %d != %d + %d.\n", rc, offset, nread); break; } } if(nread > nzeroes) { rc = write(to, buf + nzeroes, nread - nzeroes); if(rc != nread - nzeroes) { if(rc < 0) do_log_error(L_ERROR, errno, "Couldn't write"); else do_log(L_ERROR, "Short write.\n"); break; } } offset += nread; } free(buf); close(to); if(offset <= 0) unlink(filename); /* something went wrong straight away */ return 1; } static long int expireFile(char *filename, struct stat *sb, int *considered, int *unlinked, int *truncated) { DiskObjectPtr dobject = NULL; time_t t; int fd, rc; long int ret = sb->st_size; if(!preciseExpiry) { t = sb->st_mtime; if(t > current_time.tv_sec + 1) { do_log(L_WARN, "File %s has access time in the future.\n", filename); t = current_time.tv_sec; } if(t > current_time.tv_sec - diskCacheUnlinkTime && (sb->st_size < diskCacheTruncateSize || t > current_time.tv_sec - diskCacheTruncateTime)) return ret; } 
(*considered)++; dobject = readDiskObject(filename, sb); if(!dobject) { do_log(L_ERROR, "Incorrect disk entry %s -- removing.\n", filename); rc = unlink(filename); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't unlink %s", filename); return ret; } else { (*unlinked)++; return 0; } } t = dobject->access; if(t < 0) t = dobject->age; if(t < 0) t = dobject->date; if(t > current_time.tv_sec) do_log(L_WARN, "Disk entry %s (%s) has access time in the future.\n", dobject->location, dobject->filename); if(t < current_time.tv_sec - diskCacheUnlinkTime) { rc = unlink(dobject->filename); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't unlink %s", filename); } else { (*unlinked)++; ret = 0; } } else if(dobject->size > diskCacheTruncateSize + 4 * dobject->body_offset && t < current_time.tv_sec - diskCacheTruncateTime) { /* We need to copy rather than simply truncate in place: the latter would confuse a running polipo. */ fd = open(dobject->filename, O_RDONLY | O_BINARY, 0); rc = unlink(dobject->filename); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't unlink %s", filename); close(fd); fd = -1; } else { (*unlinked)++; copyFile(fd, dobject->filename, dobject->body_offset + diskCacheTruncateSize); close(fd); (*unlinked)--; (*truncated)++; ret = sb->st_size - dobject->body_offset + diskCacheTruncateSize; } } free(dobject->location); free(dobject->filename); free(dobject); return ret; } void expireDiskObjects() { int rc; char *fts_argv[2]; FTS *fts; FTSENT *fe; int files = 0, considered = 0, unlinked = 0, truncated = 0; int dirs = 0, rmdirs = 0; long left = 0, total = 0; if(diskCacheRoot == NULL || diskCacheRoot->length <= 0 || diskCacheRoot->string[0] != '/') return; fts_argv[0] = diskCacheRoot->string; fts_argv[1] = NULL; fts = fts_open(fts_argv, FTS_LOGICAL, NULL); if(fts == NULL) { do_log_error(L_ERROR, errno, "Couldn't fts_open disk cache"); } else { while(1) { gettimeofday(¤t_time, NULL); fe = fts_read(fts); if(!fe) break; if(fe->fts_info == FTS_D) continue; 
if(fe->fts_info == FTS_DP || fe->fts_info == FTS_DC || fe->fts_info == FTS_DNR) { if(fe->fts_accpath[0] == '/' && strlen(fe->fts_accpath) <= diskCacheRoot->length) continue; dirs++; rc = rmdir(fe->fts_accpath); if(rc >= 0) rmdirs++; else if(errno != ENOTEMPTY && errno != EEXIST) do_log_error(L_ERROR, errno, "Couldn't remove directory %s", fe->fts_accpath); continue; } else if(fe->fts_info == FTS_NS) { do_log_error(L_ERROR, fe->fts_errno, "Couldn't stat file %s", fe->fts_accpath); continue; } else if(fe->fts_info == FTS_ERR) { do_log_error(L_ERROR, fe->fts_errno, "Couldn't fts_read disk cache"); break; } if(!S_ISREG(fe->fts_statp->st_mode)) { do_log(L_ERROR, "Unexpected file %s type 0%o.\n", fe->fts_accpath, (unsigned int)fe->fts_statp->st_mode); continue; } files++; left += expireFile(fe->fts_accpath, fe->fts_statp, &considered, &unlinked, &truncated); total += fe->fts_statp->st_size; } fts_close(fts); } printf("Disk cache purged.\n"); printf("%d files, %d considered, %d removed, %d truncated " "(%ldkB -> %ldkB).\n", files, considered, unlinked, truncated, total/1024, left/1024); printf("%d directories, %d removed.\n", dirs, rmdirs); return; } #else void preinitDiskcache() { return; } void initDiskcache() { return; } int writeoutToDisk(ObjectPtr object, int upto, int max) { return 0; } int destroyDiskEntry(ObjectPtr object, int d) { return 0; } ObjectPtr objectGetFromDisk(ObjectPtr object) { return NULL; } int objectFillFromDisk(ObjectPtr object, int offset, int chunks) { return 0; } int revalidateDiskEntry(ObjectPtr object) { return 0; } void dirtyDiskEntry(ObjectPtr object) { return; } void expireDiskObjects() { do_log(L_ERROR, "Disk cache not supported in this version.\n"); } int diskEntrySize(ObjectPtr object) { return -1; } #endif polipo-1.0.4.1/config.sample0000644000175000017500000000747211331407220015142 0ustar chrisdchrisd# Sample configuration file for Polipo. 
-*-sh-*- # You should not need to use a configuration file; all configuration # variables have reasonable defaults. If you want to use one, you # can copy this to /etc/polipo/config or to ~/.polipo and modify it. # This file only contains some of the configuration variables; see the # list given by ``polipo -v'' and the manual for more. ### Basic configuration ### ******************* # Uncomment one of these if you want to allow remote clients to # connect: # proxyAddress = "::0" # both IPv4 and IPv6 # proxyAddress = "0.0.0.0" # IPv4 only # If you do that, you'll want to restrict the set of hosts allowed to # connect: # allowedClients = "127.0.0.1, 134.157.168.57" # allowedClients = "127.0.0.1, 134.157.168.0/24" # Uncomment this if you want your Polipo to identify itself by # something other than the host name: # proxyName = "polipo.example.org" # Uncomment this if there's only one user using this instance of Polipo: # cacheIsShared = false # Uncomment this if you want to use a parent proxy: # parentProxy = "squid.example.org:3128" # Uncomment this if you want to use a parent SOCKS proxy: # socksParentProxy = "localhost:9050" # socksProxyType = socks5 ### Memory ### ****** # Uncomment this if you want Polipo to use a ridiculously small amount # of memory (a hundred C-64 worth or so): # chunkHighMark = 819200 # objectHighMark = 128 # Uncomment this if you've got plenty of memory: # chunkHighMark = 50331648 # objectHighMark = 16384 ### On-disk data ### ************ # Uncomment this if you want to disable the on-disk cache: # diskCacheRoot = "" # Uncomment this if you want to put the on-disk cache in a # non-standard location: # diskCacheRoot = "~/.polipo-cache/" # Uncomment this if you want to disable the local web server: # localDocumentRoot = "" # Uncomment this if you want to enable the pages under /polipo/index? # and /polipo/servers?. This is a serious privacy leak if your proxy # is shared. 
# disableIndexing = false # disableServersList = false ### Domain Name System ### ****************** # Uncomment this if you want to contact IPv4 hosts only (and make DNS # queries somewhat faster): # dnsQueryIPv6 = no # Uncomment this if you want Polipo to prefer IPv4 to IPv6 for # double-stack hosts: # dnsQueryIPv6 = reluctantly # Uncomment this to disable Polipo's DNS resolver and use the system's # default resolver instead. If you do that, Polipo will freeze during # every DNS query: # dnsUseGethostbyname = yes ### HTTP ### **** # Uncomment this if you want to enable detection of proxy loops. # This will cause your hostname (or whatever you put into proxyName # above) to be included in every request: # disableVia=false # Uncomment this if you want to slightly reduce the amount of # information that you leak about yourself: # censoredHeaders = from, accept-language # censorReferer = maybe # Uncomment this if you're paranoid. This will break a lot of sites, # though: # censoredHeaders = set-cookie, cookie, cookie2, from, accept-language # censorReferer = true # Uncomment this if you want to use Poor Man's Multiplexing; increase # the sizes if you're on a fast line. They should each amount to a few # seconds' worth of transfer; if pmmSize is small, you'll want # pmmFirstSize to be larger. # Note that PMM is somewhat unreliable. 
# pmmFirstSize = 16384 # pmmSize = 8192 # Uncomment this if your user-agent does something reasonable with # Warning headers (most don't): # relaxTransparency = maybe # Uncomment this if you never want to revalidate instances for which # data is available (this is not a good idea): # relaxTransparency = yes # Uncomment this if you have no network: # proxyOffline = yes # Uncomment this if you want to avoid revalidating instances with a # Vary header (this is not a good idea): # mindlesslyCacheVary = true polipo-1.0.4.1/config.h0000644000175000017500000000472211331407220014103 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/* Type tags for configuration variables (ConfigVariableRec.type).
   The first eight (CONFIG_INT .. CONFIG_PENTASTATE) are all stored
   through the int pointer of the value union. */
#define CONFIG_INT 0
#define CONFIG_OCTAL 1
#define CONFIG_HEX 2
#define CONFIG_TIME 3
#define CONFIG_BOOLEAN 4
#define CONFIG_TRISTATE 5
#define CONFIG_TETRASTATE 6
#define CONFIG_PENTASTATE 7
/* Stored through the float pointer. */
#define CONFIG_FLOAT 8
/* Stored through the atom pointer; the _LOWER variant is parsed
   case-insensitively, PASSWORD values are never printed. */
#define CONFIG_ATOM 9
#define CONFIG_ATOM_LOWER 10
#define CONFIG_PASSWORD 11
/* List-valued variables. */
#define CONFIG_INT_LIST 12
#define CONFIG_ATOM_LIST 13
#define CONFIG_ATOM_LIST_LOWER 14

/* One declared configuration variable; the records form a singly
   linked list kept sorted by name (see declareConfigVariable). */
typedef struct _ConfigVariable {
    AtomPtr name;
    /* One of the CONFIG_* tags above; selects the valid union member. */
    int type;
    /* Address of the C variable that holds the current value. */
    union {
        int *i;
        float *f;
        struct _Atom **a;
        struct _AtomList **al;
        struct _IntList **il;
    } value;
    /* Called with a pointer to the parsed value when the variable is
       changed through parseConfigLine(..., set != 0); may be NULL, in
       which case the variable cannot be changed that way. */
    int (*setter)(struct _ConfigVariable*, void*);
    /* Human-readable description; may be NULL. */
    char *help;
    struct _ConfigVariable *next;
} ConfigVariableRec, *ConfigVariablePtr;

/* Declare the C variable `name' as a configuration variable of the same
   name, without a setter. */
#define CONFIG_VARIABLE(name, type, help) \
    CONFIG_VARIABLE_SETTABLE(name, type, NULL, help)

/* Same, with an explicit setter.  The variable's identifier is
   stringified to obtain its configuration name, and its address is
   recorded as the value location. */
#define CONFIG_VARIABLE_SETTABLE(name, type, setter, help) \
    declareConfigVariable(internAtom(#name), type, &name, setter, help)

void declareConfigVariable(AtomPtr name, int type, void *value,
                           int (*setter)(ConfigVariablePtr, void*),
                           char *help);
void printConfigVariables(FILE *out, int html);
int parseConfigLine(char *line, char *filename, int lineno, int set);
int parseConfigFile(AtomPtr);
/* Ready-made setters that simply store the new value. */
int configIntSetter(ConfigVariablePtr, void*);
int configFloatSetter(ConfigVariablePtr, void*);
int configAtomSetter(ConfigVariablePtr, void*);
polipo-1.0.4.1/config.c0000644000175000017500000006022311331407220014074 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include "polipo.h" ConfigVariablePtr configVariables = NULL; static ConfigVariablePtr findConfigVariable(AtomPtr name) { ConfigVariablePtr var; var = configVariables; while(var != NULL) { if(var->name == name) break; var = var->next; } return var; } void declareConfigVariable(AtomPtr name, int type, void *value, int (*setter)(ConfigVariablePtr, void*), char *help) { ConfigVariablePtr var, previous, next; var = findConfigVariable(name); if(var) { do_log(L_ERROR, "Configuration variable %s declared multiple times.\n", name->string); if(var->type != type) { exit(1); } } var = malloc(sizeof(ConfigVariableRec)); if(var == NULL) { do_log(L_ERROR, "Couldn't allocate config variable.\n"); exit(1); } var->name = retainAtom(name); var->type = type; switch(type) { case CONFIG_INT: case CONFIG_OCTAL: case CONFIG_HEX: case CONFIG_TIME: case CONFIG_BOOLEAN: case CONFIG_TRISTATE: case CONFIG_TETRASTATE: case CONFIG_PENTASTATE: var->value.i = value; break; case CONFIG_FLOAT: var->value.f = value; break; case CONFIG_ATOM: case CONFIG_ATOM_LOWER: case CONFIG_PASSWORD: var->value.a = value; break; case CONFIG_INT_LIST: var->value.il = value; break; case CONFIG_ATOM_LIST: case CONFIG_ATOM_LIST_LOWER: var->value.al = value; break; default: abort(); } var->setter = setter; var->help = help; previous = NULL; next = configVariables; while(next && strcmp(next->name->string, var->name->string) < 0) { previous = next; next = next->next; } if(next && strcmp(next->name->string, var->name->string) == 0) { do_log(L_ERROR, 
"Variable %s declared multiple times.\n", next->name->string); abort(); } if(previous == NULL) { var->next = configVariables; configVariables = var; } else { var->next = next; previous->next = var; } } static void printString(FILE *out, char *string, int html) { if(html) { char buf[512]; int i; i = htmlString(buf, 0, 512, string, strlen(string)); if(i < 0) { fprintf(out, "(overflow)"); return; } fwrite(buf, 1, i, out); } else { fprintf(out, "%s", string); } } static void printVariable(FILE *out, ConfigVariablePtr var, int html, int parseable) { int i; switch(var->type) { case CONFIG_INT: fprintf(out, "%d", *var->value.i); break; case CONFIG_OCTAL: fprintf(out, "0%o", *var->value.i); break; case CONFIG_HEX: fprintf(out, "0x%x", *var->value.i); break; case CONFIG_TIME: { int v = *var->value.i; if(v == 0) { fprintf(out, "0s"); } else { if(v >= 3600 * 24) fprintf(out, "%dd", v/(3600*24)); v = v % (3600 * 24); if(v >= 3600) fprintf(out, "%dh", v / 3600); v = v % 3600; if(v >= 60) fprintf(out, "%dm", v / 60); v = v % 60; if(v > 0) fprintf(out, "%ds", v); } } break; case CONFIG_BOOLEAN: switch(*var->value.i) { case 0: fprintf(out, "false"); break; case 1: fprintf(out, "true"); break; default: fprintf(out, "???"); break; } break; case CONFIG_TRISTATE: switch(*var->value.i) { case 0: fprintf(out, "false"); break; case 1: fprintf(out, "maybe"); break; case 2: fprintf(out, "true"); break; default: fprintf(out, "???"); break; } break; case CONFIG_TETRASTATE: switch(*var->value.i) { case 0: fprintf(out, "false"); break; case 1: fprintf(out, "reluctantly"); break; case 2: fprintf(out, "happily"); break; case 3: fprintf(out, "true"); break; default: fprintf(out, "???"); break; } break; case CONFIG_PENTASTATE: switch(*var->value.i) { case 0: fprintf(out, "no"); break; case 1: fprintf(out, "reluctantly"); break; case 2: fprintf(out, "maybe"); break; case 3: fprintf(out, "happily"); break; case 4: fprintf(out, "true"); break; default: fprintf(out, "???"); break; } break; case 
CONFIG_FLOAT: fprintf(out, "%f", *var->value.f); break; case CONFIG_ATOM: case CONFIG_ATOM_LOWER: if(*var->value.a) { if((*var->value.a)->length > 0) { printString(out, (*var->value.a)->string, html); } else { if(!parseable) fprintf(out, "(empty)"); } } else { if(!parseable) fprintf(out, "(none)"); } break; case CONFIG_PASSWORD: if(!parseable) fprintf(out, "(hidden)"); break; case CONFIG_INT_LIST: if((*var->value.il) == NULL) { if(!parseable) fprintf(out, "(not set)"); } else if((*var->value.il)->length == 0) { if(!parseable) fprintf(out, "(empty list)"); } else { for(i = 0; i < (*var->value.il)->length; i++) { int from = (*var->value.il)->ranges[i].from; int to = (*var->value.il)->ranges[i].to; assert(from <= to); if(from == to) fprintf(out, "%d", from); else fprintf(out, "%d-%d", from, to); if(i < (*var->value.il)->length - 1) fprintf(out, ", "); } } break; case CONFIG_ATOM_LIST: case CONFIG_ATOM_LIST_LOWER: if((*var->value.al) == NULL) { if(!parseable) fprintf(out, "(not set)"); } else if((*var->value.al)->length == 0) { if(!parseable) fprintf(out, "(empty list)"); } else { for(i = 0; i < (*var->value.al)->length; i++) { AtomPtr atom = (*var->value.al)->list[i]; if(atom) { if(atom->length > 0) printString(out, atom->string, html); else { if(!parseable) fprintf(out, "(empty)"); } } else { if(!parseable) fprintf(out, "(none)"); } if(i < (*var->value.al)->length - 1) fprintf(out, ", "); } } break; default: abort(); } } static void printVariableForm(FILE *out, ConfigVariablePtr var) { char *disabled = ""; int i; if(disableConfiguration || !var->setter) disabled = "disabled=true"; fprintf(out, "
"); switch(var->type) { case CONFIG_INT: case CONFIG_OCTAL: case CONFIG_HEX: case CONFIG_TIME: case CONFIG_FLOAT: case CONFIG_ATOM: case CONFIG_ATOM_LOWER: case CONFIG_PASSWORD: case CONFIG_INT_LIST: case CONFIG_ATOM_LIST: case CONFIG_ATOM_LIST_LOWER: fprintf(out, "\n", var->type == CONFIG_PASSWORD ? " type=password" : "", var->name->string, disabled); break; case CONFIG_BOOLEAN: { static char *states[] = {"false", "true"}; fprintf(out, ""); if(var->setter) fprintf(out, ""); break; } case CONFIG_TRISTATE: { static char *states[] = {"false", "maybe", "true"}; fprintf(out, ""); if(var->setter) fprintf(out, ""); break; } case CONFIG_TETRASTATE: { static char *states[] = {"false", "reluctantly", "happily", "true"}; fprintf(out, ""); if(var->setter) fprintf(out, ""); break; } case CONFIG_PENTASTATE: { static char *states[] = {"no", "reluctantly", "maybe", "happily", "true"}; fprintf(out, ""); if(var->setter) fprintf(out,""); break; } default: abort(); } fprintf(out, "
"); } void printConfigVariables(FILE *out, int html) { ConfigVariablePtr var; int entryno = 0; #define PRINT_SEP() \ do {if(html) fprintf(out, ""); else fprintf(out, " ");} while(0) if(html) { fprintf(out, "\n"); fprintf(out, "\n"); } if(html) { alternatingHttpStyle(out, "configlist"); fprintf(out, "
\n" "\n" "" "" "" "\n" "\n" ); } /* configFile is not a config variable, for obvious bootstrapping reasons. CHUNK_SIZE is hardwired for now. */ fprintf(out, html ? "\n" : "configFile %s Configuration file.\n", configFile && configFile->length > 0 ? configFile->string : "(none)"); fprintf(out, html ? "\n" : "CHUNK_SIZE %d Unit of chunk memory allocation.\n", CHUNK_SIZE); var = configVariables; while(var != NULL) { if(html) { if(entryno % 2) fprintf(out, ""); else fprintf(out, ""); fprintf(out, "\n"); else fprintf(out, "\n"); entryno++; var = var->next; } if(html) { fprintf(out, "\n"); fprintf(out, "
variable namecurrent valuenew valuedescription
configFile%s" "Configuration file.
CHUNK_SIZE%d" "Unit of chunk memory allocation.
"); } fprintf(out, "%s", var->name->string); fprintf(out, html ? "
" : " "); fprintf(out, html ? "" : ""); switch(var->type) { case CONFIG_INT: case CONFIG_OCTAL: case CONFIG_HEX: fprintf(out, "integer"); break; case CONFIG_TIME: fprintf(out, "time"); break; case CONFIG_BOOLEAN: fprintf(out, "boolean"); break; case CONFIG_TRISTATE: fprintf(out, "tristate"); break; case CONFIG_TETRASTATE: fprintf(out, "4-state"); break; case CONFIG_PENTASTATE: fprintf(out, "5-state"); break; case CONFIG_FLOAT: fprintf(out, "float"); break; case CONFIG_ATOM: case CONFIG_ATOM_LOWER: case CONFIG_PASSWORD: fprintf(out, "atom"); break; case CONFIG_INT_LIST: fprintf(out, "intlist"); break; case CONFIG_ATOM_LIST: case CONFIG_ATOM_LIST_LOWER: fprintf(out, "list"); break; default: abort(); } fprintf(out, html ? "" : ""); PRINT_SEP(); printVariable(out, var, html, 0); PRINT_SEP(); if(html) { printVariableForm(out, var); PRINT_SEP(); } fprintf(out, "%s", var->help?var->help:""); if(html) fprintf(out, "
\n"); } return; #undef PRINT_SEP } static int skipWhitespace(char *buf, int i) { while(buf[i] == ' ' || buf[i] == '\t' || buf[i] == '\r') i++; return i; } static int parseInt(char *buf, int offset, int *value_return) { char *p; int value; value = strtol(buf + offset, &p, 0); if(p <= buf + offset) return -1; *value_return = value; return p - buf; } static struct config_state { char *name; int value; } states[] = { { "false", 0 }, { "no", 0 }, { "reluctantly", 1 }, { "seldom", 1 }, { "rarely", 1 }, { "lazily", 1 }, { "maybe", 2 }, { "perhaps", 2 }, { "happily", 3 }, { "often", 3 }, { "eagerly", 3 }, { "true", 4 }, { "yes", 4 } }; static int parseState(char *buf, int offset, int kind) { int i = offset; int n; int state = -1; while(letter(buf[i])) i++; for(n = 0; n < sizeof(states) / sizeof(states[0]); n++) { if(strlen(states[n].name) == i - offset && lwrcmp(buf + offset, states[n].name, i - offset) == 0) { state = states[n].value; break; } } if(state < 0) return -1; switch(kind) { case CONFIG_BOOLEAN: if(state == 0) return 0; else if(state == 4) return 1; else return -1; break; case CONFIG_TRISTATE: if(state == 0) return 0; else if(state == 2) return 1; else if(state == 4) return 2; else return -1; break; case CONFIG_TETRASTATE: if(state == 0) return 0; else if(state == 1) return 1; else if(state == 3) return 2; else if(state == 4) return 3; else return -1; break; case CONFIG_PENTASTATE: return state; break; default: abort(); } } static int parseAtom(char *buf, int offset, AtomPtr *value_return, int insensitive) { int y0, i, j, k; AtomPtr atom; int escape = 0; char *s; i = offset; if(buf[i] == '\"') { i++; y0 = i; while(buf[i] != '\"' && buf[i] != '\n' && buf[i] != '\0') { if(buf[i] == '\\' && buf[i + 1] != '\0') { escape = 1; i += 2; } else i++; } if(buf[i] != '\"') return -1; j = i + 1; } else { y0 = i; while(letter(buf[i]) || digit(buf[i]) || buf[i] == '_' || buf[i] == '-' || buf[i] == '~' || buf[i] == '.' 
|| buf[i] == ':' || buf[i] == '/') i++; j = i; } if(escape) { s = malloc(i - y0); if(buf == NULL) return -1; k = 0; j = y0; while(j < i) { if(buf[j] == '\\' && j <= i - 2) { s[k++] = buf[j + 1]; j += 2; } else s[k++] = buf[j++]; } if(insensitive) atom = internAtomLowerN(s, k); else atom = internAtomN(s, k); free(s); j++; } else { if(insensitive) atom = internAtomLowerN(buf + y0, i - y0); else atom = internAtomN(buf + y0, i - y0); } *value_return = atom; return j; } static int parseTime(char *line, int i, int *value_return) { int v = 0, w; while(1) { if(!digit(line[i])) break; w = atoi(line + i); while(digit(line[i])) i++; switch(line[i]) { case 'd': v += w * 24 * 3600; i++; break; case 'h': v += w * 3600; i++; break; case 'm': v += w * 60; i++; break; case 's': v += w; i++; break; default: v += w; goto done; } } done: *value_return = v; return i; } int parseConfigLine(char *line, char *filename, int lineno, int set) { int x0, x1; int i, from, to; AtomPtr name, value; ConfigVariablePtr var; int iv; float fv; AtomPtr av; AtomListPtr alv; IntListPtr ilv; i = skipWhitespace(line, 0); if(line[i] == '\n' || line[i] == '\0' || line[i] == '#') return 0; x0 = i; while(letter(line[i]) || digit(line[i])) i++; x1 = i; i = skipWhitespace(line, i); if(line[i] != '=') { goto syntax; } i++; i = skipWhitespace(line, i); name = internAtomN(line + x0, x1 - x0); var = findConfigVariable(name); releaseAtom(name); if(set && var->setter == NULL) return -2; if(var == NULL) { if(!set) { do_log(L_ERROR, "%s:%d: unknown config variable ", filename, lineno); do_log_n(L_ERROR, line + x0, x1 - x0); do_log(L_ERROR, "\n"); } return -1; } i = skipWhitespace(line, i); switch(var->type) { case CONFIG_INT: case CONFIG_OCTAL: case CONFIG_HEX: i = parseInt(line, i, &iv); if(i < 0) goto syntax; if(set) var->setter(var, &iv); else *var->value.i = iv; break; case CONFIG_TIME: i = parseTime(line, i, &iv); if(i < 0) goto syntax; i = skipWhitespace(line, i); if(line[i] != '\n' && line[i] != '\0' && line[i] 
!= '#') goto syntax; if(set) var->setter(var, &iv); else *var->value.i = iv; break; case CONFIG_BOOLEAN: case CONFIG_TRISTATE: case CONFIG_TETRASTATE: case CONFIG_PENTASTATE: iv = parseState(line, i, var->type); if(iv < 0) goto syntax; if(set) var->setter(var, &iv); else *var->value.i = iv; break; case CONFIG_FLOAT: if(!digit(line[i]) && line[i] != '.') goto syntax; fv = atof(line + i); if(set) var->setter(var, &fv); else *var->value.f = fv; break; case CONFIG_ATOM: case CONFIG_ATOM_LOWER: case CONFIG_PASSWORD: i = parseAtom(line, i, &av, (var->type == CONFIG_ATOM_LOWER)); if(i < 0) goto syntax; if(!av) { if(!set) do_log(L_ERROR, "%s:%d: couldn't allocate atom.\n", filename, lineno); return -1; } i = skipWhitespace(line, i); if(line[i] != '\n' && line[i] != '\0' && line[i] != '#') { releaseAtom(av); goto syntax; } if(set) var->setter(var, &av); else { if(*var->value.a) releaseAtom(*var->value.a); *var->value.a = av; } break; case CONFIG_INT_LIST: ilv = makeIntList(0); if(ilv == NULL) { if(!set) do_log(L_ERROR, "%s:%d: couldn't allocate int list.\n", filename, lineno); return -1; } while(1) { i = parseInt(line, i, &from); if(i < 0) goto syntax; to = from; i = skipWhitespace(line, i); if(line[i] == '-') { i = skipWhitespace(line, i + 1); i = parseInt(line, i, &to); if(i < 0) { destroyIntList(ilv); goto syntax; } i = skipWhitespace(line, i); } intListCons(from, to, ilv); if(line[i] == '\n' || line[i] == '\0' || line[i] == '#') break; if(line[i] != ',') { destroyIntList(ilv); goto syntax; } i = skipWhitespace(line, i + 1); } if(set) var->setter(var, &ilv); else { if(*var->value.il) destroyIntList(*var->value.il); *var->value.il = ilv; } break; case CONFIG_ATOM_LIST: case CONFIG_ATOM_LIST_LOWER: alv = makeAtomList(NULL, 0); if(alv == NULL) { if(!set) do_log(L_ERROR, "%s:%d: couldn't allocate atom list.\n", filename, lineno); return -1; } while(1) { i = parseAtom(line, i, &value, (var->type == CONFIG_ATOM_LIST_LOWER)); if(i < 0) goto syntax; if(!value) { if(!set) 
do_log(L_ERROR, "%s:%d: couldn't allocate atom.\n", filename, lineno); return -1; } atomListCons(value, alv); i = skipWhitespace(line, i); if(line[i] == '\n' || line[i] == '\0' || line[i] == '#') break; if(line[i] != ',') { destroyAtomList(alv); goto syntax; } i = skipWhitespace(line, i + 1); } if(set) var->setter(var, &alv); else { if(*var->value.al) destroyAtomList(*var->value.al); *var->value.al = alv; } break; default: abort(); } return 1; syntax: if(!set) do_log(L_ERROR, "%s:%d: parse error.\n", filename, lineno); return -1; } int parseConfigFile(AtomPtr filename) { char buf[512]; int rc, lineno; FILE *f; if(!filename || filename->length == 0) return 0; f = fopen(filename->string, "r"); if(f == NULL) { do_log(L_ERROR, "Couldn't open config file %s: %d.\n", filename->string, errno); return -1; } lineno = 1; while(1) { char *s; s = fgets(buf, 512, f); if(s == NULL) { fclose(f); return 1; } rc = parseConfigLine(buf, filename->string, lineno, 0); lineno++; } } int configIntSetter(ConfigVariablePtr var, void* value) { assert(var->type <= CONFIG_PENTASTATE); *var->value.i = *(int*)value; return 1; } int configFloatSetter(ConfigVariablePtr var, void* value) { assert(var->type == CONFIG_FLOAT); *var->value.i = *(float*)value; return 1; } int configAtomSetter(ConfigVariablePtr var, void* value) { assert(var->type == CONFIG_ATOM || var->type == CONFIG_ATOM_LOWER || var->type == CONFIG_PASSWORD); if(*var->value.a) releaseAtom(*var->value.a); *var->value.a = *(AtomPtr*)value; return 1; } polipo-1.0.4.1/client.h0000644000175000017500000000661011331407220014112 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons 
to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

/* Client-side (browser-facing) half of the HTTP proxy; the definitions
   are in client.c. */

/* Accept callback; client.c registers it with schedule_accept(). */
int httpAccept(int, FdEventHandlerPtr, AcceptRequestPtr);
/* s != 0 specifies that the connection must be shut down: 1 in order to
   linger the connection, 2 to close it straight away. */
void httpClientFinish(HTTPConnectionPtr connection, int s);
/* Stream callback; httpAccept passes it to do_stream_buf() for the
   connection's request buffer. */
int httpClientHandler(int, FdEventHandlerPtr, StreamRequestPtr);

/* Error reporting towards the client. */
int httpClientNoticeError(HTTPRequestPtr, int code, struct _Atom *message);
int httpClientError(HTTPRequestPtr, int code, struct _Atom *message);
int httpClientNewError(HTTPConnectionPtr, int method, int persist,
                       int code, struct _Atom *message);
int httpClientRawError(HTTPConnectionPtr, int, struct _Atom*, int close);
int httpErrorStreamHandler(int status,
                           FdEventHandlerPtr event,
                           StreamRequestPtr request);
int httpErrorNocloseStreamHandler(int status,
                                  FdEventHandlerPtr event,
                                  StreamRequestPtr request);
int httpErrorNofinishStreamHandler(int status,
                                   FdEventHandlerPtr event,
                                   StreamRequestPtr request);

/* Request processing. */
int httpClientRequest(HTTPRequestPtr request, AtomPtr url);
int httpClientRequestContinue(int forbidden_code, AtomPtr url,
                              AtomPtr forbidden_message,
                              AtomPtr forbidden_headers,
                              void *closure);
int httpClientDiscardBody(HTTPConnectionPtr connection);
int httpClientDiscardHandler(int, FdEventHandlerPtr, StreamRequestPtr);
int httpClientGetHandler(int, ConditionHandlerPtr);
int httpClientHandlerHeaders(FdEventHandlerPtr event,
                             StreamRequestPtr request,
                             HTTPConnectionPtr connection);
int httpClientNoticeRequest(HTTPRequestPtr request, int);

/* Serving an object to the client. */
int httpServeObject(HTTPConnectionPtr);
int delayedHttpServeObject(HTTPConnectionPtr connection);
int httpServeObjectStreamHandler(int status,
                                 FdEventHandlerPtr event,
                                 StreamRequestPtr request);
int httpServeObjectStreamHandler2(int status,
                                  FdEventHandlerPtr event,
                                  StreamRequestPtr request);
int httpServeObjectHandler(int, ConditionHandlerPtr);

/* NOTE(review): presumably the client-to-server body path used for
   POST/PUT -- confirm against client.c. */
int httpClientSideRequest(HTTPRequestPtr request);
int httpClientSideHandler(int status,
                          FdEventHandlerPtr event,
                          StreamRequestPtr srequest);
polipo-1.0.4.1/client.c0000644000175000017500000021425111331407220014107 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #include "polipo.h" static int httpAcceptAgain(TimeEventHandlerPtr event) { FdEventHandlerPtr newevent; int fd = *(int*)event->data; newevent = schedule_accept(fd, httpAccept, NULL); if(newevent == NULL) { free_chunk_arenas(); newevent = schedule_accept(fd, httpAccept, NULL); if(newevent == NULL) { do_log(L_ERROR, "Couldn't schedule accept.\n"); polipoExit(); } } return 1; } int httpAccept(int fd, FdEventHandlerPtr event, AcceptRequestPtr request) { int rc; HTTPConnectionPtr connection; TimeEventHandlerPtr timeout; if(fd < 0) { if(-fd == EINTR || -fd == EAGAIN || -fd == EWOULDBLOCK) return 0; do_log_error(L_ERROR, -fd, "Couldn't establish listening socket"); if(-fd == EMFILE || -fd == ENOMEM || -fd == ENOBUFS) { TimeEventHandlerPtr again = NULL; do_log(L_WARN, "Refusing client connections for one second.\n"); free_chunk_arenas(); again = scheduleTimeEvent(1, httpAcceptAgain, sizeof(request->fd), &request->fd); if(!again) { do_log(L_ERROR, "Couldn't schedule accept -- sleeping.\n"); sleep(1); again = scheduleTimeEvent(1, httpAcceptAgain, sizeof(request->fd), &request->fd); if(!again) { do_log(L_ERROR, "Couldn't schedule accept -- aborting.\n"); polipoExit(); } } return 1; } else { polipoExit(); return 1; } } if(allowedNets) { if(netAddressMatch(fd, allowedNets) != 1) { do_log(L_WARN, "Refusing connection from unauthorised net\n"); CLOSE(fd); return 0; } } rc = setNonblocking(fd, 1); if(rc < 0) { do_log_error(L_WARN, errno, "Couldn't set non blocking mode"); CLOSE(fd); return 0; } rc = setNodelay(fd, 1); if(rc < 0) do_log_error(L_WARN, errno, "Couldn't disable Nagle's algorithm"); connection = httpMakeConnection(); timeout = scheduleTimeEvent(clientTimeout, httpTimeoutHandler, sizeof(connection), &connection); if(!timeout) { CLOSE(fd); free(connection); return 0; } connection->fd = fd; connection->timeout = timeout; do_log(D_CLIENT_CONN, "Accepted client connection 0x%lx\n", (unsigned long)connection); connection->flags = CONN_READER; do_stream_buf(IO_READ | 
IO_NOTNOW, connection->fd, 0, &connection->reqbuf, CHUNK_SIZE, httpClientHandler, connection); return 0; } /* Abort a client connection. It is only safe to abort the requests if we know the connection is closed. */ void httpClientAbort(HTTPConnectionPtr connection, int closed) { HTTPRequestPtr request = connection->request; pokeFdEvent(connection->fd, -EDOSHUTDOWN, POLLOUT); if(closed) { while(request) { if(request->chandler) { request->error_code = 500; request->error_message = internAtom("Connection finishing"); abortConditionHandler(request->chandler); request->chandler = NULL; } request = request->next; } } } /* s != 0 specifies that the connection must be shut down. It is 1 in order to linger the connection, 2 to close it straight away. */ void httpClientFinish(HTTPConnectionPtr connection, int s) { HTTPRequestPtr request = connection->request; assert(!(request && request->request && request->request->request != request)); if(s == 0) { if(!request || !(request->flags & REQUEST_PERSISTENT)) s = 1; } httpConnectionDestroyBuf(connection); connection->flags &= ~CONN_WRITER; if(connection->flags & CONN_SIDE_READER) { /* We're in POST or PUT and the reader isn't done yet. Wait for the read side to close the connection. 
*/ assert(request && (connection->flags & CONN_READER)); if(s >= 2) { pokeFdEvent(connection->fd, -EDOSHUTDOWN, POLLIN); } else { pokeFdEvent(connection->fd, -EDOGRACEFUL, POLLIN); } return; } if(connection->timeout) cancelTimeEvent(connection->timeout); connection->timeout = NULL; if(request) { HTTPRequestPtr requestee; requestee = request->request; if(requestee) { request->request = NULL; requestee->request = NULL; } if(requestee) httpServerClientReset(requestee); if(request->chandler) { request->error_code = 500; request->error_message = internAtom("Connection finishing"); abortConditionHandler(request->chandler); request->chandler = NULL; } if(request->object) { if(request->object->requestor == request) request->object->requestor = NULL; releaseObject(request->object); request->object = NULL; } httpDequeueRequest(connection); httpDestroyRequest(request); request = NULL; } connection->len = -1; connection->offset = 0; connection->te = TE_IDENTITY; if(!s) { assert(connection->fd > 0); connection->serviced++; httpSetTimeout(connection, clientTimeout); if(!connection->flags & CONN_READER) { if(connection->reqlen == 0) httpConnectionDestroyReqbuf(connection); else if((connection->flags & CONN_BIGREQBUF) && connection->reqlen < CHUNK_SIZE) httpConnectionUnbigifyReqbuf(connection); connection->flags |= CONN_READER; httpSetTimeout(connection, clientTimeout); do_stream_buf(IO_READ | IO_NOTNOW | (connection->reqlen ? IO_IMMEDIATE : 0), connection->fd, connection->reqlen, &connection->reqbuf, (connection->flags & CONN_BIGREQBUF) ? 
bigBufferSize : CHUNK_SIZE, httpClientHandler, connection); } /* The request has already been validated when it first got into the queue */ if(connection->request) { if(connection->request->object != NULL) httpClientNoticeRequest(connection->request, 1); else assert(connection->flags & CONN_READER); } return; } do_log(D_CLIENT_CONN, "Closing client connection 0x%lx\n", (unsigned long)connection); if(connection->flags & CONN_READER) { httpSetTimeout(connection, 10); if(connection->fd < 0) return; if(s >= 2) { pokeFdEvent(connection->fd, -EDOSHUTDOWN, POLLIN); } else { pokeFdEvent(connection->fd, -EDOGRACEFUL, POLLIN); } return; } while(1) { HTTPRequestPtr requestee; request = connection->request; if(!request) break; requestee = request->request; request->request = NULL; if(requestee) { requestee->request = NULL; httpServerClientReset(requestee); } if(request->chandler) abortConditionHandler(request->chandler); request->chandler = NULL; if(request->object && request->object->requestor == request) request->object->requestor = NULL; httpDequeueRequest(connection); httpDestroyRequest(request); } httpConnectionDestroyReqbuf(connection); if(connection->timeout) cancelTimeEvent(connection->timeout); connection->timeout = NULL; if(connection->fd >= 0) { if(s >= 2) CLOSE(connection->fd); else lingeringClose(connection->fd); } connection->fd = -1; free(connection); } /* Extremely baroque implementation of close: we need to synchronise between the writer and the reader. 
*/ static char client_shutdown_buffer[17]; static int httpClientDelayedShutdownHandler(TimeEventHandlerPtr); static int httpClientDelayedShutdown(HTTPConnectionPtr connection) { TimeEventHandlerPtr handler; assert(connection->flags & CONN_READER); handler = scheduleTimeEvent(1, httpClientDelayedShutdownHandler, sizeof(connection), &connection); if(!handler) { do_log(L_ERROR, "Couldn't schedule delayed shutdown -- freeing memory."); free_chunk_arenas(); handler = scheduleTimeEvent(1, httpClientDelayedShutdownHandler, sizeof(connection), &connection); if(!handler) { do_log(L_ERROR, "Couldn't schedule delayed shutdown -- aborting.\n"); polipoExit(); } } return 1; } static int httpClientShutdownHandler(int status, FdEventHandlerPtr event, StreamRequestPtr request) { HTTPConnectionPtr connection = request->data; assert(connection->flags & CONN_READER); if(!(connection->flags & CONN_WRITER)) { connection->flags &= ~CONN_READER; connection->reqlen = 0; httpConnectionDestroyReqbuf(connection); if(status && status != -EDOGRACEFUL) httpClientFinish(connection, 2); else httpClientFinish(connection, 1); return 1; } httpClientDelayedShutdown(connection); return 1; } static int httpClientDelayedShutdownHandler(TimeEventHandlerPtr event) { HTTPConnectionPtr connection = *(HTTPConnectionPtr*)event->data; assert(connection->flags & CONN_READER); if(!(connection->flags & CONN_WRITER)) { connection->flags &= ~CONN_READER; connection->reqlen = 0; httpConnectionDestroyReqbuf(connection); httpClientFinish(connection, 1); return 1; } do_stream(IO_READ | IO_NOTNOW, connection->fd, 0, client_shutdown_buffer, 17, httpClientShutdownHandler, connection); return 1; } int httpClientHandler(int status, FdEventHandlerPtr event, StreamRequestPtr request) { HTTPConnectionPtr connection = request->data; int i, body; int bufsize = (connection->flags & CONN_BIGREQBUF) ? 
connection->reqlen : CHUNK_SIZE; assert(connection->flags & CONN_READER); /* There's no point trying to do something with this request if the client has shut the connection down -- HTTP doesn't do half-open connections. */ if(status != 0) { connection->reqlen = 0; httpConnectionDestroyReqbuf(connection); if(!(connection->flags & CONN_WRITER)) { connection->flags &= ~CONN_READER; if(status > 0 || status == -ECONNRESET || status == -EDOSHUTDOWN) httpClientFinish(connection, 2); else httpClientFinish(connection, 1); return 1; } httpClientAbort(connection, status > 0 || status == -ECONNRESET); connection->flags &= ~CONN_READER; return 1; } i = findEndOfHeaders(connection->reqbuf, 0, request->offset, &body); connection->reqlen = request->offset; if(i >= 0) { connection->reqbegin = i; httpClientHandlerHeaders(event, request, connection); return 1; } if(status) { if(connection->reqlen > 0) { if(connection->serviced <= 0) do_log(L_ERROR, "Client dropped connection.\n"); else do_log(D_CLIENT_CONN, "Client dropped idle connection.\n"); } connection->flags &= ~CONN_READER; if(!connection->request) httpClientFinish(connection, 2); else pokeFdEvent(connection->fd, -EDOGRACEFUL, POLLOUT); return 1; } if(connection->reqlen >= bufsize) { int rc = 0; if(!(connection->flags & CONN_BIGREQBUF)) rc = httpConnectionBigifyReqbuf(connection); if((connection->flags & CONN_BIGREQBUF) && connection->reqlen < bigBufferSize) { do_stream(IO_READ, connection->fd, connection->reqlen, connection->reqbuf, bigBufferSize, httpClientHandler, connection); return 1; } connection->reqlen = 0; httpConnectionDestroyReqbuf(connection); if(rc < 0) { do_log(L_ERROR, "Couldn't allocate big buffer.\n"); httpClientNewError(connection, METHOD_UNKNOWN, 0, 400, internAtom("Couldn't allocate big buffer")); } else { do_log(L_ERROR, "Couldn't find end of client's headers.\n"); httpClientNewError(connection, METHOD_UNKNOWN, 0, 400, internAtom("Couldn't find end of headers")); } return 1; } httpSetTimeout(connection, 
clientTimeout); return 0; } int httpClientRawErrorHeaders(HTTPConnectionPtr connection, int code, AtomPtr message, int close, AtomPtr headers) { int fd = connection->fd; int n; char *url; int url_len; char *etag; assert(connection->flags & CONN_WRITER); assert(code != 0); if(close >= 0) { if(connection->request) close = close || !(connection->request->flags & REQUEST_PERSISTENT); else close = 1; } if(connection->request && connection->request->object) { url = connection->request->object->key; url_len = connection->request->object->key_size; etag = connection->request->object->etag; } else { url = NULL; url_len = 0; etag = NULL; } if(connection->buf == NULL) { connection->buf = get_chunk(); if(connection->buf == NULL) { httpClientFinish(connection, 1); return 1; } } n = httpWriteErrorHeaders(connection->buf, CHUNK_SIZE, 0, connection->request && connection->request->method != METHOD_HEAD, code, message, close > 0, headers, url, url_len, etag); if(n <= 0) { shutdown(connection->fd, 1); if(close >= 0) httpClientFinish(connection, 1); return 1; } httpSetTimeout(connection, clientTimeout); do_stream(IO_WRITE, fd, 0, connection->buf, n, close > 0 ? httpErrorStreamHandler : close == 0 ? 
httpErrorNocloseStreamHandler : httpErrorNofinishStreamHandler, connection); return 1; } int httpClientRawError(HTTPConnectionPtr connection, int code, AtomPtr message, int close) { return httpClientRawErrorHeaders(connection, code, message, close, NULL); } int httpClientNoticeErrorHeaders(HTTPRequestPtr request, int code, AtomPtr message, AtomPtr headers) { if(request->error_message) releaseAtom(request->error_message); if(request->error_headers) releaseAtom(request->error_headers); request->error_code = code; request->error_message = message; request->error_headers = headers; httpClientNoticeRequest(request, 0); return 1; } int httpClientNoticeError(HTTPRequestPtr request, int code, AtomPtr message) { return httpClientNoticeErrorHeaders(request, code, message, NULL); } int httpClientError(HTTPRequestPtr request, int code, AtomPtr message) { if(request->error_message) releaseAtom(request->error_message); request->error_code = code; request->error_message = message; if(request->chandler) { abortConditionHandler(request->chandler); request->chandler = NULL; } else if(request->object) notifyObject(request->object); return 1; } /* This may be called from object handlers. 
*/

/* Record an error on the request without triggering anything else:
   releases any previously stored error message, then stores code and
   message.  Always returns 1. */
int
httpClientLeanError(HTTPRequestPtr request, int code, AtomPtr message)
{
    if(request->error_message)
        releaseAtom(request->error_message);
    request->error_code = code;
    request->error_message = message;
    return 1;
}

/* Queue a freshly allocated request whose only purpose is to carry an
   error (code, message) back to the client.  persist selects whether
   REQUEST_PERSISTENT is set on it.  If the request cannot be allocated
   the connection is finished instead.  Always returns 1. */
int
httpClientNewError(HTTPConnectionPtr connection, int method, int persist,
                   int code, AtomPtr message)
{
    HTTPRequestPtr request;
    request = httpMakeRequest();
    if(request == NULL) {
        do_log(L_ERROR, "Couldn't allocate error request.\n");
        httpClientFinish(connection, 1);
        return 1;
    }
    request->method = method;
    if(persist)
        request->flags |= REQUEST_PERSISTENT;
    else
        request->flags &= ~REQUEST_PERSISTENT;
    request->error_code = code;
    request->error_message = message;

    httpQueueRequest(connection, request);
    httpClientNoticeRequest(request, 0);
    return 1;
}

/* Write-completion handler for an error response: returns 0 while
   status is 0 and the stream request is not done; otherwise finishes
   the connection with s == 1 and returns 1. */
int
httpErrorStreamHandler(int status,
                       FdEventHandlerPtr event,
                       StreamRequestPtr srequest)
{
    HTTPConnectionPtr connection = srequest->data;

    if(status == 0 && !streamRequestDone(srequest))
        return 0;

    httpClientFinish(connection, 1);
    return 1;
}

/* Like httpErrorStreamHandler, but finishes with s == 0. */
int
httpErrorNocloseStreamHandler(int status,
                              FdEventHandlerPtr event,
                              StreamRequestPtr srequest)
{
    HTTPConnectionPtr connection = srequest->data;

    if(status == 0 && !streamRequestDone(srequest))
        return 0;

    httpClientFinish(connection, 0);
    return 1;
}

/* Like httpErrorStreamHandler, but does not call httpClientFinish at
   all once the write is over. */
int
httpErrorNofinishStreamHandler(int status,
                               FdEventHandlerPtr event,
                               StreamRequestPtr srequest)
{
    if(status == 0 && !streamRequestDone(srequest))
        return 0;

    return 1;
}

/* Called by httpClientHandler once a complete header block is in
   connection->reqbuf.  Parses the request line, validates the HTTP
   version and method, allocates and queues a request, and continues in
   httpClientRequest.  On any failure the read side of the socket is shut
   down, the request buffer is discarded and an error request is queued
   with httpClientNewError.  Returns the value of httpClientRequest on
   success, 1 on failure. */
int
httpClientHandlerHeaders(FdEventHandlerPtr event, StreamRequestPtr srequest,
                         HTTPConnectionPtr connection)
{
    HTTPRequestPtr request;
    int rc;
    int method, version;
    AtomPtr url = NULL;
    int start;
    int code;
    AtomPtr message;

    start = 0;
    /* Work around clients working around NCSA lossage. */
    if(connection->reqbuf[0] == '\n')
        start = 1;
    else if(connection->reqbuf[0] == '\r' && connection->reqbuf[1] == '\n')
        start = 2;

    httpSetTimeout(connection, -1);
    rc = httpParseClientFirstLine(connection->reqbuf, start,
                                  &method, &url, &version);
    if(rc <= 0) {
        do_log(L_ERROR, "Couldn't parse client's request line\n");
        code = 400;
        message = internAtom("Error in request line");
        goto fail;
    }

    do_log(D_CLIENT_REQ, "Client request: ");
    do_log_n(D_CLIENT_REQ, connection->reqbuf, rc - 1);
    do_log(D_CLIENT_REQ, "\n");

    if(version != HTTP_10 && version != HTTP_11) {
        do_log(L_ERROR, "Unknown client HTTP version\n");
        code = 400;
        message = internAtom("Error in first request line");
        goto fail;
    }

    if(method == METHOD_UNKNOWN) {
        code = 501;
        message = internAtom("Method not implemented");
        goto fail;
    }

    request = httpMakeRequest();
    if(request == NULL) {
        do_log(L_ERROR, "Couldn't allocate client request.\n");
        code = 500;
        message = internAtom("Couldn't allocate client request");
        goto fail;
    }

    if(connection->version != HTTP_UNKNOWN && version != connection->version) {
        do_log(L_WARN, "Client version changed!\n");
    }

    connection->version = version;
    request->flags = REQUEST_PERSISTENT;
    request->method = method;
    request->cache_control = no_cache_control;
    httpQueueRequest(connection, request);
    /* rc is the offset returned by the first-line parser; header parsing
       in httpClientRequest starts from there. */
    connection->reqbegin = rc;
    return httpClientRequest(request, url);

 fail:
    if(url) releaseAtom(url);
    shutdown(connection->fd, 0);
    connection->reqlen = 0;
    connection->reqbegin = 0;
    httpConnectionDestroyReqbuf(connection);
    connection->flags &= ~CONN_READER;
    httpClientNewError(connection, METHOD_UNKNOWN, 0, code, message);
    return 1;
}

/* Timer callback for delayedHttpClientRequest; event->data holds the
   request.  Rebuilds the URL atom from the request object's key and
   calls httpClientRequest.  If the atom cannot be allocated the object
   is aborted with a 503.  Always returns 1. */
static int
httpClientRequestDelayed(TimeEventHandlerPtr event)
{
    HTTPRequestPtr request = *(HTTPRequestPtr*)event->data;
    AtomPtr url;
    url = internAtomN(request->object->key, request->object->key_size);
    if(url == NULL) {
        do_log(L_ERROR, "Couldn't allocate url.\n");
        abortObject(request->object, 503, internAtom("Couldn't allocate url"));
        return 1;
    }
    httpClientRequest(request, url);
    return 1;
}

/* Schedule httpClientRequestDelayed for the request.
   Returns 1 on success, -1 if the time event could not be scheduled. */
int
delayedHttpClientRequest(HTTPRequestPtr request)
{
    TimeEventHandlerPtr event;
    event = scheduleTimeEvent(-1, httpClientRequestDelayed,
                              sizeof(request), &request);
    if(!event)
        return -1;
    return 1;
}

/* Parse the headers of a queued client request and start processing it.
   Handles header parse failures, client authentication (when authRealm
   is set), the Expect header, and CONNECT requests, then passes the URL
   to urlForbidden with httpClientRequestContinue as the callback.
   Errors are reported to the client through httpClientNoticeError*.
   Always returns 1. */
int
httpClientRequest(HTTPRequestPtr request, AtomPtr url)
{
    HTTPConnectionPtr connection = request->connection;
    int i, rc;
    int body_len, body_te;
    AtomPtr headers;
    CacheControlRec cache_control;
    AtomPtr via, expect, auth;
    HTTPConditionPtr condition;
    HTTPRangeRec range;

    assert(!request->chandler);
    assert(connection->reqbuf);

    i = httpParseHeaders(1, url,
                         connection->reqbuf, connection->reqbegin, request,
                         &headers, &body_len,
                         &cache_control, &condition, &body_te,
                         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
                         &expect, &range, NULL, NULL, &via, &auth);
    if(i < 0) {
        releaseAtom(url);
        do_log(L_ERROR, "Couldn't parse client headers.\n");
        shutdown(connection->fd, 0);
        request->flags &= ~REQUEST_PERSISTENT;
        connection->flags &= ~CONN_READER;
        httpClientNoticeError(request, 503,
                              internAtom("Couldn't parse client headers"));
        return 1;
    }

    connection->reqbegin = i;

    /* GET and HEAD requests without an explicit body length are treated
       as having no body. */
    if(body_len < 0) {
        if(request->method == METHOD_GET || request->method == METHOD_HEAD)
            body_len = 0;
    }
    connection->bodylen = body_len;
    connection->reqte = body_te;

    if(authRealm) {
        AtomPtr message = NULL;
        AtomPtr challenge = NULL;
        int code = checkClientAuth(auth, url, &message, &challenge);
        if(auth) {
            releaseAtom(auth);
            auth = NULL;
        }
        if(expect) {
            releaseAtom(expect);
            expect = NULL;
        }
        if(code) {
            request->flags |= REQUEST_FORCE_ERROR;
            httpClientDiscardBody(connection);
            httpClientNoticeErrorHeaders(request, code, message, challenge);
            return 1;
        }
    }

    if(auth) {
        releaseAtom(auth);
        auth = NULL;
    }

    if(expect) {
        /* Only "100-continue" on a side-effecting request is honoured;
           anything else is answered with 417. */
        if(expect == atom100Continue && REQUEST_SIDE(request)) {
            request->flags |= REQUEST_WAIT_CONTINUE;
        } else {
            httpClientDiscardBody(connection);
            httpClientNoticeError(request, 417,
                                  internAtom("Expectation failed"));
            releaseAtom(expect);
            return 1;
        }
        releaseAtom(expect);
    }

    request->from = range.from < 0 ? 0 : range.from;
    request->to = range.to;
    request->cache_control = cache_control;
    request->via = via;
    request->headers = headers;
    request->condition = condition;
    request->object = NULL;

    /* Stop keeping the connection alive after 500 serviced requests. */
    if(connection->serviced > 500)
        request->flags &= ~REQUEST_PERSISTENT;

    if(request->method == METHOD_CONNECT) {
        if(connection->flags & CONN_WRITER) {
            /* For now */
            httpClientDiscardBody(connection);
            httpClientNoticeError(request, 500,
                                  internAtom("Pipelined CONNECT "
                                             "not supported"));
            return 1;
        }
        if(connection->flags & CONN_BIGREQBUF) {
            /* For now */
            httpClientDiscardBody(connection);
            httpClientNoticeError(request, 500,
                                  internAtom("CONNECT over big buffer "
                                             "not supported"));
            return 1;
        }
        connection->flags &= ~CONN_READER;
        /* The socket and request buffer are passed to do_tunnel; the
           connection's references to them are cleared before it is
           finished. */
        do_tunnel(connection->fd, connection->reqbuf,
                  connection->reqbegin, connection->reqlen, url);
        connection->fd = -1;
        connection->reqbuf = NULL;
        connection->reqlen = 0;
        connection->reqbegin = 0;
        httpClientFinish(connection, 2);
        return 1;
    }

    rc = urlForbidden(url, httpClientRequestContinue, request);
    if(rc < 0) {
        do_log(L_ERROR, "Couldn't schedule httpClientRequestContinue.\n");
        httpClientDiscardBody(connection);
        httpClientNoticeError(request, 500,
                              internAtom("Couldn't schedule "
                                         "httpClientRequestContinue"));
        return 1;
    }
    return 1;
}

/* Callback passed to urlForbidden by httpClientRequest; closure is the
   request.  A negative forbidden_code reports a failure of the test
   itself (answered with 500), a positive one a forbidden URL (answered
   with that code, message and headers).  Otherwise the cache object for
   the URL is looked up or created and the request is handed on to
   httpClientSideRequest (POST/PUT) or httpClientNoticeRequest.
   Returns the value of httpClientSideRequest for POST/PUT, 1 otherwise. */
int
httpClientRequestContinue(int forbidden_code, AtomPtr url,
                          AtomPtr forbidden_message, AtomPtr forbidden_headers,
                          void *closure)
{
    HTTPRequestPtr request = (HTTPRequestPtr)closure;
    HTTPConnectionPtr connection = request->connection;
    RequestFunction requestfn;
    ObjectPtr object = NULL;

    if(forbidden_code < 0) {
        releaseAtom(url);
        httpClientDiscardBody(connection);
        httpClientNoticeError(request, 500,
                              internAtomError(-forbidden_code,
                                              "Couldn't test for forbidden "
                                              "URL"));
        return 1;
    }

    if(forbidden_code) {
        releaseAtom(url);
        httpClientDiscardBody(connection);
        httpClientNoticeErrorHeaders(request,
                                     forbidden_code, forbidden_message,
                                     forbidden_headers);
        return 1;
    }

    requestfn =
        urlIsLocal(url->string, url->length) ?
        httpLocalRequest :
        httpServerRequest;

    if(request->method == METHOD_POST || request->method == METHOD_PUT) {
        /* Privatise every existing object for this URL, then create a
           new one for this request. */
        do {
            object = findObject(OBJECT_HTTP, url->string, url->length);
            if(object) {
                privatiseObject(object, 0);
                releaseObject(object);
            }
        } while(object);
        request->object = makeObject(OBJECT_HTTP, url->string, url->length,
                                     0, 0, requestfn, NULL);
        if(request->object == NULL) {
            httpClientDiscardBody(connection);
            httpClientNoticeError(request, 503,
                                  internAtom("Couldn't allocate object"));
            return 1;
        }
        if(requestfn == httpLocalRequest)
            request->object->flags |= OBJECT_LOCAL;
        return httpClientSideRequest(request);
    }

    if(request->cache_control.flags & CACHE_AUTHORIZATION) {
        /* Loop until makeObject yields either an object whose flags are
           exactly OBJECT_INITIAL, an object marked CACHE_PUBLIC, or
           NULL -- wait, NULL restarts the loop; see the while condition. */
        do {
            object = makeObject(OBJECT_HTTP, url->string, url->length, 0, 0,
                                requestfn, NULL);
            if(object && object->flags != OBJECT_INITIAL) {
                if(!(object->cache_control & CACHE_PUBLIC)) {
                    privatiseObject(object, 0);
                    releaseObject(object);
                    object = NULL;
                } else
                    break;
            }
        } while(object == NULL);
        if(object)
            object->flags |= OBJECT_LINEAR;
    } else {
        object = findObject(OBJECT_HTTP, url->string, url->length);
        if(!object)
            object = makeObject(OBJECT_HTTP, url->string, url->length, 1, 1,
                                requestfn, NULL);
    }
    releaseAtom(url);
    url = NULL;

    if(!object) {
        do_log(L_ERROR, "Couldn't allocate object.\n");
        httpClientDiscardBody(connection);
        httpClientNoticeError(request, 503,
                              internAtom("Couldn't allocate object"));
        return 1;
    }

    if(object->request == httpLocalRequest) {
        object->flags |= OBJECT_LOCAL;
    } else {
        if(disableProxy) {
            httpClientDiscardBody(connection);
            httpClientNoticeError(request, 403,
                                  internAtom("Proxying disabled"));
            releaseObject(object);
            return 1;
        }

        if(!checkVia(proxyName, request->via)) {
            httpClientDiscardBody(connection);
            httpClientNoticeError(request, 504,
                                  internAtom("Proxy loop detected"));
            releaseObject(object);
            return 1;
        }
    }

    request->object = object;

    httpClientDiscardBody(connection);
    httpClientNoticeRequest(request, 0);
    return 1;
}

static int httpClientDelayed(TimeEventHandlerPtr handler);

/* Skip over the body of the current client request so that the next
   request can be read.  Consumes what is already in the request buffer,
   schedules further reads (httpClientDiscardHandler) while body bytes
   remain, then compacts the buffer and schedules httpClientDelayed to
   resume reading requests.  If the body length is unknown, the transfer
   encoding is not identity, or scheduling fails, the socket is shut down
   in both directions.  Always returns 1. */
int
httpClientDiscardBody(HTTPConnectionPtr connection)
{
    TimeEventHandlerPtr handler;

    assert(connection->reqoffset == 0);
    assert(connection->flags & CONN_READER);

    if(connection->reqte != TE_IDENTITY)
        goto fail;

    if(connection->bodylen < 0)
        goto fail;

    if(connection->bodylen < connection->reqlen - connection->reqbegin) {
        /* The whole body is already buffered: step over it. */
        connection->reqbegin += connection->bodylen;
        connection->bodylen = 0;
    } else {
        /* Everything buffered belongs to the body: drop it all. */
        connection->bodylen -= connection->reqlen - connection->reqbegin;
        connection->reqbegin = 0;
        connection->reqlen = 0;
        httpConnectionDestroyReqbuf(connection);
    }
    connection->reqte = TE_UNKNOWN;

    if(connection->bodylen > 0) {
        httpSetTimeout(connection, clientTimeout);
        do_stream_buf(IO_READ | IO_NOTNOW,
                      connection->fd, connection->reqlen,
                      &connection->reqbuf, CHUNK_SIZE,
                      httpClientDiscardHandler, connection);
        return 1;
    }

    /* Move any bytes following the body to the start of the buffer. */
    if(connection->reqlen > connection->reqbegin &&
       (connection->reqlen - connection->reqbegin) > 0) {
        memmove(connection->reqbuf, connection->reqbuf + connection->reqbegin,
                connection->reqlen - connection->reqbegin);
        connection->reqlen -= connection->reqbegin;
        connection->reqbegin = 0;
    } else {
        connection->reqlen = 0;
        connection->reqbegin = 0;
    }

    httpSetTimeout(connection, clientTimeout);
    /* We need to delay in order to make sure the previous request
       gets queued on the server side.  IO_NOTNOW isn't strong enough
       for that due to IO_IMMEDIATE. */
    handler = scheduleTimeEvent(-1, httpClientDelayed,
                                sizeof(connection), &connection);
    if(handler == NULL) {
        do_log(L_ERROR, "Couldn't schedule reading from client.");
        goto fail;
    }
    return 1;

 fail:
    connection->reqlen = 0;
    connection->reqbegin = 0;
    connection->bodylen = 0;
    connection->reqte = TE_UNKNOWN;
    shutdown(connection->fd, 2);
    handler = scheduleTimeEvent(-1, httpClientDelayed,
                                sizeof(connection), &connection);
    if(handler == NULL) {
        do_log(L_ERROR, "Couldn't schedule reading from client.");
        connection->flags &= ~CONN_READER;
    }
    return 1;
}

/* Timer callback scheduled by httpClientDiscardBody; event->data holds
   the connection.  Restarts reading client requests with
   httpClientHandler, reusing leftover buffered bytes when there are
   any.  Always returns 1. */
static int
httpClientDelayed(TimeEventHandlerPtr event)
{
     HTTPConnectionPtr connection = *(HTTPConnectionPtr*)event->data;

     /* IO_NOTNOW is unfortunate, but needed to avoid starvation if a
        client is pipelining a lot of requests. */
     if(connection->reqlen > 0) {
         int bufsize;
         if((connection->flags & CONN_BIGREQBUF) &&
            connection->reqlen < CHUNK_SIZE)
             httpConnectionUnbigifyReqbuf(connection);
         /* Don't read new requests if buffer is big. */
         bufsize = (connection->flags & CONN_BIGREQBUF) ?
             connection->reqlen : CHUNK_SIZE;
         do_stream(IO_READ | IO_IMMEDIATE | IO_NOTNOW,
                   connection->fd, connection->reqlen,
                   connection->reqbuf, bufsize,
                   httpClientHandler, connection);
     } else {
         httpConnectionDestroyReqbuf(connection);
         do_stream_buf(IO_READ | IO_NOTNOW,
                       connection->fd, 0,
                       &connection->reqbuf, CHUNK_SIZE,
                       httpClientHandler, connection);
     }
     return 1;
}

/* Stream handler for reads issued by httpClientDiscardBody.  On a
   non-zero status the body length is marked unknown (-1), which sends
   httpClientDiscardBody down its failure path; otherwise the new buffer
   length is recorded and discarding continues.  Returns the value of
   httpClientDiscardBody on error, 1 otherwise. */
int
httpClientDiscardHandler(int status,
                         FdEventHandlerPtr event, StreamRequestPtr request)
{
    HTTPConnectionPtr connection = request->data;

    assert(connection->flags & CONN_READER);
    if(status) {
        if(status < 0 && status != -EPIPE)
            do_log_error(L_ERROR, -status, "Couldn't read from client");
        connection->bodylen = -1;
        return httpClientDiscardBody(connection);
    }

    assert(request->offset > connection->reqlen);
    connection->reqlen = request->offset;

    httpClientDiscardBody(connection);
    return 1;
}

/* Decide what to do with a queued client request: report a pending
   error, serve the object from the cache, or ask the object's request
   function to fetch/validate it and wait on the object's condition.
   novalidate != 0 suppresses revalidation.  Output is only started when
   the request is at the head of its connection's queue (serveNow).
   Returns the value of the error/serve function when one is called
   directly, 1 otherwise. */
int
httpClientNoticeRequest(HTTPRequestPtr request, int novalidate)
{
    HTTPConnectionPtr connection = request->connection;
    ObjectPtr object = request->object;
    int serveNow = (request == connection->request);
    int validate = 0;
    int conditional = 0;
    int local, haveData;
    int rc;

    assert(!request->chandler);

    /* A pending error is reported right away in the cases listed below;
       otherwise processing continues as for a normal request. */
    if(request->error_code) {
        if((request->flags & REQUEST_FORCE_ERROR) || REQUEST_SIDE(request) ||
           request->object == NULL ||
           (request->object->flags & OBJECT_LOCAL) ||
           (request->object->flags & OBJECT_ABORTED) ||
           (relaxTransparency < 1 && !proxyOffline)) {
            if(serveNow) {
                connection->flags |= CONN_WRITER;
                return httpClientRawErrorHeaders(connection,
                                                 request->error_code,
                                                 retainAtom(request->
                                                            error_message),
                                                 0, request->error_headers);
            } else {
                return 1;
            }
        }
    }

    /* Side-effecting requests are never served from the cache: wait on
       the object and call its request function straight away. */
    if(REQUEST_SIDE(request)) {
        assert(!(request->flags & REQUEST_REQUESTED));
        if(serveNow) {
            assert(!request->chandler);
            request->chandler =
                conditionWait(&request->object->condition,
                              httpClientGetHandler,
                              sizeof(request), &request);
            if(request->chandler == NULL) {
                do_log(L_ERROR, "Couldn't register condition handler.\n");
                connection->flags |= CONN_WRITER;
                httpClientRawError(connection, 500,
                                   internAtom("Couldn't register "
                                              "condition handler"),
                                   0);
                return 1;
            }
            connection->flags |= CONN_WRITER;
            rc = object->request(request->object,
                                 request->method,
                                 request->from, request->to,
                                 request,
                                 request->object->request_closure);
        }
        return 1;
    }

    local = urlIsLocal(object->key, object->key_size);
    objectFillFromDisk(object, request->from,
                       request->method == METHOD_HEAD ? 0 : 1);

    /* If-Range with a non-matching (or missing) ETag: serve the whole
       object instead of the requested range. */
    if(request->condition && request->condition->ifrange) {
        if(!object->etag ||
           strcmp(object->etag, request->condition->ifrange) != 0) {
            request->from = 0;
            request->to = -1;
        }
    }

    if(object->flags & OBJECT_DYNAMIC) {
        request->from = 0;
        request->to = -1;
    }

    if(request->method == METHOD_HEAD || request->object->code == 204 ||
       request->object->code < 200)
        haveData = !(request->object->flags & OBJECT_INITIAL);
    else
        haveData =
            (request->object->length >= 0 &&
             request->object->length <= request->from) ||
            (objectHoleSize(request->object, request->from) == 0);

    /* Work out whether the object must be (re)validated before use. */
    if(request->flags & REQUEST_REQUESTED)
        validate = 0;
    else if(novalidate || (!local && proxyOffline))
        validate = 0;
    else if(local)
        validate =
            objectMustRevalidate(request->object, &request->cache_control);
    else if(request->cache_control.flags & CACHE_ONLY_IF_CACHED)
        validate = 0;
    else if((request->object->flags & OBJECT_FAILED) &&
            !(object->flags & OBJECT_INPROGRESS) &&
            !relaxTransparency)
        validate = 1;
    else if(request->method != METHOD_HEAD &&
            !objectHasData(object, request->from, request->to) &&
            !(object->flags & OBJECT_INPROGRESS))
        validate = 1;
    else if(objectMustRevalidate((relaxTransparency <= 1 ?
                                  request->object : NULL),
                                 &request->cache_control))
        validate = 1;
    else
        validate = 0;

    if(request->cache_control.flags & CACHE_ONLY_IF_CACHED) {
        validate = 0;
        if(!haveData) {
            if(serveNow) {
                connection->flags |= CONN_WRITER;
                return httpClientRawError(connection, 504,
                                          internAtom("Object not in cache"),
                                          0);
            } else
                return 1;
        }
    }

    /* Serve straight from the cache when possible. */
    if(!(request->object->flags & OBJECT_VALIDATING) &&
       ((!validate && haveData) ||
        (request->object->flags & OBJECT_FAILED))) {
        if(serveNow) {
            connection->flags |= CONN_WRITER;
            lockChunk(request->object, request->from / CHUNK_SIZE);
            return httpServeObject(connection);
        } else {
            return 1;
        }
    }

    if((request->flags & REQUEST_REQUESTED) &&
       !(request->object->flags & OBJECT_INPROGRESS)) {
        /* This can happen either because the server side ran out of
           memory, or because it is using HEAD validation.  We mark
           the object to be fetched again. */
        request->flags &= ~REQUEST_REQUESTED;
    }

    if(serveNow) {
        connection->flags |= CONN_WRITER;
        if(!local && proxyOffline)
            return httpClientRawError(connection, 502,
                                      internAtom("Disconnected operation "
                                                 "and object not in cache"),
                                      0);
        request->chandler =
            conditionWait(&request->object->condition, httpClientGetHandler,
                          sizeof(request), &request);
        if(request->chandler == NULL) {
            do_log(L_ERROR, "Couldn't register condition handler.\n");
            return httpClientRawError(connection, 503,
                                      internAtom("Couldn't register "
                                                 "condition handler"), 0);
        }
    }

    /* Somebody else is already validating this object. */
    if(request->object->flags & OBJECT_VALIDATING)
        return 1;

    conditional = (haveData && request->method == METHOD_GET);
    if(!mindlesslyCacheVary && (request->object->cache_control & CACHE_VARY))
        conditional = conditional && (request->object->etag != NULL);

    conditional =
        conditional && !(request->object->cache_control & CACHE_MISMATCH);

    request->object->flags |= OBJECT_VALIDATING;
    rc = request->object->request(request->object,
                                  conditional ? METHOD_CONDITIONAL_GET :
                                  request->method,
                                  request->from, request->to, request,
                                  request->object->request_closure);
    if(rc < 0) {
        if(request->chandler)
            unregisterConditionHandler(request->chandler);
        request->chandler = NULL;
        request->object->flags &= ~OBJECT_VALIDATING;
        request->object->flags |= OBJECT_FAILED;
        if(request->error_message)
            releaseAtom(request->error_message);
        request->error_code = 503;
        request->error_message = internAtom("Couldn't schedule get");
    }
    return 1;
}

/* Timer callback for delayedHttpClientNoticeRequest; event->data holds
   the request.  Always returns 1. */
static int
httpClientNoticeRequestDelayed(TimeEventHandlerPtr event)
{
    HTTPRequestPtr request = *(HTTPRequestPtr*)event->data;
    httpClientNoticeRequest(request, 0);
    return 1;
}

/* Schedule a call to httpClientNoticeRequest(request, 0).
   Returns 1 on success, -1 if the time event could not be scheduled. */
int
delayedHttpClientNoticeRequest(HTTPRequestPtr request)
{
    TimeEventHandlerPtr event;
    event = scheduleTimeEvent(-1, httpClientNoticeRequestDelayed,
                              sizeof(request), &request);
    if(!event)
        return -1;
    return 1;
}

/* Timer callback for delayedHttpClientContinue; event->data holds the
   connection.  Writes a "100 Continue" status line (25 bytes, the
   length of the literal without its terminator) to the client.
   Always returns 1. */
int
httpClientContinueDelayed(TimeEventHandlerPtr event)
{
    static char httpContinue[] = "HTTP/1.1 100 Continue\r\n\r\n";
    HTTPConnectionPtr connection = *(HTTPConnectionPtr*)event->data;

    do_stream(IO_WRITE, connection->fd, 0, httpContinue, 25,
              httpErrorNofinishStreamHandler, connection);
    return 1;
}

/* Schedule httpClientContinueDelayed for the connection.
   Returns 1 on success, -1 if the time event could not be scheduled. */
int
delayedHttpClientContinue(HTTPConnectionPtr connection)
{
    TimeEventHandlerPtr event;
    event = scheduleTimeEvent(-1, httpClientContinueDelayed,
                              sizeof(connection), &connection);
    if(!event)
        return -1;
    return 1;
}

/* Condition handler registered by httpClientNoticeRequest on the
   request's object; chandler->data holds the request.  Returns 0 on the
   paths where the object is not yet ready to be served, and 1 on the
   paths where serving (delayedHttpServeObject) or re-noticing
   (delayedHttpClientNoticeRequest) has been scheduled, or where a
   failure has been reported through abortObject. */
int
httpClientGetHandler(int status, ConditionHandlerPtr chandler)
{
    HTTPRequestPtr request = *(HTTPRequestPtr*)chandler->data;
    HTTPConnectionPtr connection = request->connection;
    ObjectPtr object = request->object;
    int rc;

    assert(request == connection->request);

    if(request->request) {
        assert(request->object->flags & OBJECT_INPROGRESS);
        assert(!request->request->object ||
               request->request->object == request->object);
    }

    if(status < 0) {
        object->flags &= ~OBJECT_VALIDATING; /* for now */
        if(request->request && request->request->request == request)
            httpServerClientReset(request->request);
        lockChunk(object, request->from / CHUNK_SIZE);
        request->chandler = NULL;
        rc = delayedHttpServeObject(connection);
        if(rc < 0) {
            unlockChunk(object, request->from / CHUNK_SIZE);
            do_log(L_ERROR, "Couldn't schedule serving.\n");
            abortObject(object, 503, internAtom("Couldn't schedule serving"));
        }
        return 1;
    }

    if(object->flags & OBJECT_VALIDATING)
        return 0;

    if(request->error_code) {
        lockChunk(object, request->from / CHUNK_SIZE);
        request->chandler = NULL;
        rc = delayedHttpServeObject(connection);
        if(rc < 0) {
            unlockChunk(object, request->from / CHUNK_SIZE);
            do_log(L_ERROR, "Couldn't schedule serving.\n");
            abortObject(object, 503, internAtom("Couldn't schedule serving"));
        }
        return 1;
    }

    if(request->flags & REQUEST_WAIT_CONTINUE) {
        /* Relay a 100 Continue to the client if the object carries
           one and the server-side request is no longer waiting for it. */
        request->flags &= ~REQUEST_WAIT_CONTINUE;
        if(object->code == 100 && request->request &&
           !(request->request->flags & REQUEST_WAIT_CONTINUE))
            delayedHttpClientContinue(connection);
        return 0;
    }

    /* See httpServerHandlerHeaders */
    if((object->flags & OBJECT_SUPERSEDED) &&
       request->request && request->request->can_mutate) {
        ObjectPtr new_object = retainObject(request->request->can_mutate);
        if(object->requestor == request) {
            if(new_object->requestor == NULL)
                new_object->requestor = request;
            object->requestor = NULL;
        }
        request->chandler = NULL;
        releaseObject(object);
        request->object = new_object;
        request->request->object = new_object;
        /* We're handling the wrong object now.  It's simpler to
           rebuild the whole data structure from scratch rather than
           trying to compensate. */
        rc = delayedHttpClientNoticeRequest(request);
        if(rc < 0) {
            do_log(L_ERROR, "Couldn't schedule noticing of request.");
            abortObject(object, 500,
                        internAtom("Couldn't schedule "
                                   "noticing of request"));
            /* We're probably out of memory.  What can we do? */
            shutdown(connection->fd, 1);
        }
        return 1;
    }

    if(object->requestor != request && !(object->flags & OBJECT_ABORTED)) {
        /* Make sure we don't serve an object that is stale for us
           unless we're the requestor. */
        if((object->flags & (OBJECT_LINEAR | OBJECT_MUTATING)) ||
           objectMustRevalidate(object, &request->cache_control)) {
            if(object->flags & OBJECT_INPROGRESS)
                return 0;
            rc = delayedHttpClientNoticeRequest(request);
            if(rc < 0) {
                do_log(L_ERROR, "Couldn't schedule noticing of request.");
                abortObject(object, 500,
                            internAtom("Couldn't schedule "
                                       "noticing of request"));
            } else {
                request->chandler = NULL;
                return 1;
            }
        }
    }

    if(object->flags & (OBJECT_INITIAL | OBJECT_VALIDATING)) {
        if(object->flags & (OBJECT_INPROGRESS | OBJECT_VALIDATING)) {
            return 0;
        } else if(object->flags & OBJECT_FAILED) {
            if(request->error_code)
                abortObject(object,
                            request->error_code,
                            retainAtom(request->error_message));
            else {
                abortObject(object, 500,
                            internAtom("Error message lost in transit"));
            }
        } else {
            /* The request was pruned by httpServerDiscardRequests */
            if(chandler == request->chandler) {
                int rc;
                request->chandler = NULL;
                rc = delayedHttpClientNoticeRequest(request);
                if(rc < 0)
                    abortObject(object, 500,
                                internAtom("Couldn't allocate "
                                           "delayed notice request"));
                else
                    return 1;
            } else {
                abortObject(object, 500,
                            internAtom("Wrong request pruned -- "
                                       "this shouldn't happen"));
            }
        }
    }

    if(request->object->flags & OBJECT_DYNAMIC) {
        if(objectHoleSize(request->object, 0) == 0) {
            request->from = 0;
            request->to = -1;
        } else {
            /* We really should request again if that is not the case */
        }
    }

    lockChunk(object, request->from / CHUNK_SIZE);
    request->chandler = NULL;
    rc = delayedHttpServeObject(connection);
    if(rc < 0) {
        unlockChunk(object, request->from / CHUNK_SIZE);
        do_log(L_ERROR, "Couldn't schedule serving.\n");
        abortObject(object, 503, internAtom("Couldn't schedule serving"));
    }
    return 1;
}

/* Entry point for requests with a body (called for POST and PUT from
   httpClientRequestContinue).  Rejects partial requests, non-identity
   transfer encodings, missing body lengths and negative request lengths
   with an error and discards the body; otherwise hands the request to
   httpClientNoticeRequest.  Returns 1 on rejection, else the value of
   httpClientNoticeRequest. */
int
httpClientSideRequest(HTTPRequestPtr request)
{
    HTTPConnectionPtr connection = request->connection;

    if(request->from < 0 || request->to >= 0) {
        httpClientNoticeError(request, 501,
                              internAtom("Partial requests not implemented"));
        httpClientDiscardBody(connection);
        return 1;
    }
    if(connection->reqte != TE_IDENTITY) {
        httpClientNoticeError(request, 501,
                              internAtom("Chunked requests not implemented"));
        httpClientDiscardBody(connection);
        return 1;
    }
    if(connection->bodylen < 0) {
        httpClientNoticeError(request, 502,
                              internAtom("POST or PUT without "
                                         "Content-Length"));
        httpClientDiscardBody(connection);
        return 1;
    }
    if(connection->reqlen < 0) {
        httpClientNoticeError(request, 502,
                              internAtom("Incomplete POST or PUT"));
        httpClientDiscardBody(connection);
        return 1;
    }

    return httpClientNoticeRequest(request, 0);
}

/* Stream handler for reading the body of a side-effecting request from
   the client.  Newly read bytes are accounted for in connection->reqlen
   and httpServerDoSide is called on the server connection.  On failure
   (object aborted or no longer in progress, read error, or a short
   body) the error is recorded on the request, the server connection is
   shut down, the object's waiters are notified and the rest of the body
   is discarded.  Always returns 1. */
int
httpClientSideHandler(int status,
                      FdEventHandlerPtr event,
                      StreamRequestPtr srequest)
{
    HTTPConnectionPtr connection = srequest->data;
    HTTPRequestPtr request = connection->request;
    HTTPRequestPtr requestee;
    HTTPConnectionPtr server;
    int push;
    int code;
    AtomPtr message = NULL;

    assert(connection->flags & CONN_SIDE_READER);

    if((request->object->flags & OBJECT_ABORTED) ||
       !(request->object->flags & OBJECT_INPROGRESS)) {
        code = request->object->code;
        message = retainAtom(request->object->message);
        goto fail;
    }

    if(status < 0) {
        do_log_error(L_ERROR, -status, "Reading from client");
        code = 502;
        message = internAtomError(-status, "Couldn't read from client");
        goto fail;
    }

    requestee = request->request;
    server = requestee->connection;

    /* Number of new body bytes, capped at what remains of the body. */
    push = MIN(srequest->offset - connection->reqlen,
               connection->bodylen - connection->reqoffset);
    if(push > 0) {
        connection->reqlen += push;
        httpServerDoSide(server);
        return 1;
    }

    if(server->reqoffset >= connection->bodylen) {
        connection->flags &= ~(CONN_READER | CONN_SIDE_READER);
        return 1;
    }

    assert(status);
    do_log(L_ERROR, "Incomplete client request.\n");
    code = 502;
    message = internAtom("Incomplete client request");

 fail:
    request->error_code = code;
    if(request->error_message)
        releaseAtom(request->error_message);
    request->error_message = message;
    if(request->error_headers)
        releaseAtom(request->error_headers);
    request->error_headers = NULL;

    if(request->request) {
        shutdown(request->request->connection->fd, 2);
        /* NOTE(review): this uses -ESHUTDOWN whereas the rest of this
           file pokes with -EDOSHUTDOWN -- confirm which is intended. */
        pokeFdEvent(request->request->connection->fd, -ESHUTDOWN, POLLOUT);
    }
    notifyObject(request->object);
    connection->flags &= ~CONN_SIDE_READER;
    httpClientDiscardBody(connection);
    return 1;
}

int
httpServeObject(HTTPConnectionPtr connection)
{
    HTTPRequestPtr request = connection->request;
    ObjectPtr object = request->object;
    int i = request->from / CHUNK_SIZE;
    int j = request->from % CHUNK_SIZE;
    int n, len, rc;
    int bufsize = CHUNK_SIZE;
    int condition_result;

    object->atime = current_time.tv_sec;
    objectMetadataChanged(object, 0);

    httpSetTimeout(connection, -1);

    if((request->error_code && relaxTransparency <= 0) ||
       object->flags & OBJECT_INITIAL) {
        object->flags &= ~OBJECT_FAILED;
        unlockChunk(object, i);
        if(request->error_code)
            return httpClientRawError(connection,
                                      request->error_code,
                                      retainAtom(request->error_message), 0);
        else
            return httpClientRawError(connection,
                                      500, internAtom("Object vanished."), 0);
    }

    if(!(object->flags & OBJECT_INPROGRESS) && object->code == 0) {
        if(object->flags & OBJECT_INITIAL) {
            unlockChunk(object, i);
            return httpClientRawError(connection, 503,
                                      internAtom("Error message lost"), 0);

        } else {
            unlockChunk(object, i);
            do_log(L_ERROR, "Internal proxy error: object has no code.\n");
            return httpClientRawError(connection, 500,
                                      internAtom("Internal proxy error: "
                                                 "object has no code"), 0);
        }
    }

    condition_result = httpCondition(object, request->condition);

    if(condition_result == CONDITION_FAILED) {
        unlockChunk(object, i);
        return httpClientRawError(connection, 412,
                                  internAtom("Precondition failed"), 0);
    } else if(condition_result == CONDITION_NOT_MODIFIED) {
        unlockChunk(object, i);
        return httpClientRawError(connection, 304,
                                  internAtom("Not modified"), 0);
    }

    objectFillFromDisk(object, request->from,
                       (request->method == METHOD_HEAD ||
                        condition_result != CONDITION_MATCH) ? 
0 : 1); if(((object->flags & OBJECT_LINEAR) && (object->requestor != connection->request)) || ((object->flags & OBJECT_SUPERSEDED) && !(object->flags & OBJECT_LINEAR))) { if(request->request) { request->request->request = NULL; request->request = NULL; request->object->requestor = NULL; } object = makeObject(OBJECT_HTTP, object->key, object->key_size, 1, 0, object->request, NULL); if(request->object->requestor == request) request->object->requestor = NULL; unlockChunk(request->object, i); releaseObject(request->object); request->object = NULL; if(object == NULL) { do_log(L_ERROR, "Couldn't allocate object."); return httpClientRawError(connection, 501, internAtom("Couldn't allocate object"), 1); } if(urlIsLocal(object->key, object->key_size)) { object->flags |= OBJECT_LOCAL; object->request = httpLocalRequest; } request->object = object; connection->flags &= ~CONN_WRITER; return httpClientNoticeRequest(request, 1); } if(object->flags & OBJECT_ABORTED) { unlockChunk(object, i); return httpClientNoticeError(request, object->code, retainAtom(object->message)); } if(connection->buf == NULL) connection->buf = get_chunk(); if(connection->buf == NULL) { unlockChunk(object, i); do_log(L_ERROR, "Couldn't allocate client buffer.\n"); connection->flags &= ~CONN_WRITER; httpClientFinish(connection, 1); return 1; } if(object->length >= 0 && request->to >= object->length) request->to = -1; if(request->from > 0 || request->to >= 0) { if(request->method == METHOD_HEAD) { request->to = request->from; } else if(request->to < 0) { if(object->length >= 0) request->to = object->length; } } again: connection->len = 0; if((request->from <= 0 && request->to < 0) || request->method == METHOD_HEAD) { n = snnprintf(connection->buf, 0, bufsize, "HTTP/1.1 %d %s", object->code, atomString(object->message)); } else { if(request->from > request->to) { unlockChunk(object, i); return httpClientRawError(connection, 416, internAtom("Requested range " "not satisfiable"), 0); } else { n = 
snnprintf(connection->buf, 0, bufsize, "HTTP/1.1 206 Partial content"); } } n = httpWriteObjectHeaders(connection->buf, n, bufsize, object, request->from, request->to); if(n < 0) goto fail; if(request->method != METHOD_HEAD && condition_result != CONDITION_NOT_MODIFIED && request->to < 0 && object->length < 0) { if(connection->version == HTTP_11) { connection->te = TE_CHUNKED; n = snnprintf(connection->buf, n, bufsize, "\r\nTransfer-Encoding: chunked"); } else { request->flags &= ~REQUEST_PERSISTENT; } } if(object->age < current_time.tv_sec) { n = snnprintf(connection->buf, n, bufsize, "\r\nAge: %d", (int)(current_time.tv_sec - object->age)); } n = snnprintf(connection->buf, n, bufsize, "\r\nConnection: %s", (request->flags & REQUEST_PERSISTENT) ? "keep-alive" : "close"); if(!(object->flags & OBJECT_LOCAL)) { if((object->flags & OBJECT_FAILED) && !proxyOffline) { n = snnprintf(connection->buf, n, bufsize, "\r\nWarning: 111 %s:%d Revalidation failed", proxyName->string, proxyPort); if(request->error_code) n = snnprintf(connection->buf, n, bufsize, " (%d %s)", request->error_code, atomString(request->error_message)); object->flags &= ~OBJECT_FAILED; } else if(proxyOffline && objectMustRevalidate(object, &request->cache_control)) { n = snnprintf(connection->buf, n, bufsize, "\r\nWarning: 112 %s:%d Disconnected operation", proxyName->string, proxyPort); } else if(objectIsStale(object, &request->cache_control)) { n = snnprintf(connection->buf, n, bufsize, "\r\nWarning: 110 %s:%d Object is stale", proxyName->string, proxyPort); } else if(object->expires < 0 && object->max_age < 0 && object->age < current_time.tv_sec - 24 * 3600) { n = snnprintf(connection->buf, n, bufsize, "\r\nWarning: 113 %s:%d Heuristic expiration", proxyName->string, proxyPort); } } n = snnprintf(connection->buf, n, bufsize, "\r\n\r\n"); if(n < 0) goto fail; connection->offset = request->from; if(request->method == METHOD_HEAD || condition_result == CONDITION_NOT_MODIFIED || (object->flags & 
OBJECT_ABORTED)) { len = 0; } else { if(i < object->numchunks) { if(object->chunks[i].size <= j) len = 0; else len = object->chunks[i].size - j; } else { len = 0; } if(request->to >= 0) len = MIN(len, request->to - request->from); } connection->offset = request->from; httpSetTimeout(connection, clientTimeout); do_log(D_CLIENT_DATA, "Serving on 0x%lx for 0x%lx: offset %d len %d\n", (unsigned long)connection, (unsigned long)object, connection->offset, len); do_stream_h(IO_WRITE | (connection->te == TE_CHUNKED && len > 0 ? IO_CHUNKED : 0), connection->fd, 0, connection->buf, n, object->chunks[i].data + j, len, httpServeObjectStreamHandler, connection); return 1; fail: rc = 0; connection->len = 0; if(!(connection->flags & CONN_BIGBUF)) rc = httpConnectionBigify(connection); if(rc > 0) { bufsize = bigBufferSize; goto again; } unlockChunk(object, i); return httpClientRawError(connection, 500, rc == 0 ? internAtom("No space for headers") : internAtom("Couldn't allocate big buffer"), 0); } static int httpServeObjectDelayed(TimeEventHandlerPtr event) { HTTPConnectionPtr connection = *(HTTPConnectionPtr*)event->data; httpServeObject(connection); return 1; } int delayedHttpServeObject(HTTPConnectionPtr connection) { TimeEventHandlerPtr event; assert(connection->request->object->chunks[connection->request->from / CHUNK_SIZE].locked > 0); event = scheduleTimeEvent(-1, httpServeObjectDelayed, sizeof(connection), &connection); if(!event) return -1; return 1; } static int httpServeObjectFinishHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { HTTPConnectionPtr connection = srequest->data; HTTPRequestPtr request = connection->request; (void)request; assert(!request->chandler); if(status == 0 && !streamRequestDone(srequest)) return 0; httpSetTimeout(connection, -1); if(status < 0) { do_log(L_ERROR, "Couldn't terminate chunked reply\n"); httpClientFinish(connection, 1); } else { httpClientFinish(connection, 0); } return 1; } int httpServeChunk(HTTPConnectionPtr 
connection) { HTTPRequestPtr request = connection->request; ObjectPtr object = request->object; int i = connection->offset / CHUNK_SIZE; int j = connection->offset - (i * CHUNK_SIZE); int to, len, len2, end; int rc; if(object->flags & OBJECT_ABORTED) goto fail_no_unlock; if(object->length >= 0 && request->to >= 0) to = MIN(request->to, object->length); else if(object->length >= 0) to = object->length; else if(request->to >= 0) to = request->to; else to = -1; lockChunk(object, i); len = 0; if(i < object->numchunks) len = object->chunks[i].size - j; if(request->method != METHOD_HEAD && len < CHUNK_SIZE && connection->offset + len < to) { objectFillFromDisk(object, connection->offset + len, 2); len = object->chunks[i].size - j; } if(to >= 0) len = MIN(len, to - connection->offset); if(len <= 0) { if(to >= 0 && connection->offset >= to) { if(request->chandler) { unregisterConditionHandler(request->chandler); request->chandler = NULL; } unlockChunk(object, i); if(connection->te == TE_CHUNKED) { httpSetTimeout(connection, clientTimeout); do_stream(IO_WRITE | IO_CHUNKED | IO_END, connection->fd, 0, NULL, 0, httpServeObjectFinishHandler, connection); } else { httpClientFinish(connection, !(object->length >= 0 && connection->offset >= object->length)); } return 1; } else { if(!request->chandler) { request->chandler = conditionWait(&object->condition, httpServeObjectHandler, sizeof(connection), &connection); if(!request->chandler) { do_log(L_ERROR, "Couldn't register condition handler\n"); goto fail; } } if(!(object->flags & OBJECT_INPROGRESS)) { if(object->flags & OBJECT_SUPERSEDED) { goto fail; } if(REQUEST_SIDE(request)) goto fail; rc = object->request(object, request->method, connection->offset, -1, request, object->request_closure); if(rc <= 0) goto fail; } return 1; } } else { /* len > 0 */ if(request->method != METHOD_HEAD) objectFillFromDisk(object, (i + 1) * CHUNK_SIZE, 1); if(request->chandler) { unregisterConditionHandler(request->chandler); request->chandler = 
NULL; } len2 = 0; if(j + len == CHUNK_SIZE && object->numchunks > i + 1) { len2 = object->chunks[i + 1].size; if(to >= 0) len2 = MIN(len2, to - (i + 1) * CHUNK_SIZE); } /* Lock early -- httpServerRequest may get_chunk */ if(len2 > 0) lockChunk(object, i + 1); if(object->length >= 0 && connection->offset + len + len2 == object->length) end = 1; else end = 0; /* Prefetch */ if(!(object->flags & OBJECT_INPROGRESS) && !REQUEST_SIDE(request)) { if(object->chunks[i].size < CHUNK_SIZE && to >= 0 && connection->offset + len + 1 < to) object->request(object, request->method, connection->offset + len, -1, request, object->request_closure); else if(i + 1 < object->numchunks && object->chunks[i + 1].size == 0 && to >= 0 && (i + 1) * CHUNK_SIZE + 1 < to) object->request(object, request->method, (i + 1) * CHUNK_SIZE, -1, request, object->request_closure); } if(len2 == 0) { httpSetTimeout(connection, clientTimeout); do_log(D_CLIENT_DATA, "Serving on 0x%lx for 0x%lx: offset %d len %d\n", (unsigned long)connection, (unsigned long)object, connection->offset, len); /* IO_NOTNOW in order to give other clients a chance to run. */ do_stream(IO_WRITE | IO_NOTNOW | (connection->te == TE_CHUNKED ? IO_CHUNKED : 0) | (end ? IO_END : 0), connection->fd, 0, object->chunks[i].data + j, len, httpServeObjectStreamHandler, connection); } else { httpSetTimeout(connection, clientTimeout); do_log(D_CLIENT_DATA, "Serving on 0x%lx for 0x%lx: offset %d len %d + %d\n", (unsigned long)connection, (unsigned long)object, connection->offset, len, len2); do_stream_2(IO_WRITE | IO_NOTNOW | (connection->te == TE_CHUNKED ? IO_CHUNKED : 0) | (end ? 
IO_END : 0), connection->fd, 0, object->chunks[i].data + j, len, object->chunks[i + 1].data, len2, httpServeObjectStreamHandler2, connection); } return 1; } abort(); fail: unlockChunk(object, i); fail_no_unlock: if(request->chandler) unregisterConditionHandler(request->chandler); request->chandler = NULL; httpClientFinish(connection, 1); return 1; } static int httpServeChunkDelayed(TimeEventHandlerPtr event) { HTTPConnectionPtr connection = *(HTTPConnectionPtr*)event->data; httpServeChunk(connection); return 1; } int delayedHttpServeChunk(HTTPConnectionPtr connection) { TimeEventHandlerPtr event; event = scheduleTimeEvent(-1, httpServeChunkDelayed, sizeof(connection), &connection); if(!event) return -1; return 1; } int httpServeObjectHandler(int status, ConditionHandlerPtr chandler) { HTTPConnectionPtr connection = *(HTTPConnectionPtr*)chandler->data; HTTPRequestPtr request = connection->request; int rc; unlockChunk(request->object, connection->offset / CHUNK_SIZE); if((request->object->flags & OBJECT_ABORTED) || status < 0) { shutdown(connection->fd, 1); httpSetTimeout(connection, 10); /* httpServeChunk will take care of the error. 
*/ } httpSetTimeout(connection, -1); request->chandler = NULL; rc = delayedHttpServeChunk(connection); if(rc < 0) { do_log(L_ERROR, "Couldn't schedule serving.\n"); abortObject(request->object, 503, internAtom("Couldn't schedule serving")); } return 1; } static int httpServeObjectStreamHandlerCommon(int kind, int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { HTTPConnectionPtr connection = srequest->data; HTTPRequestPtr request = connection->request; int condition_result = httpCondition(request->object, request->condition); int i = connection->offset / CHUNK_SIZE; assert(!request->chandler); if(status == 0 && !streamRequestDone(srequest)) { httpSetTimeout(connection, clientTimeout); return 0; } httpSetTimeout(connection, -1); unlockChunk(request->object, i); if(kind == 2) unlockChunk(request->object, i + 1); if(status) { if(status < 0) { do_log_error(status == -ECONNRESET ? D_IO : L_ERROR, -status, "Couldn't write to client"); if(status == -EIO || status == -ESHUTDOWN) httpClientFinish(connection, 2); else httpClientFinish(connection, 1); } else { do_log(D_IO, "Couldn't write to client: short write.\n"); httpClientFinish(connection, 2); } return 1; } if(srequest->operation & IO_CHUNKED) { assert(srequest->offset > 2); connection->offset += srequest->offset - 2; } else { connection->offset += srequest->offset; } request->flags &= ~REQUEST_REQUESTED; if(request->object->flags & OBJECT_ABORTED) { httpClientFinish(connection, 1); return 1; } if(request->method == METHOD_HEAD || request->object->code == 204 || request->object->code < 200 || condition_result == CONDITION_NOT_MODIFIED) { httpClientFinish(connection, 0); return 1; } if(srequest->operation & IO_END) httpClientFinish(connection, 0); else { httpConnectionDestroyBuf(connection); httpServeChunk(connection); } return 1; } int httpServeObjectStreamHandler(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { return httpServeObjectStreamHandlerCommon(1, status, event, srequest); } int 
httpServeObjectStreamHandler2(int status, FdEventHandlerPtr event, StreamRequestPtr srequest) { return httpServeObjectStreamHandlerCommon(2, status, event, srequest); } polipo-1.0.4.1/chunk.h0000644000175000017500000000347411331407220013751 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* A larger chunk size gets you better I/O throughput, at the cost of higer memory usage. We assume you've got plenty of memory if you've got 64-bit longs. 
*/ #ifndef CHUNK_SIZE #ifdef ULONG_MAX #if ULONG_MAX > 4294967295UL #define CHUNK_SIZE (8 * 1024) #else #define CHUNK_SIZE (4 * 1024) #endif #else #warn "ULONG_MAX not defined -- using 4kB chunks" #define CHUNK_SIZE (4 * 1024) #endif #endif #define CHUNKS(bytes) ((bytes) / CHUNK_SIZE) extern int chunkLowMark, chunkHighMark, chunkCriticalMark; extern int used_chunks; void preinitChunks(void); void initChunks(void); void *get_chunk(void) ATTRIBUTE ((malloc)); void *maybe_get_chunk(void) ATTRIBUTE ((malloc)); void dispose_chunk(void *chunk); void free_chunk_arenas(void); int totalChunkArenaSize(void); polipo-1.0.4.1/chunk.c0000644000175000017500000002503711331407220013743 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "polipo.h" #define MB (1024 * 1024) int chunkLowMark = 0, chunkCriticalMark = 0, chunkHighMark = 0; void preinitChunks() { CONFIG_VARIABLE(chunkLowMark, CONFIG_INT, "Low mark for chunk memory (0 = auto)."); CONFIG_VARIABLE(chunkCriticalMark, CONFIG_INT, "Critical mark for chunk memory (0 = auto)."); CONFIG_VARIABLE(chunkHighMark, CONFIG_INT, "High mark for chunk memory."); } static void initChunksCommon() { #define ROUND_CHUNKS(a) a = (((a) + CHUNK_SIZE - 1) / CHUNK_SIZE) * CHUNK_SIZE; int q; if(CHUNK_SIZE != 1 << log2_ceil(CHUNK_SIZE)) { do_log(L_ERROR, "CHUNK SIZE %d is not a power of two.\n", CHUNK_SIZE); exit(1); } ROUND_CHUNKS(chunkHighMark); ROUND_CHUNKS(chunkCriticalMark); ROUND_CHUNKS(chunkLowMark); if(chunkHighMark < 8 * CHUNK_SIZE) { int mem = physicalMemory(); if(mem > 0) chunkHighMark = mem / 4; else chunkHighMark = 24 * MB; chunkHighMark = MIN(chunkHighMark, 24 * MB); chunkHighMark = MAX(chunkHighMark, 8 * CHUNK_SIZE); } if(chunkHighMark < MB / 2) fprintf(stderr, "Warning: little chunk memory (%d bytes)\n", chunkHighMark); q = 0; if(chunkLowMark <= 0) q = 1; if(chunkLowMark < 4 * CHUNK_SIZE || chunkLowMark > chunkHighMark - 4 * CHUNK_SIZE) { chunkLowMark = MIN(chunkHighMark - 4 * CHUNK_SIZE, chunkHighMark * 3 / 4); ROUND_CHUNKS(chunkLowMark); if(!q) do_log(L_WARN, "Inconsistent chunkLowMark -- setting to %d.\n", chunkLowMark); } q = 0; if(chunkCriticalMark <= 0) q = 1; if(chunkCriticalMark >= chunkHighMark - 2 * CHUNK_SIZE || chunkCriticalMark <= chunkLowMark + 2 * CHUNK_SIZE) { chunkCriticalMark = MIN(chunkHighMark - 2 * CHUNK_SIZE, chunkLowMark + (chunkHighMark - chunkLowMark) * 15 / 16); ROUND_CHUNKS(chunkCriticalMark); if(!q) do_log(L_WARN, "Inconsistent chunkCriticalMark -- " "setting to %d.\n", chunkCriticalMark); } #undef ROUND_CHUNKS } int used_chunks = 0; static void maybe_free_chunks(int arenas, int force) { if(force || used_chunks >= CHUNKS(chunkHighMark)) { discardObjects(force, force); } if(arenas) free_chunk_arenas(); 
if(used_chunks >= CHUNKS(chunkLowMark) && !objectExpiryScheduled) { TimeEventHandlerPtr event; event = scheduleTimeEvent(1, discardObjectsHandler, 0, NULL); if(event) objectExpiryScheduled = 1; } } #ifdef MALLOC_CHUNKS void initChunks(void) { do_log(L_WARN, "Warning: using malloc(3) for chunk allocation.\n"); used_chunks = 0; initChunksCommon(); } void free_chunk_arenas() { return; } void * get_chunk() { void *chunk; if(used_chunks > CHUNKS(chunkHighMark)) maybe_free_chunks(0, 0); if(used_chunks > CHUNKS(chunkHighMark)) return NULL; chunk = malloc(CHUNK_SIZE); if(!chunk) { maybe_free_chunks(1, 1); chunk = malloc(CHUNK_SIZE); if(!chunk) return NULL; } used_chunks++; return chunk; } void * maybe_get_chunk() { void *chunk; if(used_chunks > CHUNKS(chunkHighMark)) return NULL; chunk = malloc(CHUNK_SIZE); if(chunk) used_chunks++; return chunk; } void dispose_chunk(void *chunk) { assert(chunk != NULL); free(chunk); used_chunks--; } void free_chunks() { return; } int totalChunkArenaSize() { return used_chunks * CHUNK_SIZE; } #else #ifdef MINGW #define MAP_FAILED NULL #define getpagesize() (64 * 1024) static void * alloc_arena(size_t size) { return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE); } static int free_arena(void *addr, size_t size) { int rc; rc = VirtualFree(addr, size, MEM_RELEASE); if(!rc) rc = -1; return rc; } #else #ifndef MAP_FAILED #define MAP_FAILED ((void*)((long int)-1)) #endif static void * alloc_arena(size_t size) { return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); } static int free_arena(void *addr, size_t size) { return munmap(addr, size); } #endif /* Memory is organised into a number of chunks of ARENA_CHUNKS chunks each. Every arena is pointed at by a struct _ChunkArena. */ /* If currentArena is not NULL, it points at the last arena used, which gives very fast dispose/get sequences. 
*/ #define DEFINE_FFS(type, ffs_name) \ int \ ffs_name(type i) \ { \ int n; \ if(i == 0) return 0; \ n = 1; \ while((i & 1) == 0) { \ i >>= 1; \ n++; \ } \ return n; \ } #ifndef LONG_LONG_ARENA_BITMAPS #ifndef LONG_ARENA_BITMAPS #ifndef HAVE_FFS DEFINE_FFS(int, ffs) #endif typedef unsigned int ChunkBitmap; #define BITMAP_FFS(bitmap) (ffs(bitmap)) #else #ifndef HAVE_FFSL DEFINE_FFS(long, ffsl) #endif typedef unsigned long ChunkBitmap; #define BITMAP_FFS(bitmap) (ffsl(bitmap)) #endif #else #ifndef HAVE_FFSLL DEFINE_FFS(long long, ffsll) #endif typedef unsigned long long ChunkBitmap; #define BITMAP_FFS(bitmap) (ffsll(bitmap)) #endif #define ARENA_CHUNKS ((int)sizeof(ChunkBitmap) * 8) #define EMPTY_BITMAP (~(ChunkBitmap)0) #define BITMAP_BIT(i) (((ChunkBitmap)1) << (i)) static int pagesize; typedef struct _ChunkArena { ChunkBitmap bitmap; char *chunks; } ChunkArenaRec, *ChunkArenaPtr; static ChunkArenaPtr chunkArenas, currentArena; static int numArenas; #define CHUNK_IN_ARENA(chunk, arena) \ ((arena)->chunks && \ (char*)(chunk) >= (arena)->chunks && \ (char*)(chunk) < (arena)->chunks + (ARENA_CHUNKS * CHUNK_SIZE)) #define CHUNK_ARENA_INDEX(chunk, arena) \ (((char*)(chunk) - (arena)->chunks) / CHUNK_SIZE) void initChunks(void) { int i; used_chunks = 0; initChunksCommon(); pagesize = getpagesize(); if((CHUNK_SIZE * ARENA_CHUNKS) % pagesize != 0) { do_log(L_ERROR, "The arena size %d (%d x %d) " "is not a multiple of the page size %d.\n", ARENA_CHUNKS * CHUNK_SIZE, ARENA_CHUNKS, CHUNK_SIZE, pagesize); abort(); } numArenas = (CHUNKS(chunkHighMark) + (ARENA_CHUNKS - 1)) / ARENA_CHUNKS; chunkArenas = malloc(numArenas * sizeof(ChunkArenaRec)); if(chunkArenas == NULL) { do_log(L_ERROR, "Couldn't allocate chunk arenas.\n"); polipoExit(); } for(i = 0; i < numArenas; i++) { chunkArenas[i].bitmap = EMPTY_BITMAP; chunkArenas[i].chunks = NULL; } currentArena = NULL; } static ChunkArenaPtr findArena() { ChunkArenaPtr arena = NULL; int i; for(i = 0; i < numArenas; i++) { arena = 
&(chunkArenas[i]); if(arena->bitmap != 0) break; else arena = NULL; } assert(arena != NULL); if(!arena->chunks) { void *p; p = alloc_arena(CHUNK_SIZE * ARENA_CHUNKS); if(p == MAP_FAILED) { do_log_error(L_ERROR, errno, "Couldn't allocate chunk"); maybe_free_chunks(1, 1); return NULL; } arena->chunks = p; } return arena; } void * get_chunk() { int i; ChunkArenaPtr arena = NULL; if(currentArena && currentArena->bitmap != 0) { arena = currentArena; } else { if(used_chunks >= CHUNKS(chunkHighMark)) maybe_free_chunks(0, 0); if(used_chunks >= CHUNKS(chunkHighMark)) return NULL; arena = findArena(); if(!arena) return NULL; currentArena = arena; } i = BITMAP_FFS(arena->bitmap) - 1; arena->bitmap &= ~BITMAP_BIT(i); used_chunks++; return arena->chunks + CHUNK_SIZE * i; } void * maybe_get_chunk() { int i; ChunkArenaPtr arena = NULL; if(currentArena && currentArena->bitmap != 0) { arena = currentArena; } else { if(used_chunks >= CHUNKS(chunkHighMark)) return NULL; arena = findArena(); if(!arena) return NULL; currentArena = arena; } i = ffs(arena->bitmap) - 1; arena->bitmap &= ~BITMAP_BIT(i); used_chunks++; return arena->chunks + CHUNK_SIZE * i; } void dispose_chunk(void *chunk) { ChunkArenaPtr arena = NULL; int i; assert(chunk != NULL); if(currentArena && CHUNK_IN_ARENA(chunk, currentArena)) { arena = currentArena; } else { for(i = 0; i < numArenas; i++) { arena = &(chunkArenas[i]); if(CHUNK_IN_ARENA(chunk, arena)) break; } assert(arena != NULL); currentArena = arena; } i = CHUNK_ARENA_INDEX(chunk, arena); arena->bitmap |= BITMAP_BIT(i); used_chunks--; } void free_chunk_arenas() { ChunkArenaPtr arena; int i, rc; for(i = 0; i < numArenas; i++) { arena = &(chunkArenas[i]); if(arena->bitmap == EMPTY_BITMAP && arena->chunks) { rc = free_arena(arena->chunks, CHUNK_SIZE * ARENA_CHUNKS); if(rc < 0) { do_log_error(L_ERROR, errno, "Couldn't unmap memory"); continue; } arena->chunks = NULL; } } if(currentArena && currentArena->chunks == NULL) currentArena = NULL; } int 
totalChunkArenaSize() { ChunkArenaPtr arena; int i, size = 0; for(i = 0; i < numArenas; i++) { arena = &(chunkArenas[i]); if(arena->chunks) size += (CHUNK_SIZE * ARENA_CHUNKS); } return size; } #endif polipo-1.0.4.1/auth.h0000644000175000017500000000224711331407220013577 0ustar chrisdchrisd/* Copyright (c) 2004-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

/* Check a client authorization value against the configured
   credentials.  Arguments, in order: the authorization value (may be
   NULL), the request URL, and two out-parameters receiving an error
   message and extra reply headers.  Returns 0 when access is granted
   or no authentication is configured, otherwise the HTTP status to
   reply with (401 for local URLs, 407 for proxied ones). */
int checkClientAuth(AtomPtr, AtomPtr, AtomPtr*, AtomPtr*);
/* Append a Proxy-Authorization header carrying Basic credentials to a
   buffer.  Arguments, in order: buffer, current offset, buffer size,
   credentials.  Returns the new offset as reported by the snnprint
   helpers, or -1 when the credentials are 384 bytes or longer. */
int buildServerAuthHeaders(char*, int, int, AtomPtr);
polipo-1.0.4.1/auth.c0000644000175000017500000000613611331407220013573 0ustar chrisdchrisd/* Copyright (c) 2004-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #include "polipo.h" int buildClientAuthHeaders(AtomPtr url, char *word, AtomPtr *message_return, AtomPtr *headers_return) { int code; char *h; AtomPtr message, headers; if(urlIsLocal(url->string, url->length)) { code = 401; message = internAtomF("Server authentication %s", word); h = "WWW-Authenticate"; } else { code = 407; message = internAtomF("Proxy authentication %s", word); h = "Proxy-Authenticate"; } headers = internAtomF("\r\n%s: Basic realm=\"%s\"", h, authRealm->string); if(message_return) *message_return = message; else releaseAtom(message); *headers_return = headers; return code; } int checkClientAuth(AtomPtr auth, AtomPtr url, AtomPtr *message_return, AtomPtr *headers_return) { int code = 0; AtomPtr message = NULL, headers = NULL; if(authRealm == NULL || authCredentials == NULL) return 0; if(auth == NULL) code = buildClientAuthHeaders(url, "required", &message, &headers); else if(auth->length >= 6 || lwrcmp(auth->string, "basic ", 6) == 0) { if(b64cmp(auth->string + 6, auth->length - 6, authCredentials->string, authCredentials->length) == 0) return 0; code = buildClientAuthHeaders(url, "incorrect", &message, &headers); } else { code = buildClientAuthHeaders(url, NULL, NULL, &headers); message = internAtom("Unexpected authentication scheme"); } *message_return = message; *headers_return = headers; return code; } int buildServerAuthHeaders(char* buf, int n, int size, AtomPtr authCredentials) { char authbuf[4 * 128 + 3]; int authlen; if(authCredentials->length >= 3 * 128) return -1; authlen = b64cpy(authbuf, parentAuthCredentials->string, parentAuthCredentials->length, 0); n = snnprintf(buf, n, size, "\r\nProxy-Authorization: Basic "); n = snnprint_n(buf, n, size, authbuf, authlen); return n; } polipo-1.0.4.1/atom.h0000644000175000017500000000425711331407220013601 0ustar chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation 
files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

/* An atom: an interned, reference-counted byte string.  atom.c keeps
   one AtomRec per distinct content in a hash table. */
typedef struct _Atom {
    unsigned int refcount;      /* number of outstanding references */
    struct _Atom *next;         /* next atom in the same hash bucket */
    unsigned short length;      /* number of bytes in string, excluding
                                   the terminating NUL */
    char string[1];             /* contents; atom.c allocates the record
                                   with room for length + 1 bytes and
                                   always NUL-terminates */
} AtomRec, *AtomPtr;

/* A growable array of atoms. */
typedef struct _AtomList {
    int length;                 /* number of entries in use */
    int size;                   /* number of entries allocated */
    AtomPtr *list;              /* the entries, or NULL when size is 0 */
} AtomListRec, *AtomListPtr;

/* The atom hash table has (1 << LOG2_ATOM_HASH_TABLE_SIZE) buckets. */
#define LOG2_ATOM_HASH_TABLE_SIZE 10
/* retainAtom and releaseAtom assert that refcount stays below this. */
#define LARGE_ATOM_REFCOUNT 0xFFFFFF00U

/* Number of atoms currently in the hash table. */
extern int used_atoms;

/* Allocate the hash table; must run before any atom is interned. */
void initAtoms(void);
/* Intern a NUL-terminated string; returns a new reference or NULL. */
AtomPtr internAtom(const char *string);
/* Intern the first n bytes of string; returns a new reference or NULL. */
AtomPtr internAtomN(const char *string, int n);
/* As internAtomN, after passing the bytes through lwrcpy. */
AtomPtr internAtomLowerN(const char *string, int n);
/* Intern the concatenation of atom's contents and string. */
AtomPtr atomCat(AtomPtr atom, const char *string);
/* Split atom at the first occurrence of c.  Returns 1 and stores the
   two halves, 0 if c does not occur, or -ENOMEM. */
int atomSplit(AtomPtr atom, char c, AtomPtr *return1, AtomPtr *return2);
/* Take an additional reference; NULL is passed through. */
AtomPtr retainAtom(AtomPtr atom);
/* Drop a reference, freeing the atom when none remain; NULL is ignored. */
void releaseAtom(AtomPtr atom);
/* Intern "<formatted f>: <pstrerror(e)>", or just the error text when
   f is NULL. */
AtomPtr internAtomError(int e, const char *f, ...)
     ATTRIBUTE ((format (printf, 2, 3)));
/* Intern the result of a printf-style format. */
AtomPtr internAtomF(const char *format, ...)
     ATTRIBUTE ((format (printf, 1, 2)));
/* The atom's contents, or the literal "(null)" for a NULL atom. */
char *atomString(AtomPtr) ATTRIBUTE ((pure));
/* Create a list holding the n atoms in atoms[]; the references are
   copied as-is, without being retained. */
AtomListPtr makeAtomList(AtomPtr *atoms, int n);
/* Release every atom in the list and free the list itself. */
void destroyAtomList(AtomListPtr list);
/* Return 1 if atom (compared by pointer) is in list, 0 otherwise. */
int atomListMember(AtomPtr atom, AtomListPtr list)
    ATTRIBUTE ((pure));
/* Append atom to list, growing the array if needed. */
void atomListCons(AtomPtr atom, AtomListPtr list);

polipo-1.0.4.1/atom.c0000644000175000017500000002023311331407220013564 0ustar  chrisdchrisd/* Copyright (c) 2003-2006 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ #include "polipo.h" static AtomPtr *atomHashTable; int used_atoms; void initAtoms() { atomHashTable = calloc((1 << LOG2_ATOM_HASH_TABLE_SIZE), sizeof(AtomPtr)); if(atomHashTable == NULL) { do_log(L_ERROR, "Couldn't allocate atom hash table.\n"); exit(1); } used_atoms = 0; } AtomPtr internAtomN(const char *string, int n) { AtomPtr atom; int h; if(n < 0 || n >= (1 << (8 * sizeof(unsigned short)))) return NULL; h = hash(0, string, n, LOG2_ATOM_HASH_TABLE_SIZE); atom = atomHashTable[h]; while(atom) { if(atom->length == n && (n == 0 || memcmp(atom->string, string, n) == 0)) break; atom = atom->next; } if(!atom) { atom = malloc(sizeof(AtomRec) - 1 + n + 1); if(atom == NULL) { return NULL; } atom->refcount = 0; atom->length = n; /* Atoms are used both for binary data and strings. To make their use as strings more convenient, atoms are always NUL-terminated. */ memcpy(atom->string, string, n); atom->string[n] = '\0'; atom->next = atomHashTable[h]; atomHashTable[h] = atom; used_atoms++; } do_log(D_ATOM_REFCOUNT, "A 0x%lx %d++\n", (unsigned long)atom, atom->refcount); atom->refcount++; return atom; } AtomPtr internAtom(const char *string) { return internAtomN(string, strlen(string)); } AtomPtr atomCat(AtomPtr atom, const char *string) { char buf[128]; char *s = buf; AtomPtr newAtom; int n = strlen(string); if(atom->length + n > 128) { s = malloc(atom->length + n + 1); if(s == NULL) return NULL; } memcpy(s, atom->string, atom->length); memcpy(s + atom->length, string, n); newAtom = internAtomN(s, atom->length + n); if(s != buf) free(s); return newAtom; } int atomSplit(AtomPtr atom, char c, AtomPtr *return1, AtomPtr *return2) { char *p; AtomPtr atom1, atom2; p = memchr(atom->string, c, atom->length); if(p == NULL) return 0; atom1 = internAtomN(atom->string, p - atom->string); if(atom1 == NULL) return -ENOMEM; atom2 = internAtomN(p + 1, atom->length - (p + 1 - atom->string)); if(atom2 == NULL) { releaseAtom(atom1); return -ENOMEM; } *return1 = atom1; *return2 = atom2; return 
1; } AtomPtr internAtomLowerN(const char *string, int n) { char *s; char buf[100]; AtomPtr atom; if(n < 0 || n >= 50000) return NULL; if(n < 100) { s = buf; } else { s = malloc(n); if(s == NULL) return NULL; } lwrcpy(s, string, n); atom = internAtomN(s, n); if(s != buf) free(s); return atom; } AtomPtr retainAtom(AtomPtr atom) { if(atom == NULL) return NULL; do_log(D_ATOM_REFCOUNT, "A 0x%lx %d++\n", (unsigned long)atom, atom->refcount); assert(atom->refcount >= 1 && atom->refcount < LARGE_ATOM_REFCOUNT); atom->refcount++; return atom; } void releaseAtom(AtomPtr atom) { if(atom == NULL) return; do_log(D_ATOM_REFCOUNT, "A 0x%lx %d--\n", (unsigned long)atom, atom->refcount); assert(atom->refcount >= 1 && atom->refcount < LARGE_ATOM_REFCOUNT); atom->refcount--; if(atom->refcount == 0) { int h = hash(0, atom->string, atom->length, LOG2_ATOM_HASH_TABLE_SIZE); assert(atomHashTable[h] != NULL); if(atom == atomHashTable[h]) { atomHashTable[h] = atom->next; free(atom); } else { AtomPtr previous = atomHashTable[h]; while(previous->next) { if(previous->next == atom) break; previous = previous->next; } assert(previous->next != NULL); previous->next = atom->next; free(atom); } used_atoms--; } } AtomPtr internAtomF(const char *format, ...) { char *s; char buf[150]; int n; va_list args; AtomPtr atom = NULL; va_start(args, format); n = vsnprintf(buf, 150, format, args); va_end(args); if(n >= 0 && n < 150) { atom = internAtomN(buf, n); } else { va_start(args, format); s = vsprintf_a(format, args); va_end(args); if(s != NULL) { atom = internAtom(s); free(s); } } return atom; } static AtomPtr internAtomErrorV(int e, const char *f, va_list args) { char *es = pstrerror(e); AtomPtr atom; char *s1, *s2; int n, rc; if(f) { s1 = vsprintf_a(f, args); if(s1 == NULL) return NULL; n = strlen(s1); } else { s1 = NULL; n = 0; } s2 = malloc(n + 70); if(s2 == NULL) { free(s1); return NULL; } if(s1) { strcpy(s2, s1); free(s1); } rc = snprintf(s2 + n, 69, f ? 
": %s" : "%s", es); if(rc < 0 || rc >= 69) { free(s2); return NULL; } atom = internAtomN(s2, n + rc); free(s2); return atom; } AtomPtr internAtomError(int e, const char *f, ...) { AtomPtr atom; va_list args; va_start(args, f); atom = internAtomErrorV(e, f, args); va_end(args); return atom; } char * atomString(AtomPtr atom) { if(atom) return atom->string; else return "(null)"; } AtomListPtr makeAtomList(AtomPtr *atoms, int n) { AtomListPtr list; list = malloc(sizeof(AtomListRec)); if(list == NULL) return NULL; list->length = 0; list->size = 0; list->list = NULL; if(n > 0) { int i; list->list = malloc(n * sizeof(AtomPtr)); if(list->list == NULL) { free(list); return NULL; } list->size = n; for(i = 0; i < n; i++) list->list[i] = atoms[i]; list->length = n; } return list; } void destroyAtomList(AtomListPtr list) { int i; if(list->list) { for(i = 0; i < list->length; i++) releaseAtom(list->list[i]); list->length = 0; free(list->list); list->list = NULL; list->size = 0; } assert(list->size == 0); free(list); } int atomListMember(AtomPtr atom, AtomListPtr list) { int i; for(i = 0; i < list->length; i++) { if(atom == list->list[i]) return 1; } return 0; } void atomListCons(AtomPtr atom, AtomListPtr list) { if(list->list == NULL) { assert(list->size == 0); list->list = malloc(5 * sizeof(AtomPtr)); if(list->list == NULL) { do_log(L_ERROR, "Couldn't allocate AtomList\n"); return; } list->size = 5; } if(list->size <= list->length) { AtomPtr *new_list; int n = (2 * list->length + 1); new_list = realloc(list->list, n * sizeof(AtomPtr)); if(new_list == NULL) { do_log(L_ERROR, "Couldn't realloc AtomList\n"); return; } list->list = new_list; list->size = n; } list->list[list->length] = atom; list->length++; } polipo-1.0.4.1/README.Windows0000644000175000017500000000242211331407220014771 0ustar chrisdchrisdBuilding Polipo on Windows -*-text-*- ************************** There are two distinct ports of Polipo to Windows -- a port using the Cygwin emulation libraries, and an 
experimental native port using Mingw. The Cygwin port is identical to the Unix binary. Build it just like you would build under Unix -- just type ``make all'' in the directory where you untarred the Polipo sources. In order to build the native port, cd to the Polipo directory, and do make EXE=.exe LDLIBS=-lwsock32 or, if you've got a regex library, make EXE=.exe EXTRA_DEFINES=-DHAVE_REGEX LDLIBS="-lwsock32 -lregex" In order to cross-compile from a Unix system, you will probably need to point make at the right compiler: make EXE=.exe CC=i586-mingw32msvc-gcc LDLIBS=-lwsock32 The native port currently attempts to access files in locations that are typical for a Unix system; for example, it will attempt to read a configuration file /etc/polipo/config on the current drive. You will probably need to point it at your config file with an explicit ``-c'' command-line argument, and define at least the following configuration variables: dnsNameServer diskCacheRoot forbiddenFile Help with solving this issue would be very much appreciated. Juliusz Chroboczek polipo-1.0.4.1/README0000644000175000017500000000130211331407220013334 0ustar  chrisdchrisdPolipo README -*-text-*- ************* Polipo is a single-threaded, non-blocking caching web proxy that has very modest resource needs. See the file INSTALL for installation instructions. See the texinfo manual (available as HTML after installation) for more information. Current information about Polipo can be found on the Polipo web page, http://www.pps.jussieu.fr/~jch/software/polipo/ I can be reached at the e-mail address below, or on the Polipo-users mailing list: Please see the Polipo web page for subscription information. 
Juliusz Chroboczek
polipo-1.0.4.1/Makefile0000644000175000017500000000775511331407220014136 0ustar  chrisdchrisd
# Installation locations.  TARGET (not set here) is prefixed to every
# install path, so it can be used for staged installs.
PREFIX = /usr/local
BINDIR = $(PREFIX)/bin
MANDIR = $(PREFIX)/man
INFODIR = $(PREFIX)/info
LOCAL_ROOT = /usr/share/polipo/www
DISK_CACHE_ROOT = /var/cache/polipo

# To compile with Unix CC:

# CDEBUGFLAGS=-O

# To compile with GCC:

# CC = gcc
# CDEBUGFLAGS = -Os -g -Wall -std=gnu99
CDEBUGFLAGS = -Os -g -Wall
# CDEBUGFLAGS = -Os -Wall
# CDEBUGFLAGS = -g -Wall

# To compile on a pure POSIX system:

# CC = c89
# CC = c99
# CDEBUGFLAGS=-O

# To compile with icc 7, you need -restrict.  (Their bug.)

# CC=icc
# CDEBUGFLAGS = -O -restrict

# On System V (Solaris, HP/UX) you need the following:

# PLATFORM_DEFINES = -DSVR4

# On Solaris, you need the following:

# LDLIBS = -lsocket -lnsl -lresolv

# On mingw, you need

# EXE=.exe
# LDLIBS = -lwsock32

FILE_DEFINES = -DLOCAL_ROOT=\"$(LOCAL_ROOT)/\" \
               -DDISK_CACHE_ROOT=\"$(DISK_CACHE_ROOT)/\"

# You may optionally also add any of the following to DEFINES:
#
#  -DNO_DISK_CACHE to compile out the on-disk cache and local web server;
#  -DNO_IPv6 to avoid using the RFC 3493 API and stick to stock
#      Berkeley sockets;
#  -DHAVE_IPv6 to force the use of the RFC 3493 API on systems other
#      than GNU/Linux and BSD (let me know if it works);
#  -DNO_FANCY_RESOLVER to compile out the asynchronous name resolution
#      code;
#  -DNO_STANDARD_RESOLVER to compile out the code that falls back to
#      gethostbyname/getaddrinfo when DNS requests fail;
#  -DNO_TUNNEL to compile out the code that handles CONNECT requests;
#  -DNO_SOCKS to compile out the SOCKS gateway code.
#  -DNO_FORBIDDEN to compile out all of the forbidden URL code
#  -DNO_REDIRECTOR to compile out the Squid-style redirector code
#  -DNO_SYSLOG to compile out logging to syslog

DEFINES = $(FILE_DEFINES) $(PLATFORM_DEFINES)

CFLAGS = $(MD5INCLUDES) $(CDEBUGFLAGS) $(DEFINES) $(EXTRA_DEFINES)

SRCS = util.c event.c io.c chunk.c atom.c object.c log.c diskcache.c main.c \
       config.c local.c http.c client.c server.c auth.c tunnel.c \
       http_parse.c parse_time.c dns.c forbidden.c \
       md5import.c md5.c ftsimport.c fts_compat.c socks.c mingw.c

# md5.c and fts_compat.c have no object of their own; they are listed
# as prerequisites of md5import.o and ftsimport.o below.
OBJS = util.o event.o io.o chunk.o atom.o object.o log.o diskcache.o main.o \
       config.o local.o http.o client.o server.o auth.o tunnel.o \
       http_parse.o parse_time.o dns.o forbidden.o \
       md5import.o ftsimport.o socks.o mingw.o

polipo$(EXE): $(OBJS)
	$(CC) $(CFLAGS) $(LDFLAGS) -o polipo$(EXE) $(OBJS) $(MD5LIBS) $(LDLIBS)

ftsimport.o: ftsimport.c fts_compat.c

md5import.o: md5import.c md5.c

.PHONY: all install install.binary install.man

all: polipo$(EXE) polipo.info html/index.html localindex.html

install: install.binary install.man

# The binary is built as polipo$(EXE), so it must be installed under
# that name as well; with the default empty EXE this is unchanged.
install.binary: all
	mkdir -p $(TARGET)$(BINDIR)
	mkdir -p $(TARGET)$(LOCAL_ROOT)
	mkdir -p $(TARGET)$(LOCAL_ROOT)/doc
	rm -f $(TARGET)$(BINDIR)/polipo$(EXE)
	cp -f polipo$(EXE) $(TARGET)$(BINDIR)/
	cp -f html/* $(TARGET)$(LOCAL_ROOT)/doc
	cp -f localindex.html $(TARGET)$(LOCAL_ROOT)/index.html

install.man: all
	mkdir -p $(TARGET)$(MANDIR)/man1
	mkdir -p $(TARGET)$(INFODIR)
	cp -f polipo.man $(TARGET)$(MANDIR)/man1/polipo.1
	cp polipo.info $(TARGET)$(INFODIR)/
	install-info --info-dir=$(TARGET)$(INFODIR) polipo.info

polipo.info: polipo.texi
	makeinfo polipo.texi

html/index.html: polipo.texi
	mkdir -p html
	makeinfo --html -o html polipo.texi

polipo.html: polipo.texi
	makeinfo --html --no-split --no-headers -o polipo.html polipo.texi

polipo.pdf: polipo.texi
	texi2pdf polipo.texi

polipo.ps.gz: polipo.ps
	gzip -c polipo.ps > polipo.ps.gz

polipo.ps: polipo.dvi
	dvips -Pwww -o polipo.ps polipo.dvi

polipo.dvi: polipo.texi
	texi2dvi polipo.texi

polipo.man.html: polipo.man
	groff -man -Thtml polipo.man > polipo.man.html

TAGS: $(SRCS)
	etags $(SRCS)

.PHONY: clean

clean:
	-rm -f polipo$(EXE) *.o *~ core TAGS gmon.out
	-rm -f polipo.cp polipo.fn polipo.log polipo.vr
	-rm -f polipo.cps polipo.info* polipo.pg polipo.toc polipo.vrs
	-rm -f polipo.aux polipo.dvi polipo.ky polipo.ps polipo.tp
	-rm -f polipo.ps.gz polipo.pdf polipo.html
	-rm -rf ./html/
	-rm -f polipo.man.html
polipo-1.0.4.1/INSTALL0000644000175000017500000000403511331407220013513 0ustar  chrisdchrisdPolipo installation instructions -*-text-*- ******************************** 1. Building and running polipo ****************************** $ make all $ su -c 'make install' $ man polipo $ polipo & If you want Polipo to put itself into the background, you may replace the last line with: $ polipo daemonise=true logFile="/var/log/polipo.log" On SVR4 systems (Solaris, HP/UX), you will need to use one of the following (whichever works): $ make PLATFORM_DEFINES=-DSVR4 all $ make PLATFORM_DEFINES=-DSVR4 LDLIBS='-lsocket -lnsl -lresolv' all You can also use Polipo without installing: $ make $ nroff -man polipo.man | more $ ./polipo & For information about building on Windows, please see the file README.Windows. 2. Configuring your user-agent ****************************** Once polipo is running, configure your user-agent (web browser) to use the proxy on `http://localhost:8123/'. Depending on the user-agent, this is done either by setting the environment variable http_proxy, e.g. $ http_proxy=http://localhost:8123; export http_proxy or by using the browser's ``preferences'' menu. 3. 
Configuring polipo ********************* If you want to use an on-disk cache, you will need to create its root directory: $ mkdir /var/cache/polipo/ You should then arrange for cron to run the following on a regular basis: killall -USR1 polipo sleep 1 polipo -x killall -USR2 polipo If you want to use a configuration file, you should put it in one of the locations `/etc/polipo/config' or `~/.polipo'; you can also use the `-c' flag to put it in a non-standard location. See the file `config.sample' for an example. You might also want to create a forbidden URLs file, which you should put either in one of `/etc/polipo/forbidden' or `~/.polipo-forbidden'; you can set the variable `forbiddenFile' in your config file if you want to put it in a non-standard location. See `forbidden.sample' for an example. Juliusz Chroboczek polipo-1.0.4.1/COPYING0000644000175000017500000000205711331407220013517 0ustar chrisdchrisdCopyright (c) 2003-2008 by Juliusz Chroboczek Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
polipo-1.0.4.1/CHANGES0000644000175000017500000005202411331407220013456 0ustar chrisdchrisd31 January 2010: Polipo 1.0.4.1: Cherry-picked fixes from 1.0.5 * Fixed an integer overflow that may lead to a crash (http://secunia.com/advisories/37607/). Discovered by Jeremy Brown. (CVE-2009-4413) * Fixed a crash that occurs when a server sends a malformed Cache-Control: header (CVE-2009-3305). Patch from Stefan Fritsch. * Prevent an infinite loop when a bodyless 204 or 1xx response is encountered. * Don't crash when we get an error while waiting for 100 continue status. 8 January 2008: Polipo 1.0.4: * Fixed the handling of tunnels with a parent proxy (thanks to Richard Šputa). * Fixed a bug that could cause connections to be shut down when a server used the old (RFC 2068) semantics of ``100 Continue'' (thanks to Thomas Wiesel). * Fixed a crash when a request URL was larger than 10 kB (thanks to Fabian Keil). * Fixed a possible failure to read client requests larger than one chunk. 6 October 2007: Polipo 1.0.3 * Changed the default for chunkMemory: it is now 24 MB or one-quarter of physical memory, whichever is smaller. * Support for logging to syslog (thanks to Nix). * Made atom reference counts 32-bit longs; this should fix problems that people were seeing when running Polipo with humongous in-memory caches. * Added Git, Jabber and CVS to default tunnelAllowedPorts. * Fixed a bug that could cause URL matching to fail when using anchored regular expressions (thanks to phuel). 26 August 2007: Polipo 1.0.2: * Fixed a crash that could happen with entities more than 2GB in size. Such entities are still not supported, but Polipo should no longer crash. * Minor HTTP compliance fixes, due to testing with Co-Advisor. * Fixed a crash that would happen when a POST request was aborted by the server. Reported by Tero Pelander. * Worked around a DNS-related bug that I'm unable to track down, waiting for a proper fix. 
25 June 2007: Polipo 1.0.1: * Made Polipo slightly more aggressive when speaking to HTTP/1.0 servers (thanks to Fabian Keil for noticing that). * Fixed a crash that would happen when a client used Cache-Control: only-if-cached, and the object was not in cache. (Reported by F. Zappa, A. Patala and V. Ghosal.) * Fixed a descriptor leak when running under Windows. * Made Polipo optionally drop connections after servicing a number of connections (maxConnectionAge and maxConnectionRequests). 6 March 2007: Polipo 1.0.0 * No changes since 0.9.99.2. 7 February 2007: Polipo 0.9.99.2 * Fixed a buffer overflow in urlDirname (the 0.9 branch is not vulnerable) (reported by J. P. Larocque). * Implemented use of IPv6 temporary source addresses (Frank Behrens). * Disabled use of unaligned range requests by default. This is controlled by the variable allowUnalignedRangeRequests (reported by Chris Moore). * Fixed descriptor leaks in SOCKS error handling (reported by Roger Dingledine). * Implemented maxSideBuffering. 6 February 2007: Polipo 0.9.12 * Fixed incorrect caching of redirects (reported by Lawrence Lu). * Fixed a possible hang when falling back to gethostbyname (reported by Chris Moore). 28 December 2006: Polipo 0.9.99.1 * Validation improvements and bug fixes. * Don't use cached data when receiving the output from an HTTP/1.0 CGI. * Allowed tunnelling of IMAP and POP traffic by default. * Changed the disk cache expiry and indexing functions to use chunks. * Made the disk cache unreadable by others by default. * Fixed a bug that could cause stale data to be served after a connection failure (reported by Hondza). * Fixed computation of age and rtt for pipelined requests. * Fixed incorrect cachability of redirects (reported by J.-P. Larocque). * Fixed a bug that would cause uncachable objects to become cachable after being reloaded from the on-disk cache (reported by J.-P. Larocque). * Implemented dontTrustVaryETag. 
7 December 2006: Polipo 0.9.11 * Fixed a crash that could happen when a network interface went down while a DNS query was in progress (reported by Francesco Zappa). 20 November 2006: Polipo 0.9.99.0: * Implemented large buffers for headers larger than a chunk's worth. * Made the HTTP parser lax by default (ignores unknown headers). * Implemented the infrastructure for local POST requests and implemented a real configuration interface (thanks to Theo Honohan). * Made timeouts user-configurable and enforced an invariant between timeouts. * Made logging configurable at runtime (thanks to Frank Behrens). * Implemented the infrastructure for asynchronous handling of forbidden URLs. * Implemented the ability to redirect instead of returning an error for forbidden URLs. * Implemented support for Squid-style redirectors. * Implemented user-configurable uncacheable URLs, analogous to forbidden URLs (thanks to Joachim Haga). * Implemented the ability to avoid caching pages with cookies and redirects. * Implemented maxPipelineTrain, which can be used to moderate Polipo's eagerness to pipeline. * Unified parentHost and parentPort into parentProxy. * Ported Polipo to native Windows (thanks to Dan Kennedy). * Implemented disableVia. * Implemented SOCKS support. * Made disableVia and cacheIsShared to be true by default. * Increased the default value of serverMaxSlots to 8. * Made the disk cache code quote all characters except for a small number of ``known safe'' ones. This is an incompatible change to the on-disk format. * Changed HTTP parser to pass all Pragma headers to the next hop; this should make some media players work through Polipo. * Changed the connection scheduler to avoid pipelining when there are idle connections to a given server. * Made Polipo obey parentProxy when tunnelling (proxying https). * Changed the default value of allowedPorts to be slightly more permissive. * Implemented tweakables for selectively disabling parts of the configuration interface. 
Indexing and listing known servers are now disabled by default. * Hide variables containing passwords. * Fixed a bug that could cause incorrect validation when speaking to an HTTP/1.0 server. * Fixed a bug that could cause incorrect validation of Vary objects. * Fixed a crash in the redirector code. * Made disableVia the default, and changed the default value of idleTime. * Made polipo delay serving of partial objects until after a successful validation. This should fix Mozilla's prefetching. * On 64-bit platforms, made CHUNK_SIZE default to 8kB. 2 September 2006: Polipo 0.9.10: * Fixed a crash when a client closes a connection at just the wrong time. * Fixed a crash that could happen when a server returned incorrect headers and closed the connection at just the wrong time. * Fixed restarting of connections on a server-side read error; this should avoid the ``connection reset by peer'' problem. * Corrected work-around for DNS servers that export both AAAA and CNAME. * Fix incorrect error handling when overflowing the buffer when saving an entity to disk. * IPv6 tweaks for OpenBSD (thanks to Jun-ichiro itojun Hagino). * Fixed incorrect error-handling on failure to parse a date. * Fixed a deadlock when a tunnel is shut down and the buffer is full. * Fixed an incorrect use of va_start (guaranteed crash on AMD64). * Fixed a possible race condition with a heavily pipelining client. * Fixed a crash due to incorrect handling of write errors in POST. 23 September 2005: Polipo 0.9.9: * Fixed a bug that could cause objects to be incorrectly determined to be dynamic (thanks to Joachim B. Haga). * Fixed a bug that could cause the local web server to expose files that are not under the local root (thanks to Wessel Dankers). * Fixed an off-by-one bug when parsing NL-terminated headers. * Made Polipo forget about failures when finishing on the client side. * Polipo now sends Host headers even when speaking to an upstream proxy. 
Some proxies take RFC 2616 literally, and require that (thanks to Zoltan Ivanfi). * Fixed some bugs in staleness computation, and implemented server-side max-age directives (oops!) (thanks to Charley Chu). 24 January 2005: Polipo 0.9.8: * Backported the lax HTTP parser from the head branch. * Fixed a race condition that could cause a crash if a single object was being superseded twice at the same time. * Fixed an incorrect test that caused Polipo to pipeline to all HTTP/1.1 servers, even when they were determined as broken (thanks to Daniel Koukola). * Implemented maxPipelineTrain. * Tweaked for uclibc (thanks to Detlef Riekenberg). 27 December 2004: Polipo 0.9.7: * Fixed a possible crash when tunnelling. * Fixed spurious updates of object sizes when connection is dropped by the client. * Fixed parsing of URLs with explicit port number (thanks to Frank Behrens). * Fixed a possible crash when exiting POST in error. * Fixed a protocol violation when an empty object is not superseded. 31 October 2004: Polipo 0.9.6: * Fixed a possible crash in ServeObject. * Fixed two possible crashes when relaxTransparency is not false. * Modified the config file parser to make it possible to have backslashes in atoms. * Fixed a violated invariant (leading to a crash) when superseding objects. * Fixed a possible crash in ClientFinish when a pipelined request carries no object. * Fixed a bug in handling of client-side Expect: 100-continue (reported by Charley Chu). * Fixed a scheduling bug that caused server-side requests to be issued in the order opposite to a client-side pipeline (many thanks to Joachim Haga). * Abort when the config file couldn't be parsed (thanks to Joachim Haga). * Fixed error handling in POST and PUT requests, which could cause a crash on an I/O error. 17 June 2004: Polipo 0.9.5: * Implemented upstream proxy authentication (HTTP Basic only). * Fixed a possible crash when unable to schedule servicing a request. 
* Fixed a possible crash when tunnelling (proxying https). * Fixed signedness issues to make allowedClients work on PPC (from Gergely Nagy). 10 May 2004: Polipo 0.9.4: * Fixed a bug (introduced in 0.9.3) that could cause a crash when the download of an object was interrupted and then immediately restarted. * Fixed a bug that could cause stale non-200 replies to be served. * Fixed compilation on FreeBSD 5.2.1 (from Samuel Tardieu). * Fixed definition of *_ROOT in diskcache.c 6 April 2004: Polipo 0.9.3: * Fix incorrect handling of EPIPE when tunnelling; this could cause crashes if a peer closed a connection when we're writing. * Fix a race condition that could cause ``error message lost in transit'' errors if a request was cancelled during connect. * Check for exitFlag in workToDo: faster reaction to signals. 28 March 2004: Polipo 0.9.2: * Fixed a bug that could cause crashes when writing out small objects (thanks to Frank Behrens). * Made buffer allocation in httpParseHeaders dynamic. * Fixed the declaration of pipelineAdditionalRequests. * Fixed a bug that could cause empty directories to be missed when expiring the disk cache. * Switched the forbidden file to use extended regexps, the previous usage was non-portable (thanks to Frank Behrens). 9 March 2004: Polipo 0.9.1: * Fixed a bug that could cause chunked encoding failures when going from a 1.0 server to a 1.1 client. * Fixed a bug that prevented comments after some config lines (thanks to Tom Huckstep). * Fixed a possible buffer overflow in dnsDecodeReply. * Fixed portability to systems where rmdir returns EEXIST instead of ENOTEMPTY. * Fixed error handling on fork failures in fillSpecialObject. * Fixed handling of EINTR in wait in specialRequestHandler. * Fixed a bug that caused objects with no headers to fail. * Fixed a minor memory leak in the config file parser. * Minor build fixes for NetBSD. * Added the 68020 and later to the list of architectures that support unaligned access. 
18 February 2004: Polipo 0.9: * Reworked the DNS code to parse ids even when a reply's qdcount is 0. No longer falls back to gethostbyname when DNS server returns FormErr. * Made the DNS code parse resolv.conf. 11 February 2004: Polipo 0.8.99.3: * Minor changes to work around Cygwin mis-design. * Fixed printing of n-state variables. * Fixed proxyOffline handling. * Fixed a bug that would cause errors to be reported with the wrong content-type. * Fixed a bug that would cause ``object vanished'' errors when using HEAD for revalidation. * Fixed a bug that could cause failed requests due to ``client reset connection'' errors. 24 January 2004: Polipo 0.8.99.2: * Cleaned up authentication. * Made authenticated replies cachable in one of the cases allowed by RFC 2616. * Fixed a bug that could, under some circumstances, cause a password-protected object to be cached and returned to a non-authorized client. * Implemented 100-continue, controlled by the variable expectContinue. * Implemented tristate, 4- and 5-state variables. Split proxyOffline into proxyOffline and relaxTransparency. This is an incompatible change to the config file format. * Cleaned up the handling of allowed port ranges. New configuration variable allowedPorts (and new type intlist). * Implemented tunnelling through the CONNECT method (https proxying). * Will now read a request body on error (avoids a deadlock). * Reworked the PUT/POST code to read the reply eagerly rather than waiting for the write to finish (avoids writing the full body on error and avoids the same deadlock as above). * Made server addresses sticky: will now remember which of a server's addresses worked last time, and use that address first. 16 January 2004: Polipo 0.8.99.1: * Fixed an expiry bug that caused DNS queries to be repeated on each request. * Added the variable ``preciseExpiry'' that prevents trusting the mtime during expiry. 14 January 2004: Polipo 0.8.99.0: * Implemented IP address-based authentication. 
* Implemented HTTP ``basic'' authentication. * Implemented variable body offsets for the on-disk cache; this makes the on-disk cache format incompatible with previous versions. * Made the number of server slots configurable. * Partially rewrote the disk cache code. * Fixed a file descriptor leak in the early error handling code. * Fixed a bug in the base64 encoder; this makes the on-disk cache format incompatible with previous versions. * Implemented proper reporting for 100-Continue replies (100-Continue is not implemented yet). * Made the number of server slots configurable at runtime. 9 January 2004: Polipo 0.8.4: * Log file is now line buffered. * Will reopen the log file on SIGUSR1 and SIGUSR2. * censoredHeaders now defaults to none, and censorReferer to 0. * Fixed a memory allocation bug that could cause a crash. 21 December 2003: Polipo 0.8.3: * Fixed a potential buffer overflow on C89 systems in snnprintf. * Fixed checking of Via headers. * Added configurable log file. * Added code to run as a daemon. * Made the resolver grok names ending in a ``.''. * Changed Makefile to fit Debian better. 7 December 2003: Polipo 0.8.2: * Implemented a version of fts for SVR4 systems. * Implemented a version of mktime_gmt that doesn't use setenv. * Fixed code used to determine FQDN. * More unaligned access fixes. * Disabled queryIPv6 when there is no IPv6 support in kernel. * Enabled IPv6 support by default on FreeBSD and NetBSD. 2 December 2003: Polipo 0.8.1: * Fix a possible crash when doing a POST on a busy server. * Fix a possible crash when socket(2) fails; implement switching to a different address when socket(2) fails (e.g. when accessing a double-stack host from an IPv4-only client). * Fix a problem with servers stuck in the ``probing'' state. * Work around a bug in Konqueror that strips question marks from URLs. * Fix incorrect error handling when dealing with connection failures. * Fix a compile problem in dns.c. 
* Remove dependency on SSL, include MD5 code instead. * Fix signedness of s_maxage. 23 November 2003: Polipo 0.8: * IPv6 support, on both the client and server side, including DNS support and RFC 2732. * Reworked the DNS code. * Made it possible to compile without any particular resolver and without the on-disk cache. * Fixed a problem with the chunking encoder. * Made the config file parser grok octal and hex values, allowed colons and tildes in unquoted strings. * Implemented tilde expansion in some config variables. * Made Polipo slightly less eager to honour range requests for dynamic instances. Marked generated objects as dynamic. These changes should solve some of the problems with PMM. * Implemented the If-Range header (on both the client and server side). * Implemented support for range requests smaller than one chunk (and hence for pmmSize being smaller than CHUNK_SIZE). * Fixed a bug that caused a deadlock (until a server timeout) when doing a POST/PUT request with no free connection slots. * Fixed a problem when diskCacheRoot didn't end in `/'. * Fixed a refcounting problem that could cause Polipo to crash on a DNS timeout. * Fixed an alignment problem that could cause crashes on architectures that don't like unaligned memory accesses (thanks to Rob Byrnes). * Fixed a bug with the disk cache that caused spurious ``disk entry changed behind our back'' warnings (and in principle could cause data corruption, although that's very unlikely). * Made opening connections slightly less aggressive -- Polipo would sometimes open two connections where only one was needed. * Modified client-side code to notice client shutdowns earlier and notify the server side straight away. 7 October 2003: Polipo 0.7 * Made the Request function a method of objects. * Fixed a bug that could sometimes cause data corruption. * Fixed a bug that could cause sending of incorrect data to the client on a range request. * Fixed POST and PUT requests. 
* Fixed a bug that could sometimes cause a POST request to use a stale connection. * Included code to do poor man's multiplexing. * Will now open multiple connections to non-persistent servers. * Fixed a bug that could lead to idle connections dying without being noticed. * Fixed probing for pipelining. * Actually use the new time function introduced in 0.5. * Fixed a bug in strcasecmp_n. * forbiddenFile can now be a directory. 26 September 2003: Polipo 0.6 * Fixed precondition handling. * Fixed a bug that could lead to lockups when revalidating an object. 27 June 2003: Polipo 0.5 * Made the presence of a reader or writer explicit on the client side. * Reworked closing client connections. * Reworked reporting of server-side errors. * Made buffer allocation lazy; idle clients and servers no longer use up buffers. * Reworked UTC time handling to use timegm(3) when available. 12 March 2003: Polipo 0.4 * Implemented expiry of the on-disk cache. * Implemented reliable aborting of connections; Polipo should no longer deadlock when a server falls into a black hole. * Changed direct reads to be more aggressive by using readv in three pieces. * Changed serving of chunked data to be more eager about serving a chunk's end marker. * Implemented better reporting of DNS errors. * Fixed a deadlock with pipelining on the client side. * Removed most of the remaining copies when reading on the server side. * Fixed a bug that caused some headers to disappear in transit. * Fixed a possible livelock when reading chunked encoding. * Fixed an overflow when pipelining on the server side. * Fixed generation of indexes from the on-disk cache. * Fixed a DNS crash when falling back on gethostbyname. 1 March 2003: Polipo 0.3 * Implemented retrying of timed-out DNS requests. * Implemented configuration mechanisms for case-insensitive atoms, time values and atom lists; censoredHeaders can now be configured. * No longer censors User-Agent. Blame Beppe and Alain. 
* Changed the handling of hop-by-hop HTTP headers to deal with multiple Connection headers. * Made client-side errors and successful revalidation no longer close the connection. * Fixed a bug that caused the allocation of an extraneous 2MB (!) at startup. Polipo can run in 100KB once again. * Fixed a refcounting bug and some incorrect frees that could lead to crashes when recovering from a server-side error. * Fixed a bug with POST/PUT that could trigger a failed assertion. * Made sure that POST/PUT don't get scheduled in multiple pieces. 17 February 2003: Polipo 0.2 * Fixed an incorrect assertion that could cause crashes when the server is fast. * Fixed (hopefully) logic for 304 replies. * Minor tweaks to scheduling that cause some speed increase when the client is pipelining and the server is fast. * Minor bug fixes and cleanups. * Macro-ified do_log and friends. 3 February 2003: Polipo 0.1 * Initial public release.