untls_proxy/untls_proxy.py

608 lines
21 KiB
Python
Executable File

#!/usr/bin/env python3
# CC0 1.0 Universal
#
# Statement of Purpose
#
# The laws of most jurisdictions throughout the world automatically confer
# exclusive Copyright and Related Rights (defined below) upon the creator and
# subsequent owner(s) (each and all, an "owner") of an original work of
# authorship and/or a database (each, a "Work").
#
# Certain owners wish to permanently relinquish those rights to a Work for the
# purpose of contributing to a commons of creative, cultural and scientific
# works ("Commons") that the public can reliably and without fear of later
# claims of infringement build upon, modify, incorporate in other works, reuse
# and redistribute as freely as possible in any form whatsoever and for any
# purposes, including without limitation commercial purposes. These owners may
# contribute to the Commons to promote the ideal of a free culture and the
# further production of creative, cultural and scientific works, or to gain
# reputation or greater distribution for their Work in part through the use and
# efforts of others.
#
# For these and/or other purposes and motivations, and without any expectation
# of additional consideration or compensation, the person associating CC0 with a
# Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
# and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
# and publicly distribute the Work under its terms, with knowledge of his or her
# Copyright and Related Rights in the Work and the meaning and intended legal
# effect of CC0 on those rights.
#
# 1. Copyright and Related Rights. A Work made available under CC0 may be
# protected by copyright and related or neighboring rights ("Copyright and
# Related Rights"). Copyright and Related Rights include, but are not limited
# to, the following:
#
# i. the right to reproduce, adapt, distribute, perform, display, communicate,
# and translate a Work;
#
# ii. moral rights retained by the original author(s) and/or performer(s);
#
# iii. publicity and privacy rights pertaining to a person's image or likeness
# depicted in a Work;
#
# iv. rights protecting against unfair competition in regards to a Work,
# subject to the limitations in paragraph 4(a), below;
#
# v. rights protecting the extraction, dissemination, use and reuse of data in
# a Work;
#
# vi. database rights (such as those arising under Directive 96/9/EC of the
# European Parliament and of the Council of 11 March 1996 on the legal
# protection of databases, and under any national implementation thereof,
# including any amended or successor version of such directive); and
#
# vii. other similar, equivalent or corresponding rights throughout the world
# based on applicable law or treaty, and any national implementations thereof.
#
# 2. Waiver. To the greatest extent permitted by, but not in contravention of,
# applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
# unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
# and Related Rights and associated claims and causes of action, whether now
# known or unknown (including existing as well as future claims and causes of
# action), in the Work (i) in all territories worldwide, (ii) for the maximum
# duration provided by applicable law or treaty (including future time
# extensions), (iii) in any current or future medium and for any number of
# copies, and (iv) for any purpose whatsoever, including without limitation
# commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
# the Waiver for the benefit of each member of the public at large and to the
# detriment of Affirmer's heirs and successors, fully intending that such Waiver
# shall not be subject to revocation, rescission, cancellation, termination, or
# any other legal or equitable action to disrupt the quiet enjoyment of the Work
# by the public as contemplated by Affirmer's express Statement of Purpose.
#
# 3. Public License Fallback. Should any part of the Waiver for any reason be
# judged legally invalid or ineffective under applicable law, then the Waiver
# shall be preserved to the maximum extent permitted taking into account
# Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
# is so judged Affirmer hereby grants to each affected person a royalty-free,
# non transferable, non sublicensable, non exclusive, irrevocable and
# unconditional license to exercise Affirmer's Copyright and Related Rights in
# the Work (i) in all territories worldwide, (ii) for the maximum duration
# provided by applicable law or treaty (including future time extensions), (iii)
# in any current or future medium and for any number of copies, and (iv) for any
# purpose whatsoever, including without limitation commercial, advertising or
# promotional purposes (the "License"). The License shall be deemed effective as
# of the date CC0 was applied by Affirmer to the Work. Should any part of the
# License for any reason be judged legally invalid or ineffective under
# applicable law, such partial invalidity or ineffectiveness shall not
# invalidate the remainder of the License, and in such case Affirmer hereby
# affirms that he or she will not (i) exercise any of his or her remaining
# Copyright and Related Rights in the Work or (ii) assert any associated claims
# and causes of action with respect to the Work, in either case contrary to
# Affirmer's express Statement of Purpose.
#
# 4. Limitations and Disclaimers.
#
# a. No trademark or patent rights held by Affirmer are waived, abandoned,
# surrendered, licensed or otherwise affected by this document.
#
# b. Affirmer offers the Work as-is and makes no representations or warranties
# of any kind concerning the Work, express, implied, statutory or otherwise,
# including without limitation warranties of title, merchantability, fitness
# for a particular purpose, non infringement, or the absence of latent or
# other defects, accuracy, or the present or absence of errors, whether or not
# discoverable, all to the greatest extent permissible under applicable law.
#
# c. Affirmer disclaims responsibility for clearing rights of other persons
# that may apply to the Work or any use thereof, including without limitation
# any person's Copyright and Related Rights in the Work. Further, Affirmer
# disclaims responsibility for obtaining any necessary consents, permissions
# or other rights required for any use of the Work.
#
# d. Affirmer understands and acknowledges that Creative Commons is not a
# party to this document and has no duty or obligation with respect to this
# CC0 or use of the Work.
#
# For more information, please see
# <http://creativecommons.org/publicdomain/zero/1.0/>
import base64
import enum
import hmac
import select
import socket
import ssl
import sys
import threading
import time
class contexts(enum.Enum):
    """Parser states used by HtmlProcessor while scanning markup."""

    text = 0                 # outside any tag
    tagname = 1              # just after '<', reading the tag name
    attributename = 2        # reading an attribute name
    after_attributename = 3  # whitespace seen after an attribute name
    after_equals = 4         # '=' seen, value not started yet
    attributevalue = 5       # inside an unquoted value
    attributevalue_sq = 6    # inside a single-quoted value
    attributevalue_dq = 7    # inside a double-quoted value


class HtmlProcessor:
    """Streaming rewriter turning https:// links in HTML into http:// links.

    Data is fed in arbitrary chunks through process(); parser state is kept
    between calls.  Everything is passed through unchanged except attribute
    values, which are buffered until they are complete so that the
    ``href`` of ``<a>`` and the ``src`` of ``<img>`` can be rewritten.
    Call finalize() at end of stream to flush a value that never ended.
    """

    # ASCII whitespace as defined for HTML.  Tested on raw byte values so that
    # bytes >= 0x80 (parts of multi-byte characters) never count as whitespace.
    _WHITESPACE = b' \t\n\r\f'

    def __init__(self):
        self.context = contexts.text
        self.tag = None        # bytearray: name of the tag being parsed
        self.attribute = None  # bytearray: name of the current attribute
        self.value = None      # bytearray: value buffered so far

    def process_attribute(self):
        """Return the buffered attribute value, rewritten if it is a TLS link.

        The replacement has exactly the same length as the original value:
        ``https://`` becomes `` http://`` and whitespace around the URL is
        kept, so a Content-Length computed by the origin stays valid.
        """
        tag = self.tag.lower()
        attribute = self.attribute.lower()
        # TODO: handle more attributes
        is_link = (tag == b'a' and attribute == b'href') or (tag == b'img' and attribute == b'src')
        if not is_link:
            return self.value
        # TODO: entities
        url = self.value.lstrip()
        if not url.lower().startswith(b'https://'):
            return self.value
        leading = self.value[:len(self.value) - len(url)]
        # Space is to keep the response size constant
        return leading + b' http://' + url[len(b'https://'):]

    def process(self, data):
        """Consume a chunk of HTML and return the bytes that can be emitted.

        Bytes belonging to an attribute value that is still incomplete are
        held back and emitted (possibly rewritten) once the value ends.

        Args:
            data: bytes-like chunk of the document.

        Returns:
            bytearray with the processed output for this chunk.
        """
        lt, gt, eq, sq, dq = b'<>=\'"'
        quoted = (contexts.attributevalue_sq, contexts.attributevalue_dq)
        processed = bytearray()
        for char in data:
            is_space = char in self._WHITESPACE
            if self.context == contexts.text and char == lt:
                self.context = contexts.tagname
                self.tag = bytearray()
                self.attribute = None
                self.value = None
            elif self.context not in quoted and char == gt:
                if self.context == contexts.attributevalue:
                    processed.extend(self.process_attribute())
                self.context = contexts.text
                self.tag = None
                self.attribute = None
                self.value = None
            elif self.context in (contexts.tagname, contexts.attributevalue) and is_space:
                if self.context == contexts.attributevalue:
                    processed.extend(self.process_attribute())
                self.context = contexts.attributename
                self.attribute = bytearray()
                self.value = None
            elif self.context == contexts.attributename and is_space:
                self.context = contexts.after_attributename
            elif self.context == contexts.after_attributename and is_space:
                pass
            elif self.context in (contexts.attributename, contexts.after_attributename) and char == eq:
                self.context = contexts.after_equals
            elif self.context == contexts.after_equals and is_space:
                pass
            elif self.context == contexts.after_equals and char == sq:
                self.context = contexts.attributevalue_sq
                self.value = bytearray()
            elif self.context == contexts.after_equals and char == dq:
                self.context = contexts.attributevalue_dq
                self.value = bytearray()
            elif self.context == contexts.attributevalue_sq and char == sq:
                processed.extend(self.process_attribute())
                self.context = contexts.attributename
                # Start a fresh name: the next attribute may follow the quote
                # directly, without whitespace in between.
                self.attribute = bytearray()
                self.value = None
            elif self.context == contexts.attributevalue_dq and char == dq:
                processed.extend(self.process_attribute())
                self.context = contexts.attributename
                self.attribute = bytearray()
                self.value = None
            elif self.context == contexts.tagname:
                self.tag.append(char)
            elif self.context == contexts.attributename:
                self.attribute.append(char)
            elif self.context == contexts.after_attributename:
                # A new attribute starts; the previous one had no value
                self.context = contexts.attributename
                self.attribute = bytearray([char])
                self.value = None
            elif self.context == contexts.after_equals:
                self.context = contexts.attributevalue
                self.value = bytearray([char])
            elif self.context in (contexts.attributevalue,) + quoted:
                self.value.append(char)

            # Emit the byte unless it is part of a value that is still being
            # buffered; the quote characters themselves are always emitted.
            if self.context == contexts.attributevalue:
                pass
            elif self.context == contexts.attributevalue_sq and char != sq:
                pass
            elif self.context == contexts.attributevalue_dq and char != dq:
                pass
            else:
                processed.append(char)
        return processed

    def finalize(self):
        """Return whatever attribute value is still buffered at end of input."""
        if self.context in (contexts.attributevalue, contexts.attributevalue_sq, contexts.attributevalue_dq):
            return self.process_attribute()
        return b''
def connect(host, port):
    """Open a TCP connection to ``host``:``port``.

    Every address returned by name resolution is tried in order, each with
    a 10 second connect timeout.

    Args:
        host: host name or IP address literal (str).
        port: TCP port number.

    Returns:
        A connected socket in blocking mode, or None if the name cannot be
        resolved or no address accepts the connection.
    """
    try:
        candidates = socket.getaddrinfo(host, port, socket.AF_UNSPEC, socket.SOCK_STREAM)
    except (socket.gaierror, UnicodeError):
        # UnicodeError: the name cannot be IDNA-encoded (empty or over-long
        # label); for a client-supplied name that is just another bad host.
        return None
    for af, socktype, proto, _canonname, sa in candidates:
        try:
            s = socket.socket(af, socktype, proto)
        except OSError:
            continue
        s.settimeout(10)
        try:
            # Use the resolved address rather than resolving the name again
            s.connect(sa)
        except OSError:  # includes socket.timeout
            s.close()
            continue
        s.settimeout(None)
        return s
    return None
def timestamp():
    """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM:SSZ``."""
    now_utc = time.gmtime()
    return time.strftime('%Y-%m-%d %H:%M:%SZ', now_utc)
# Maps user name -> password (both str).  Filled from the users file by the
# script entry point and read by authorized().
users = {}


def authorized(username, password):
    """Check a user name / password pair against the ``users`` table.

    Args:
        username: user name as raw bytes taken from the request.
        password: password as raw bytes taken from the request.

    Returns:
        True if both decode as UTF-8 and match an entry in ``users``,
        False otherwise.
    """
    try:
        username = username.decode('utf-8')
        password = password.decode('utf-8')
    except UnicodeDecodeError:
        return False
    expected = users.get(username)
    if expected is None:
        return False
    # Constant-time comparison so that response timing does not reveal how
    # much of a guessed password is correct.  compare_digest only accepts
    # non-ASCII text as bytes, hence the encoding.
    return hmac.compare_digest(password.encode('utf-8'), expected.encode('utf-8'))
def proxy(sock, host):
    """Serve a single HTTP proxy request received on ``sock``.

    The request head is read and parsed, the client is authenticated with
    HTTP Basic ``Proxy-Authorization`` credentials, and the request is
    forwarded to the origin over plain HTTP.  If the origin answers with a
    redirect to the https:// form of the requested URL, the request is
    replayed over TLS without involving the client.  Other https://
    ``Location`` headers are rewritten to http://, and bodies whose
    Content-Type is text/html (or that carry no Content-Type) are streamed
    through HtmlProcessor.  One log line per request is written to stderr.

    Args:
        sock: connected client socket; it is not closed here.
        host: client address, used only for the log line.
    """
    print(f'{timestamp()} {host}', file=sys.stderr, end=' ')

    # Read the request head.  Body bytes that arrive in the same reads end up
    # in request_data.
    sock.settimeout(2)
    request = bytearray()
    while b'\r\n\r\n' not in request:
        try:
            data = sock.recv(1024)
        except socket.timeout:
            print('Timeout', file=sys.stderr)
            return
        except ConnectionResetError:
            data = b''
        if data == b'':
            print('Hung up', file=sys.stderr)
            return
        request.extend(data)
    sock.settimeout(None)
    request, _, request_data = request.partition(b'\r\n\r\n')
    lines = request.split(b'\r\n')
    del request

    # Get request method, URL, protocol
    fields = lines[0].split()
    if len(fields) != 3:
        print('Malformed request line', file=sys.stderr)
        _send_reply(sock, b'HTTP/1.0 400 Bad Request\r\n\r\nMalformed request line\n')
        return
    method, url, protocol = fields
    print(f'{method.decode(errors="replace")} {url.decode(errors="replace")}', file=sys.stderr, end=' ')

    # Parse headers
    headers = {}
    for line in lines[1:]:
        fields = line.split(b':', 1)
        if len(fields) != 2:
            print('Malformed headers', file=sys.stderr)
            _send_reply(sock, b'HTTP/1.0 400 Bad Request\r\n\r\nMalformed headers\n')
            return
        headers[bytes(fields[0].strip().lower())] = bytes(fields[1].strip())
    del lines

    # Check authentication
    auth_required = b'HTTP/1.0 407 Proxy Authentication Required\r\nProxy-Authenticate: Basic realm="Proxy service"\r\n\r\n'
    if b'proxy-authorization' not in headers:
        print('Proxy authentication required', file=sys.stderr)
        _send_reply(sock, auth_required + b'Proxy authentication required\n')
        return
    fields = headers[b'proxy-authorization'].split()
    if len(fields) != 2 or fields[0].lower() != b'basic':
        print('Unrecognized authentication method', file=sys.stderr)
        _send_reply(sock, auth_required + b'Unrecognized authentication method\n')
        return
    try:
        username, password = base64.b64decode(fields[1], validate=True).split(b':', 1)
    except ValueError:  # bad base64 (binascii.Error is a ValueError) or no ':'
        print('Malformed credentials', file=sys.stderr)
        _send_reply(sock, auth_required + b'Malformed credentials\n')
        return
    if not authorized(username, password):
        print('Unauthorized', file=sys.stderr)
        _send_reply(sock, auth_required + b'Unauthorized\n')
        return
    del username
    del password

    # Remove headers that don't need forwarding or are overwritten
    headers = {
        key: value for key, value in headers.items()
        if not key.startswith(b'proxy-')
        and key not in (b'connection', b'accept-encoding', b'keep-alive')
    }
    headers[b'connection'] = b'close'
    # Ask for an uncompressed body so that HTML can be rewritten in transit
    headers[b'accept-encoding'] = b'identity'

    # Split url into its constituents
    fields = url.split(b'://', 1)
    if len(fields) != 2 or fields[0] not in (b'http', b'https'):
        print('Bad schema', file=sys.stderr)
        _send_reply(sock, b'HTTP/1.0 400 Bad Request\r\n\r\nBad schema\n')
        return
    remote_host, slash, path = fields[1].partition(b'/')
    path = slash + path
    if not path:
        # "http://host" has no path; the request line still needs one
        path = b'/'
    if remote_host[-1:] == b']':  # IPv6 [::1] syntax
        port = None
    elif b':' not in remote_host:
        port = None
    else:
        remote_host, port = remote_host.rsplit(b':', 1)
        try:
            port = int(port)
            if port < 1 or port > 0xffff:
                raise ValueError
        except ValueError:
            print('Bad port number', file=sys.stderr)
            _send_reply(sock, b'HTTP/1.0 400 Bad Request\r\n\r\nBad port number\n')
            return
    if remote_host[:1] == b'[' and remote_host[-1:] == b']':  # IPv6
        remote_host = remote_host[1:-1]
    try:
        remote_host = remote_host.decode('ascii')
    except UnicodeDecodeError:
        print('Bad host name', file=sys.stderr)
        _send_reply(sock, b'HTTP/1.0 400 Bad Request\r\n\r\nBad host name\n')
        return

    # Connect to remote host
    remote_sock = connect(remote_host, port if port is not None else 80)
    if remote_sock is None:
        print('Host not found', file=sys.stderr)
        _send_reply(sock, b'HTTP/1.0 502 Bad Gateway\r\n\r\nHost not found\n')
        return

    tls = False
    client_eof = False  # set once the client has stopped sending
    try:
        while True:
            # Send request
            remote_sock.settimeout(10)
            try:
                remote_sock.sendall(method + b' ' + path + b' ' + protocol + b'\r\n')
                for key, value in headers.items():
                    remote_sock.sendall(key + b': ' + value + b'\r\n')
                remote_sock.sendall(b'\r\n')
                remote_sock.sendall(request_data)
            except (ConnectionResetError, BrokenPipeError):
                print('Remote hung up', file=sys.stderr)
                _send_reply(sock, b'HTTP/1.0 502 Bad Gateway\r\n\r\nRemote hung up\n')
                return
            except socket.timeout:
                print('Remote timed out', file=sys.stderr)
                _send_reply(sock, b'HTTP/1.0 504 Gateway Timeout\r\n\r\nRemote timed out\n')
                return
            remote_sock.settimeout(None)

            # Keep sending request body, if any, until we get a response from remote
            poll = select.poll()
            poll.register(remote_sock, select.POLLIN)
            if not client_eof:
                poll.register(sock, select.POLLIN)
            no_response = True
            while no_response:
                for fd, _ in poll.poll():
                    if fd == remote_sock.fileno():
                        no_response = False
                        break
                    try:
                        data = sock.recv(1024)
                    except ConnectionResetError:
                        data = b''
                    if data == b'':
                        # A socket at EOF stays readable forever; stop
                        # polling it so this loop does not spin.
                        client_eof = True
                        poll.unregister(sock)
                        break
                    # Save the part we've sent already in case we need to re-send request
                    request_data.extend(data)
                    try:
                        remote_sock.sendall(data)
                    except (ConnectionResetError, BrokenPipeError):
                        print('Remote hung up', file=sys.stderr)
                        _send_reply(sock, b'HTTP/1.0 502 Bad Gateway\r\n\r\nRemote hung up\n')
                        return

            # Get response headers
            remote_sock.settimeout(10)
            response = bytearray()
            while b'\r\n\r\n' not in response:
                try:
                    data = remote_sock.recv(1024)
                except socket.timeout:
                    print('Remote timed out', file=sys.stderr)
                    _send_reply(sock, b'HTTP/1.0 504 Gateway Timeout\r\n\r\nRemote timed out\n')
                    return
                if data == b'':
                    print('Remote hung up', file=sys.stderr)
                    _send_reply(sock, b'HTTP/1.0 502 Bad Gateway\r\n\r\nRemote hung up\n')
                    return
                response.extend(data)
            remote_sock.settimeout(None)
            response, _, response_data = response.partition(b'\r\n\r\n')

            # Process response headers
            # Figure out if this is a redirect to HTTPS and if so, rewrite to HTTP
            # Figure out whether response is html
            tls_redirect = False
            is_html = True
            response_lines = response.split(b'\r\n')
            rewritten_response = bytearray()
            rewritten_response.extend(response_lines[0])  # Include response line as-is
            rewritten_response.extend(b'\r\n')
            for line in response_lines[1:]:
                fields = line.split(b':', 1)
                name = fields[0].lower() if len(fields) == 2 else None
                if name == b'location':
                    destination_url = fields[1].strip()
                    if destination_url.startswith(b'https://'):
                        destination_url = b'http://' + destination_url[len(b'https://'):]
                    if destination_url.split(b'#')[0] != url or tls:
                        # When already on TLS there is no further upgrade to
                        # try, so the redirect must reach the client intact.
                        rewritten_response.extend(b'Location: ')
                        rewritten_response.extend(destination_url)
                        rewritten_response.extend(b'\r\n')
                    else:
                        # This redirect is of the current URL but TLS
                        tls_redirect = True
                elif name == b'content-type':
                    mimetype = fields[1].split(b';')[0].strip().lower()
                    is_html = mimetype == b'text/html'
                    rewritten_response.extend(line)
                    rewritten_response.extend(b'\r\n')
                else:
                    rewritten_response.extend(line)
                    rewritten_response.extend(b'\r\n')

            if tls_redirect and not tls:
                # Do upgrade to TLS transparently to client
                print('TLS', file=sys.stderr, end=' ')
                remote_sock.close()
                remote_sock = connect(remote_host, port if port is not None else 443)
                if remote_sock is None:
                    print('Host not found', file=sys.stderr)
                    _send_reply(sock, b'HTTP/1.0 502 Bad Gateway\r\n\r\nHost not found\n')
                    return
                remote_sock.settimeout(10)  # bounds the handshake below
                ctx = ssl.create_default_context()
                try:
                    remote_sock = ctx.wrap_socket(remote_sock, server_hostname=remote_host)
                except OSError:  # includes ssl.SSLError and socket.timeout
                    print('TLS handshake failed', file=sys.stderr)
                    _send_reply(sock, b'HTTP/1.0 502 Bad Gateway\r\n\r\nTLS handshake failed\n')
                    return
                tls = True
                continue

            # Forward response to client
            sock.sendall(rewritten_response)
            sock.sendall(b'\r\n')
            break
        del request_data

        # Forward the part of the body that arrived together with the headers
        if is_html:
            htmlprocessor = HtmlProcessor()
            sock.sendall(htmlprocessor.process(response_data))
        else:
            sock.sendall(response_data)
        print('', file=sys.stderr)

        # Relay in both directions until the remote is done, the client
        # resets the connection, or nothing moves for 60 seconds.
        sock.settimeout(60)
        remote_sock.settimeout(60)
        last_transfer = time.monotonic()
        ending_connection = False
        while not ending_connection:
            events = poll.poll(60_000)
            if len(events) == 0 and time.monotonic() - last_transfer > 60:
                break
            for fd, _ in events:
                if fd == sock.fileno():
                    try:
                        data = sock.recv(1024)
                    except ConnectionResetError:
                        ending_connection = True
                        break
                    if data == b'':
                        # Client finished sending; keep relaying the response
                        # but stop polling a socket that is now always readable.
                        poll.unregister(sock)
                    else:
                        try:
                            remote_sock.sendall(data)
                        except (ConnectionResetError, BrokenPipeError, socket.timeout):
                            pass
                else:
                    try:
                        data = remote_sock.recv(1024)
                    except (ConnectionResetError, socket.timeout):
                        ending_connection = True
                        break
                    if data == b'':
                        ending_connection = True
                        break
                    if is_html:
                        data = htmlprocessor.process(data)
                    try:
                        sock.sendall(data)
                    except (ConnectionResetError, BrokenPipeError, socket.timeout):
                        ending_connection = True
                        break
                last_transfer = time.monotonic()

        if is_html:
            # Flush an attribute value that was still buffered at end of stream
            try:
                sock.sendall(htmlprocessor.finalize())
            except (ConnectionResetError, BrokenPipeError, socket.timeout):
                pass
    finally:
        # Runs on every exit path, including the early returns above
        if remote_sock is not None:
            remote_sock.close()


def _send_reply(sock, response):
    """Send a canned error response to the client, ignoring a dead client."""
    try:
        sock.sendall(response)
    except OSError:
        pass
class ProxyThread(threading.Thread):
    """Thread that serves one accepted client connection, then closes it."""

    def __init__(self, sock, host):
        """Remember the client socket and the client address to serve."""
        super().__init__()
        self.sock = sock
        self.host = host

    def run(self):
        """Run proxy() for the connection and always close the socket."""
        try:
            proxy(self.sock, self.host)
        finally:
            # Close even when proxy() raises, so a failed request does not
            # leave the client connection open.
            self.sock.close()
def listen(port):
    """Listen on ``port`` and hand every accepted connection to a ProxyThread.

    One listening socket is opened per wildcard address returned by name
    resolution (typically IPv4 and IPv6).  Exits the process with status 1
    if none of them can be bound; otherwise never returns.

    Args:
        port: TCP port number to listen on.
    """
    sockets = []
    for af, socktype, proto, _canonname, sa in socket.getaddrinfo(
            None, port, socket.AF_UNSPEC, socket.SOCK_STREAM, 0, socket.AI_PASSIVE):
        try:
            s = socket.socket(af, socktype, proto)
        except OSError:
            continue
        # Make IPv6 socket only bind on IPv6 address, otherwise may clash with IPv4 and not get enabled
        if af == socket.AF_INET6:
            try:
                s.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
            except OSError:
                pass
        # Set SO_REUSEADDR for less painful server restarting
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        try:
            s.bind(sa)
            s.listen(1)
        except OSError:
            s.close()
            continue
        sockets.append(s)
    if not sockets:
        print(f'Could not bind to port {port}', file=sys.stderr)
        sys.exit(1)

    listening = select.poll()
    sock_by_fd = {}
    for s in sockets:
        listening.register(s, select.POLLIN)
        sock_by_fd[s.fileno()] = s
    del sockets

    while True:
        for fd, _ in listening.poll():
            try:
                conn, (host, *_) = sock_by_fd[fd].accept()
            except OSError as err:
                # E.g. the peer gave up before we accepted, or we are out of
                # file descriptors.  Keep serving; pause briefly so that a
                # persistent error does not turn into a busy loop.
                print(f'accept failed: {err}', file=sys.stderr)
                time.sleep(0.1)
                continue
            try:
                ProxyThread(conn, host).start()
            except RuntimeError as err:
                # Thread could not be started; drop this connection only
                print(f'Could not start thread: {err}', file=sys.stderr)
                conn.close()
# Script entry point: load "username:password" lines from the users file
# into ``users``, then serve on the given port.
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print(f'Usage: {sys.argv[0]} users-file port', file=sys.stderr)
        sys.exit(1)
    # Credentials from requests are decoded as UTF-8 before being compared
    # with these entries, so read the file as UTF-8 regardless of the locale.
    with open(sys.argv[1], encoding='utf-8') as f:
        for lineno, line in enumerate(f, 1):
            line = line.rstrip('\n')
            if line.strip() == '':
                continue
            # Only the first ':' separates; passwords may contain ':'
            username, separator, password = line.partition(':')
            if not separator:
                print(f'{sys.argv[1]}:{lineno}: expected username:password', file=sys.stderr)
                sys.exit(1)
            users[username] = password
    try:
        port = int(sys.argv[2])
    except ValueError:
        print(f'Bad port number: {sys.argv[2]}', file=sys.stderr)
        sys.exit(1)
    listen(port)