From 179ff85f4c7752c48e4a7a996fd3bf57e110ebea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juhani=20Krekel=C3=A4?= Date: Fri, 12 Oct 2018 08:27:16 +0300 Subject: [PATCH] 4KiB was not enough for everyone --- botcmd.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/botcmd.py b/botcmd.py index 6219334..c2c4c18 100644 --- a/botcmd.py +++ b/botcmd.py @@ -125,13 +125,17 @@ def handle_message(*, prefix, message, nick, channel, irc): try: with urllib.request.urlopen(url, timeout = 1) as response: if response.info().get_content_type() == 'text/html': - # First 4KB of a page should be enough for any - first_kb = response.read(4 * 1024) - title = sanitize(extract_title(first_kb)) + # First 4KiB of a page should be enough for any <title> + # Turns out it's not, so download 64KiB + page_source_fragment = response.read(64 * 1024) + title = sanitize(extract_title(page_source_fragment)) domain = sanitize(urllib.parse.urlparse(url).netloc) - message = '%s: %s' % (domain, title) + if title is not None: + message = '%s: %s' % (domain, title) + else: + message = '%s: <no title found>' % domain irc.bot_response(channel, message) possible_titles_left -= 1