Convert HTML into IRC formatting codes

This commit is contained in:
Juhani Krekelä 2020-06-03 03:20:32 +03:00
parent 1843a4b8fa
commit 8469224c8e
1 changed file with 54 additions and 2 deletions

View File

@ -1,8 +1,60 @@
import html.parser
import json
import os
import time
import uuid
class UnknownTagError(Exception):
    """Raised for an HTML tag that has no IRC formatting equivalent.

    The offending tag name is passed as the exception's only argument.
    """
class DeHTMLifier(html.parser.HTMLParser):
    """HTML parser that re-emits its input using IRC formatting codes.

    Plain text is passed through unchanged, <b> and <i> turn into the IRC
    bold and italic control characters, <small> is dropped (its text is
    kept), and <br> becomes a " / " separator. Any other tag raises
    UnknownTagError. Feed the markup in, close the parser, then collect the
    converted text with processed().
    """

    # Tags that translate to a single IRC control character. The same
    # character is emitted for the opening and the closing tag.
    _IRC_CODES = {
        'b': '\x02',  # ^B, bold
        'i': '\x1d',  # ^], italic
    }

    def __init__(self):
        # Converted output fragments, in document order
        self.result = []
        # convert_charrefs=True makes the base parser decode character
        # references itself and hand them to handle_data() as plain text
        super().__init__(convert_charrefs=True)

    def processed(self):
        """Return everything converted so far as one string."""
        return ''.join(self.result)

    def handle_starttag(self, tag, attrs):
        """Append the IRC rendition of `tag` to the output.

        `attrs` is ignored. Raises UnknownTagError for any tag other than
        b, i, small and br.
        """
        if tag == 'small':
            # IRC has nothing comparable to a font size; emit nothing
            return

        if tag in self._IRC_CODES:
            self.result.append(self._IRC_CODES[tag])
            return

        if tag != 'br':
            raise UnknownTagError(tag)

        # A line break is shown as a slash with one space on each side. If
        # the preceding fragment already ends in a space, skip the leading
        # one so the separator is not padded twice.
        after_space = bool(self.result) and self.result[-1].endswith(' ')
        self.result.append('/ ' if after_space else ' / ')

    def handle_endtag(self, tag):
        """Handle a closing tag exactly like the matching opening tag.

        Every supported tag produces the same output when it opens and when
        it closes, and this also copes with input that writes </br>.
        """
        self.handle_starttag(tag, None)

    def handle_startendtag(self, tag, attrs):
        """Handle a self-closing tag such as <br/> as one opening tag."""
        self.handle_starttag(tag, attrs)

    def handle_data(self, data):
        """Copy text content to the output unchanged."""
        self.result.append(data)

    def handle_entityref(self, name):
        """Not expected to run: convert_charrefs=True disables this hook."""
        assert False

    def handle_charref(self, name):
        """Not expected to run: convert_charrefs=True disables this hook."""
        assert False
def dehtml(text):
    """Return `text` with its HTML markup converted to IRC formatting codes.

    Runs the whole string through a fresh DeHTMLifier. Any exception raised
    by the parser (such as UnknownTagError) propagates to the caller.
    """
    parser = DeHTMLifier()
    parser.feed(text)
    # close() makes the parser process whatever input it is still buffering
    parser.close()
    return parser.processed()
def decks(data):
    """Return the value stored under the 'order' key of `data`.

    Raises KeyError when `data` is a dict without an 'order' entry.
    """
    order = data['order']
    return order
@ -66,8 +118,8 @@ def gencards(data, deck, nsfw):
black_cards, white_cards = cards(data, deck)
black_cards = [boilerplatify(segmentify(i['text'], i['pick'])) for i in black_cards]
white_cards = [boilerplatify([i]) for i in white_cards]
black_cards = [boilerplatify(segmentify(dehtml(i['text']), i['pick'])) for i in black_cards]
white_cards = [boilerplatify([dehtml(i)]) for i in white_cards]
return {
'calls': black_cards,