import html.parser
import json
import os
import time
import uuid


class UnknownTagError(Exception):
    """Raised when card text contains an HTML tag that has no plain-text mapping.

    The offending tag name is passed as the sole exception argument.
    """


class DeHTMLifier(html.parser.HTMLParser):
    """Convert the small HTML subset used in card text into plain text.

    Mapping applied to tags (opening and closing tags are treated alike):

    * ``b``     -> ``'\\x02'`` (^B, bold marker)
    * ``i``     -> ``'\\x1d'`` (^], italic marker)
    * ``small`` -> dropped, its content is kept
    * ``br``    -> ``' / '`` (or ``'/ '`` when the preceding text already
      ends with a space)

    Any other tag raises :class:`UnknownTagError`.  Character and entity
    references are decoded by the base parser (``convert_charrefs=True``) and
    arrive as ordinary text.

    Usage: ``feed()`` the text, ``close()`` the parser, then read
    ``processed()``.
    """

    # Tags rendered as a single control character.  The same character is
    # emitted for the opening and the closing tag, so it acts as a toggle.
    _TOGGLE_MARKERS = {
        'b': '\x02',  # ^B, bold
        'i': '\x1d',  # ^], italic
    }

    # Tags with no plain-text equivalent; they are removed, content is kept.
    _IGNORED_TAGS = frozenset({'small'})

    def __init__(self):
        super().__init__(convert_charrefs=True)
        # Output fragments in document order; joined by processed().
        self.result = []

    def processed(self):
        """Return everything converted so far as a single string."""
        return ''.join(self.result)

    def handle_starttag(self, tag, attrs):
        """Append the plain-text replacement for *tag*; *attrs* is ignored.

        Raises:
            UnknownTagError: if *tag* is not one of b, i, small or br.
        """
        marker = self._TOGGLE_MARKERS.get(tag)
        if marker is not None:
            self.result.append(marker)
        elif tag in self._IGNORED_TAGS:
            # Nothing sensible to emit for these.
            pass
        elif tag == 'br':
            # A slash reads more nicely than a newline symbol.  Avoid a
            # doubled space when the preceding fragment already ends in one.
            after_space = bool(self.result) and self.result[-1].endswith(' ')
            self.result.append('/ ' if after_space else ' / ')
        else:
            raise UnknownTagError(tag)

    def handle_endtag(self, tag):
        """Treat a closing tag exactly like the matching opening tag.

        The formatting markers are toggles, so closing emits the same
        character as opening.  Routing end tags here also means a stray
        ``</br>`` (which people do write) still produces a separator.
        """
        self.handle_starttag(tag, None)

    def handle_startendtag(self, tag, attrs):
        """Handle a self-closing tag such as ``<br/>`` once, not as open + close."""
        self.handle_starttag(tag, attrs)

    def handle_data(self, data):
        """Collect literal text (character references are already decoded)."""
        self.result.append(data)

    def handle_entityref(self, name):
        """Guard: must never run because the parser uses ``convert_charrefs=True``."""
        # Raised explicitly instead of `assert False` so the check is not
        # stripped when Python runs with -O.
        raise AssertionError('unexpected entity reference: %r' % (name,))

    def handle_charref(self, name):
        """Guard: must never run because the parser uses ``convert_charrefs=True``."""
        raise AssertionError('unexpected character reference: %r' % (name,))


def dehtml(text):
    """Return *text* with its HTML markup converted to plain text.

    See :class:`DeHTMLifier` for the tag mapping.

    Raises:
        UnknownTagError: if *text* contains an unsupported tag.
    """
    dehtmlifier = DeHTMLifier()
    dehtmlifier.feed(text)
    # close() flushes any text the parser is still buffering.
    dehtmlifier.close()
    return dehtmlifier.processed()


def decks(data):
    """Return the value stored under the ``'order'`` key of *data*."""
    return data['order']


# NOTE(review): the same patch also updates gencards(data, deck, nsfw), which
# is only partially visible here: black-card text is passed as
# segmentify(dehtml(i['text']), i['pick']) and each white card as
# boilerplatify([dehtml(i)]), so HTML is converted before cards are emitted.