"""Convert card deck JSON files into per-deck ``index.json`` / ``cards`` files.

Reads ``official.json`` and ``unofficial.json`` from the current directory and
writes one directory per deck below ``output/``.
"""

import html.parser
import json
import os
import time
import uuid


class UnknownTagError(Exception):
    """Raised when card text contains an HTML tag that has no known mapping."""


class DeHTMLifier(html.parser.HTMLParser):
    """HTML parser that flattens a small set of tags into plain text.

    ``<b>`` and ``<i>`` become the control characters ``\\x02`` and ``\\x1d``,
    ``<small>`` is dropped, ``<br>`` becomes a slash separator, and any other
    tag raises ``UnknownTagError``.
    """

    def __init__(self):
        # Fragments of output text, joined by processed().
        self.result = []
        # convert_charrefs=True makes the parser decode entities itself, so
        # they arrive through handle_data() instead of the *ref handlers.
        super().__init__(convert_charrefs=True)

    def processed(self):
        """Return everything collected so far as a single string."""
        return ''.join(self.result)

    def handle_starttag(self, tag, attrs):
        """Translate an opening tag; raise UnknownTagError if unsupported."""
        if tag == 'b':
            # Bold
            self.result.append('\x02')  # ^B
        elif tag == 'i':
            # Italic
            self.result.append('\x1d')  # ^]
        elif tag == 'small':
            # No plain-text equivalent, so the tag is simply dropped.
            pass
        elif tag == 'br':
            # A slash is used as the line separator instead of a newline.
            # Avoid a double space when the preceding text already ends in one.
            if self.result and self.result[-1][-1:] == ' ':
                self.result.append('/ ')
            else:
                self.result.append(' / ')
        else:
            raise UnknownTagError(tag)

    def handle_endtag(self, tag):
        """Treat a closing tag exactly like the matching opening tag.

        The formatting characters are emitted for both the opening and the
        closing tag, and a closing ``br`` tag (presumably found in some input)
        is rendered as a separator too.
        """
        self.handle_starttag(tag, None)

    def handle_startendtag(self, tag, attrs):
        """Treat a self-closing tag (``<br/>``) as a single opening tag."""
        self.handle_starttag(tag, attrs)

    def handle_data(self, data):
        """Collect literal text."""
        self.result.append(data)

    def handle_entityref(self, name):
        """Fail loudly: not expected to be called with convert_charrefs=True."""
        # Raised explicitly because a bare ``assert`` is removed under -O.
        raise AssertionError('unexpected entity reference: %r' % (name,))

    def handle_charref(self, name):
        """Fail loudly: not expected to be called with convert_charrefs=True."""
        # Raised explicitly because a bare ``assert`` is removed under -O.
        raise AssertionError('unexpected character reference: %r' % (name,))


def dehtml(text):
    """Return ``text`` with its HTML markup flattened by ``DeHTMLifier``.

    Raises:
        UnknownTagError: if ``text`` contains an unsupported tag.
    """
    dehtmlifier = DeHTMLifier()
    dehtmlifier.feed(text)
    # close() flushes any text the parser is still buffering.
    dehtmlifier.close()
    return dehtmlifier.processed()


def decks(data):
    """Return the value stored under ``data['order']`` (iterated as deck codes)."""
    return data['order']


def deckname(data, deck):
    """Return the ``name`` entry of the deck ``deck``."""
    return data[deck]['name']


def cards(data, deck):
    """Return ``(black_cards, white_cards)`` for ``deck``.

    The deck entry holds indices into ``data['blackCards']`` and
    ``data['whiteCards']``; they are resolved to the card entries here.
    """
    black_ids = data[deck]['black']
    white_ids = data[deck]['white']
    black_cards = [data['blackCards'][i] for i in black_ids]
    white_cards = [data['whiteCards'][i] for i in white_ids]
    return black_cards, white_cards


def _utc_timestamp():
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SS+00:00``."""
    return time.strftime('%Y-%m-%dT%H:%M:%S+00:00', time.gmtime())


def geninfo(data, deck, official):
    """Build the metadata dictionary written to a deck's ``index.json``.

    Args:
        data: parsed deck JSON.
        deck: deck code, used as a key into ``data``.
        official: selects which copyright holder URL is recorded.
    """
    datetime_str = _utc_timestamp()
    if official:
        copyright_url = 'https://cardsagainsthumanity.com/'
    else:
        copyright_url = 'https://crhallberg.com/cah/'
    black_cards, white_cards = cards(data, deck)

    return {
        'name': deckname(data, deck),
        'code': deck,
        'description': 'converted from https://crhallberg.com/cah/ deck %s' % deck,
        'unlisted': True,
        'created_at': datetime_str,
        'updated_at': datetime_str,
        'external_copyright': True,
        'copyright_holder_url': copyright_url,
        'category': 'other',
        'call_count': len(black_cards),
        'response_count': len(white_cards),
        'author': {
            'id': '84897553-35f4-40c8-b104-9dd770199cce',
            'username': 'jsonah2cardcast',
        },
        'rating': '0.0',
    }


def gencards(data, deck, nsfw):
    """Build the ``{'calls': [...], 'responses': [...]}`` structure for a deck.

    Black cards become calls whose text is split around their ``_`` blanks;
    white cards become responses with a single text segment. Every card gets
    a fresh random UUID, the current UTC timestamp and the given ``nsfw`` flag.
    """
    datetime_str = _utc_timestamp()

    def boilerplatify(segments):
        """Wrap text segments in the per-card envelope."""
        return {
            'id': str(uuid.uuid4()),
            'text': segments,
            'created_at': datetime_str,
            'nsfw': nsfw,
        }

    def segmentify(text, pick_count):
        """Split ``text`` around ``pick_count`` blanks.

        When the text has fewer ``_`` blanks than ``pick_count``, the missing
        ones are appended as `` _`` so the result always has
        ``pick_count + 1`` segments.
        """
        segments = []
        remaining = text
        for _ in range(pick_count):
            if '_' not in remaining:
                remaining += ' _'
            segment, _sep, remaining = remaining.partition('_')
            segments.append(segment)
        segments.append(remaining)
        return segments

    black_cards, white_cards = cards(data, deck)

    calls = [
        boilerplatify(segmentify(dehtml(card['text']), card['pick']))
        for card in black_cards
    ]
    responses = [boilerplatify([dehtml(card)]) for card in white_cards]

    return {
        'calls': calls,
        'responses': responses,
    }


def main():
    """Convert every deck in both input files into ``output/<deck>/``."""
    # Read as UTF-8 explicitly so non-ASCII card text does not depend on the
    # platform's default encoding.
    with open('official.json', 'r', encoding='utf-8') as f:
        official_data = json.load(f)

    with open('unofficial.json', 'r', encoding='utf-8') as f:
        unofficial_data = json.load(f)

    os.makedirs('output', exist_ok=True)

    def writedeck(data, deck, official):
        """Write ``index.json`` and ``cards`` for a single deck."""
        os.makedirs('output/%s' % deck, exist_ok=True)

        with open('output/%s/index.json' % deck, 'w', encoding='utf-8') as f:
            json.dump(geninfo(data, deck, official), f)

        with open('output/%s/cards' % deck, 'w', encoding='utf-8') as f:
            # Just default to everything being nsfw
            json.dump(gencards(data, deck, nsfw=True), f)

    for deck in decks(official_data):
        print(deck)
        writedeck(official_data, deck, official=True)

    for deck in decks(unofficial_data):
        print(deck)
        writedeck(unofficial_data, deck, official=False)


if __name__ == '__main__':
    main()