jsonah2cardcast/jsonah2cardcast.py

162 lines
3.9 KiB
Python

import html.parser
import json
import os
import time
import uuid
class UnknownTagError(Exception):
    """Signals an HTML tag for which no conversion rule exists."""
class DeHTMLifier(html.parser.HTMLParser):
    """Convert a small subset of HTML into plain text with control codes.

    Text is passed through unchanged (character references are decoded by
    the base parser because ``convert_charrefs`` is enabled).  The tags
    understood are:

    * ``b``     -> the ^B control character (0x02)
    * ``i``     -> the ^] control character (0x1d)
    * ``small`` -> dropped without output
    * ``br``    -> a ``/`` separator surrounded by single spaces

    Closing tags and self-closing tags are handled exactly like opening
    tags.  Any other tag raises ``UnknownTagError``.
    """

    # Tags that emit the same control character on open and on close.
    _TOGGLE_CODES = {
        'b': '\x02',  # ^B, bold
        'i': '\x1d',  # ^], italic
    }

    def __init__(self):
        # Output fragments in document order; joined by processed().
        self.result = []
        super().__init__(convert_charrefs=True)

    def processed(self):
        """Return everything converted so far as a single string."""
        return ''.join(self.result)

    def handle_starttag(self, tag, attrs):
        """Append the replacement for *tag*; *attrs* is ignored.

        Raises:
            UnknownTagError: if *tag* is not one of b, i, small or br.
        """
        if tag in self._TOGGLE_CODES:
            self.result.append(self._TOGGLE_CODES[tag])
        elif tag == 'small':
            # Dunno, can't really do anything
            pass
        elif tag == 'br':
            # Slashes work more nicely than a newline symbol I find.
            # Avoid a double space when the preceding fragment already
            # ends with one.
            if self.result and self.result[-1].endswith(' '):
                self.result.append('/ ')
            else:
                self.result.append(' / ')
        else:
            raise UnknownTagError(tag)

    def handle_endtag(self, tag):
        """Treat a closing tag like an opening one."""
        # Since tags are symmetric (and ppl use </br>)
        self.handle_starttag(tag, None)

    def handle_startendtag(self, tag, attrs):
        """Treat a self-closing tag like an opening one."""
        self.handle_starttag(tag, attrs)

    def handle_data(self, data):
        """Append literal text unchanged."""
        self.result.append(data)

    def handle_entityref(self, name):
        """Reject named entity callbacks; they are not expected here."""
        # The parser is built with convert_charrefs=True, so the base class
        # should never call this.  Raise explicitly instead of using a bare
        # assert so the guard survives ``python -O``.
        raise AssertionError('unexpected entity reference: %r' % (name,))

    def handle_charref(self, name):
        """Reject numeric character reference callbacks; not expected here."""
        # Same reasoning as handle_entityref.
        raise AssertionError('unexpected character reference: %r' % (name,))
def dehtml(text):
    """Feed *text* through a fresh DeHTMLifier and return its output string."""
    converter = DeHTMLifier()
    converter.feed(text)
    # close() makes the parser flush any text it is still buffering.
    converter.close()
    converted = converter.processed()
    return converted
def decks(data):
    """Return the value stored under the ``'order'`` key of *data*.

    NOTE(review): presumably the list of deck codes in display order;
    confirm against the input JSON.
    """
    deck_order = data['order']
    return deck_order
def deckname(data, deck):
    """Return the ``'name'`` field of the entry stored in *data* under *deck*."""
    deck_entry = data[deck]
    return deck_entry['name']
def cards(data, deck):
    """Resolve a deck's card ids into the card entries themselves.

    The deck entry ``data[deck]`` holds ids under ``'black'`` and
    ``'white'``; each id is used to index ``data['blackCards']`` or
    ``data['whiteCards']`` respectively.

    Returns:
        A ``(black_cards, white_cards)`` tuple of lists, in id order.
    """
    deck_entry = data[deck]
    black_ids = deck_entry['black']
    white_ids = deck_entry['white']

    black_cards = []
    for card_id in black_ids:
        black_cards.append(data['blackCards'][card_id])

    white_cards = []
    for card_id in white_ids:
        white_cards.append(data['whiteCards'][card_id])

    return black_cards, white_cards
def geninfo(data, deck, official):
    """Build the info dictionary that main() writes to a deck's index.json.

    Args:
        data: Parsed input JSON containing the deck and its cards.
        deck: Key of the deck inside *data*; also used as the ``'code'``.
        official: Truthy selects the cardsagainsthumanity.com copyright
            URL, falsy the crhallberg.com one.

    Returns:
        A dict with the deck name, code, description, card counts, a fixed
        author entry, and creation/update timestamps both set to the
        current UTC time.
    """
    stamp = time.strftime('%Y-%m-%dT%H:%M:%S+00:00', time.gmtime())

    if official:
        copyright_url = 'https://cardsagainsthumanity.com/'
    else:
        copyright_url = 'https://crhallberg.com/cah/'

    calls, responses = cards(data, deck)

    author = {
        'id': '84897553-35f4-40c8-b104-9dd770199cce',
        'username': 'jsonah2cardcast'
    }
    info = {
        'name': deckname(data, deck),
        'code': deck,
        'description': 'converted from https://crhallberg.com/cah/ deck %s' % deck,
        'unlisted': True,
        'created_at': stamp,
        'updated_at': stamp,
        'external_copyright': True,
        'copyright_holder_url': copyright_url,
        'category': 'other',
        'call_count': len(calls),
        'response_count': len(responses),
        'author': author,
        'rating': '0.0'
    }
    return info
def gencards(data, deck, nsfw):
    """Build the card-list dictionary that main() writes to a deck's cards file.

    Args:
        data: Parsed input JSON containing the deck and its cards.
        deck: Key of the deck inside *data*.
        nsfw: Value copied into every card's ``'nsfw'`` field.

    Returns:
        ``{'calls': [...], 'responses': [...]}`` where every card is a dict
        with a fresh UUID4 ``'id'``, a ``'text'`` list of segments, a
        ``'created_at'`` timestamp (current UTC time) and ``'nsfw'``.
    """
    stamp = time.strftime('%Y-%m-%dT%H:%M:%S+00:00', time.gmtime())

    def split_on_blanks(text, pick_count):
        # Cut the text at up to pick_count underscores, giving
        # pick_count + 1 segments.  When the text runs out of underscores,
        # a ' _' is appended so every pick still gets its own blank.
        pieces = []
        rest = text
        for _unused in range(pick_count):
            if '_' not in rest:
                rest += ' _'
            head, _sep, rest = rest.partition('_')
            pieces.append(head)
        pieces.append(rest)
        return pieces

    def wrap(segments):
        # Attach the per-card fields shared by calls and responses.
        return {
            'id': str(uuid.uuid4()),
            'text': segments,
            'created_at': stamp,
            'nsfw': nsfw
        }

    black_cards, white_cards = cards(data, deck)

    calls = []
    for black in black_cards:
        plain = dehtml(black['text'])
        calls.append(wrap(split_on_blanks(plain, black['pick'])))

    responses = []
    for white in white_cards:
        responses.append(wrap([dehtml(white)]))

    return {
        'calls': calls,
        'responses': responses
    }
def main():
    """Convert official.json and unofficial.json into per-deck output files.

    Both input files are read from the current working directory.  For each
    deck code returned by decks(), the deck code is printed and two files
    are written: ``output/<deck>/index.json`` (result of geninfo) and
    ``output/<deck>/cards`` (result of gencards).  Existing output
    directories are reused.
    """
    def load(path):
        # JSON text is UTF-8; state it explicitly so decoding does not
        # depend on the platform's locale encoding.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def writedeck(data, deck, official):
        deck_dir = 'output/%s' % deck
        os.makedirs(deck_dir, exist_ok=True)
        with open('%s/index.json' % deck_dir, 'w', encoding='utf-8') as f:
            json.dump(geninfo(data, deck, official), f)
        with open('%s/cards' % deck_dir, 'w', encoding='utf-8') as f:
            # Just default to everything being nsfw
            json.dump(gencards(data, deck, nsfw=True), f)

    official_data = load('official.json')
    unofficial_data = load('unofficial.json')

    # Create the output root up front so it exists even with no decks.
    os.makedirs('output', exist_ok=True)

    for deck in decks(official_data):
        print(deck)
        writedeck(official_data, deck, official=True)
    for deck in decks(unofficial_data):
        print(deck)
        writedeck(unofficial_data, deck, official=False)
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()