2022-02-22 22:38:06 +00:00
|
|
|
#!/usr/bin/env python
|
|
|
|
import json
|
|
|
|
import sys
|
|
|
|
|
|
|
|
alphabet = 'abcdefghijklmnopqrstuvwxyz'

# Only words of exactly this length are processed. The first letter selects
# the per-letter table; the remaining four letters are packed into 3 bytes.
_WORD_LENGTH = 5

# Letter -> 5-bit value (a=0 ... z=25), precomputed so packing does not
# rescan the alphabet string for every letter.
_LETTER_VALUES = {letter: value for value, letter in enumerate(alphabet)}


def _load_five_letter_words(path):
    """Return the 5-letter entries of the JSON array stored at `path`.

    File order is preserved. The file is read as UTF-8, the encoding the
    JSON specification prescribes, rather than the locale default.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [word for word in json.load(f) if len(word) == _WORD_LENGTH]


def _build_exclude_set(exclude_all, rarestword):
    """Collect exclude-list entries up to and including `rarestword`.

    If `rarestword` never occurs in `exclude_all` (which has already been
    reduced to 5-letter words), every entry ends up excluded.
    NOTE(review): presumably the exclude list is ordered so that everything
    before `rarestword` should be dropped -- confirm against the data files.
    """
    exclude = set()
    for word in exclude_all:
        exclude.add(word)
        if word == rarestword:
            break
    return exclude


def _pack_word(word):
    """Pack the letters after the first into 3 bytes, 5 bits per letter.

    The second letter occupies the lowest 5 bits; the bytes are emitted
    least-significant first.

    Raises:
        ValueError: if any letter of `word` is not in `alphabet`. An explicit
            raise is used instead of `assert` so the check survives
            `python -O`.
    """
    for letter in word:
        if letter not in _LETTER_VALUES:
            raise ValueError(
                f'word {word!r} contains {letter!r}, which is not in a-z')

    number = 0
    # First letter is implicit
    for index, letter in enumerate(word[1:]):
        number += _LETTER_VALUES[letter] << (5 * index)
    # Four 5-bit fields give at most 20 bits, so the top byte needs no mask.
    return bytes([number & 0xff, (number >> 8) & 0xff, number >> 16])


def _format_tables(arrays):
    """Render the per-letter tables and the index table as `db`/`dw` text."""
    lines = []
    for startletter, array in arrays.items():
        lines.append(f'dictionary_{startletter}:\n')
        for packed in array:
            lines.append(f'\tdb {", ".join(str(byte) for byte in packed)}\n')
        lines.append('\n')

    # Index table: one (label, entry count) pair per start letter.
    lines.append('dictionaries:\n')
    for startletter, array in arrays.items():
        lines.append(f'\tdw dictionary_{startletter}, {len(array)}\n')
    return ''.join(lines)


def main(argv):
    """Build the packed dictionary tables and write them to the target file.

    `argv` holds, in order: the path of the word-list JSON file, the path of
    the exclude-list JSON file, the word at which the exclude list stops
    (inclusive), and the output path. Extra arguments are ignored.

    Raises:
        SystemExit: if fewer than four arguments are given.
        ValueError: if a surviving 5-letter word contains a letter outside
            `alphabet`.
    """
    if len(argv) < 4:
        sys.exit('usage: <words.json> <exclude.json> <rarestword> <target>')
    srcpath, excludepath, rarestword, targetpath = argv[:4]

    # We only care about 5-letter words
    words = _load_five_letter_words(srcpath)
    exclude = _build_exclude_set(
        _load_five_letter_words(excludepath), rarestword)

    # Split dictionary into per-startletter arrays
    arrays = {letter: [] for letter in alphabet}
    for word in words:
        # Don't include words in the exclude list
        if word in exclude:
            continue
        packed = _pack_word(word)
        arrays[word[0]].append(packed)

    with open(targetpath, 'w') as f:
        f.write(_format_tables(arrays))


if __name__ == '__main__':
    main(sys.argv[1:])
|