You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
50 lines
1.3 KiB
50 lines
1.3 KiB
#!/usr/bin/env python
"""Pack a 5-letter word list into per-start-letter tables and emit them as text.

Usage:
    SCRIPT SRCPATH EXCLUDEPATH RARESTWORD TARGETPATH

SRCPATH and EXCLUDEPATH are JSON files, each holding a list of words.
Only 5-letter words are considered.  Words of the (length-filtered)
exclude list are dropped from the source list, walking the exclude list
in order up to and including RARESTWORD.

Each remaining word is stored under its first letter; the other four
letters are packed 5 bits apiece into three bytes.  TARGETPATH receives
one ``dictionary_<letter>:`` label per letter followed by ``db`` lines,
then a ``dictionaries:`` table of ``dw dictionary_<letter>, <count>``
lines.
"""

import json
import sys

alphabet = 'abcdefghijklmnopqrstuvwxyz'

# Only words of exactly this many letters are packed.
WORD_LENGTH = 5
# Each letter index (0-25) fits in this many bits.
BITS_PER_LETTER = 5

# Letter -> position in ``alphabet``; built once instead of scanning the
# alphabet string for every letter of every word.
LETTER_INDEX = {letter: position for position, letter in enumerate(alphabet)}


def build_exclude_set(exclude_all, rarestword):
    """Return the words of *exclude_all* up to and including *rarestword*.

    The list is walked in order and the walk stops right after
    *rarestword* has been added.  If *rarestword* never occurs, every
    word of *exclude_all* ends up in the result.
    """
    exclude = set()
    for word in exclude_all:
        exclude.add(word)
        if word == rarestword:
            break
    return exclude


def select_words(words, exclude_all, rarestword):
    """Return the 5-letter entries of *words* that are not excluded.

    Both lists are reduced to 5-letter words first, so *rarestword* is
    only ever matched against 5-letter entries of *exclude_all*.
    """
    candidates = [word for word in words if len(word) == WORD_LENGTH]
    exclude_candidates = [
        word for word in exclude_all if len(word) == WORD_LENGTH
    ]
    exclude = build_exclude_set(exclude_candidates, rarestword)
    return [word for word in candidates if word not in exclude]


def pack_word(word):
    """Pack every letter of *word* after the first into three bytes.

    The first letter is implicit (it selects the table the word goes
    into).  Each following letter contributes its alphabet index shifted
    left by 5 bits per position; the resulting number is returned as
    ``bytes`` holding its low, middle and high byte, in that order.

    Raises:
        ValueError: if one of the packed letters is not in ``alphabet``.
    """
    number = 0
    for index, letter in enumerate(word[1:]):
        if letter not in LETTER_INDEX:
            raise ValueError(
                f'word {word!r} contains unsupported letter {letter!r}'
            )
        number += LETTER_INDEX[letter] << (BITS_PER_LETTER * index)
    return bytes([number & 0xff, (number >> 8) & 0xff, number >> 16])


def build_arrays(words):
    """Split *words* into per-start-letter lists of packed entries.

    Returns a dict with one key per letter of ``alphabet`` (in alphabet
    order); letters without any word map to an empty list.

    Raises:
        ValueError: if a word contains a letter outside ``alphabet``.
    """
    arrays = {letter: [] for letter in alphabet}
    for word in words:
        startletter = word[0]
        # Explicit check rather than ``assert``: asserts vanish under -O.
        if startletter not in arrays:
            raise ValueError(
                f'word {word!r} starts with unsupported letter '
                f'{startletter!r}'
            )
        arrays[startletter].append(pack_word(word))
    return arrays


def render_assembly(arrays):
    """Return the output text for the per-letter tables in *arrays*."""
    lines = []
    for startletter, array in arrays.items():
        lines.append(f'dictionary_{startletter}:\n')
        for packed in array:
            lines.append(f'\tdb {", ".join(str(byte) for byte in packed)}\n')
        lines.append('\n')

    # Index table: one label/count pair per start letter.
    lines.append('dictionaries:\n')
    for startletter, array in arrays.items():
        lines.append(f'\tdw dictionary_{startletter}, {len(array)}\n')
    return ''.join(lines)


def main(argv=None):
    """Run the conversion; *argv* defaults to ``sys.argv``.

    Returns the process exit status: 0 on success, 2 when fewer than
    four arguments were supplied.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 5:
        print(
            f'usage: {argv[0] if argv else "script"} '
            'SRCPATH EXCLUDEPATH RARESTWORD TARGETPATH',
            file=sys.stderr,
        )
        return 2

    srcpath = argv[1]
    excludepath = argv[2]
    rarestword = argv[3]
    targetpath = argv[4]

    with open(srcpath, 'r', encoding='utf-8') as f:
        words = json.load(f)

    with open(excludepath, 'r', encoding='utf-8') as f:
        exclude_all = json.load(f)

    arrays = build_arrays(select_words(words, exclude_all, rarestword))

    with open(targetpath, 'w') as f:
        f.write(render_assembly(arrays))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))