hello-dosdl/compress-dict.py

#!/usr/bin/env python
import json
import sys

# Usage: compress-dict.py SRC.json EXCLUDE.json RARESTWORD TARGET
#
# Reads a JSON array of words (SRC) and a JSON array of words to leave out
# (EXCLUDE), keeps only the 5-letter entries of both, drops every SRC word that
# appears in EXCLUDE up to and including RARESTWORD, and writes the remaining
# words to TARGET as `db`/`dw` lines, grouped by first letter.

alphabet = 'abcdefghijklmnopqrstuvwxyz'

# Only words of this length are kept.
WORD_LENGTH = 5
# Each letter after the first is stored as its index in `alphabet` (0-25),
# which fits in 5 bits; four such letters occupy 20 bits, i.e. 3 bytes.
BITS_PER_LETTER = 5
PACKED_SIZE = 3


def load_words(path):
	"""Return the 5-letter entries of the JSON array stored in the file *path*."""
	# JSON text is UTF-8; do not depend on the platform's default encoding.
	with open(path, 'r', encoding='utf-8') as f:
		return [word for word in json.load(f) if len(word) == WORD_LENGTH]


def build_exclude_set(exclude_all, rarestword):
	"""Return the words of *exclude_all* up to and including *rarestword*.

	If *rarestword* never occurs in *exclude_all*, every word is returned.
	"""
	exclude = set()
	for word in exclude_all:
		exclude.add(word)
		if word == rarestword:
			break
	return exclude


def pack_word(word):
	"""Pack letters 2-5 of *word* into three bytes, least significant first.

	The first letter is implicit (it selects the per-letter array), so only the
	remaining four letters are encoded, 5 bits each, with the second letter of
	the word in the lowest bits.

	Raises ValueError if *word* is not exactly 5 characters drawn from
	`alphabet`.
	"""
	if len(word) != WORD_LENGTH:
		raise ValueError(f'word {word!r} is not {WORD_LENGTH} letters long')
	indices = [alphabet.find(letter) for letter in word]
	if min(indices) < 0:
		# Explicit check instead of `assert`, which is removed under -O.
		raise ValueError(f'word {word!r} contains a character outside a-z')

	number = 0
	for position, value in enumerate(indices[1:]):
		number |= value << (BITS_PER_LETTER * position)
	return number.to_bytes(PACKED_SIZE, 'little')


def group_by_first_letter(words):
	"""Return a dict mapping each letter of `alphabet` to its packed words.

	Every letter gets an entry (possibly an empty list); words keep the order
	in which they appear in *words*.
	"""
	arrays = {letter: [] for letter in alphabet}
	for word in words:
		packed = pack_word(word)
		arrays[word[0]].append(packed)
	return arrays


def render_assembly(arrays):
	"""Return the text written to the target file for *arrays*.

	One `dictionary_<letter>:` label per start letter, followed by a `db` line
	of three decimal byte values per word, then a `dictionaries:` table with
	one `dw <label>, <word count>` line per start letter.
	"""
	lines = []
	for startletter, array in arrays.items():
		lines.append(f'dictionary_{startletter}:\n')
		for packed in array:
			lines.append(f'\tdb {", ".join(str(byte) for byte in packed)}\n')
		lines.append('\n')

	lines.append('dictionaries:\n')
	for startletter, array in arrays.items():
		lines.append(f'\tdw dictionary_{startletter}, {len(array)}\n')
	return ''.join(lines)


def main(argv):
	"""Run the conversion described at the top of the file for *argv*."""
	if len(argv) < 5:
		sys.exit(f'usage: {argv[0]} SRC.json EXCLUDE.json RARESTWORD TARGET')
	srcpath, excludepath, rarestword, targetpath = argv[1:5]

	words = load_words(srcpath)
	exclude = build_exclude_set(load_words(excludepath), rarestword)

	# Don't include words in the exclude list
	words = [word for word in words if word not in exclude]

	# Build the whole output first so a bad word never leaves a partial file.
	output = render_assembly(group_by_first_letter(words))
	with open(targetpath, 'w') as f:
		f.write(output)


if __name__ == '__main__':
	main(sys.argv)
First commit 2022-02-22 22:38:06 +00:00			`#!/usr/bin/env python`
			`import json`
			`import sys`

			`alphabet = 'abcdefghijklmnopqrstuvwxyz'`

			`srcpath = sys.argv[1]`
Check target words list when checking the guess word 2022-02-24 17:14:25 +00:00			`excludepath = sys.argv[2]`
			`rarestword = sys.argv[3]`
			`targetpath = sys.argv[4]`
First commit 2022-02-22 22:38:06 +00:00
			`with open(srcpath, 'r') as f:`
			`words = json.load(f)`

Check target words list when checking the guess word 2022-02-24 17:14:25 +00:00			`with open(excludepath, 'r') as f:`
			`exclude_all = json.load(f)`

First commit 2022-02-22 22:38:06 +00:00			`# We only care about 5-letter words`
			`words = [word for word in words if len(word) == 5]`
Check target words list when checking the guess word 2022-02-24 17:14:25 +00:00			`exclude_all = [word for word in exclude_all if len(word) == 5]`

			`exclude = set()`
			`for word in exclude_all:`
			`exclude.add(word)`
			`if word == rarestword: break`

			`# Don't include words in the exclude list`
			`words = [word for word in words if word not in exclude]`
First commit 2022-02-22 22:38:06 +00:00
			`# Split dictionary into per-startletter arrays`
			`arrays = {letter: [] for letter in alphabet}`
			`for word in words:`
			`assert word[0] in alphabet`
			`number = 0`
			`# First letter is implicit`
			`for index, letter in enumerate(word[1:]):`
			`number += alphabet.index(letter) << (5 * index)`
			`packed = bytes([number & 0xff, (number >> 8) & 0xff, number >> 16])`
			`arrays[word[0]].append(packed)`

			`with open(targetpath, 'w') as f:`
			`for startletter, array in arrays.items():`
			`f.write(f'dictionary_{startletter}:\n')`
			`for packed in array:`
			`f.write(f'\tdb {", ".join(str(byte) for byte in packed)}\n')`
			`f.write('\n')`

			`f.write('dictionaries:\n')`
			`for startletter in arrays:`
Check dictionary for word 2022-02-23 01:00:03 +00:00			`f.write(f'\tdw dictionary_{startletter}, {len(arrays[startletter])}\n')`