# happybot/happybot/compose/update.py
#
# (Page header captured from the web source viewer:)
# 184 lines
# 6.5 KiB
# Python
# Raw Normal View History

from urllib.request import urlopen
from collections import defaultdict
from copy import deepcopy
from os import chdir
import string
import re
import unicodedata
def keysym_names(lines=None):
    """Map X11 keysym names to the Unicode character they stand for.

    Scans ``keysymdef.h`` for lines of the form
    ``#define XK_<name> 0x<hex> /* U+XXXX <comment> */``; the parenthesised
    variant ``/*(U+XXXX <comment>)*/`` is accepted as well.

    Args:
        lines: Optional iterable of header lines, each ``bytes`` or ``str``.
            When omitted, the header is downloaded from cgit.freedesktop.org.

    Returns:
        dict mapping the keysym name (without the ``XK_`` prefix) to a
        ``(character, comment)`` tuple.
    """
    if lines is None:
        with urlopen('https://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h') as web:
            return keysym_names(web)

    keysym = re.compile(
        r'^#define XK_([a-zA-Z_0-9]+)\s+0x[0-9a-f]+\s*'
        r'/\*([ (])U\+([0-9A-F]+) (.+?)\s*\*/\s*$'
    )
    result = {}
    for line in lines:
        if isinstance(line, bytes):
            line = line.decode('utf-8', 'ignore')
        if m := keysym.match(line.strip()):
            name, opener, code_point, comment = m.groups()
            # In the "/*(U+XXXX NAME)*/" form the closing parenthesis belongs
            # to the wrapper, not to the character's description.
            if opener == '(' and comment.endswith(')'):
                comment = comment[:-1].rstrip()
            result[name] = (chr(int(code_point, 16)), comment)
    return result
def compose_keys(lines=None):
    """Parse ``<Multi_key>`` sequences out of an X11 ``Compose.pre`` file.

    Only lines of the form
    ``<Multi_key> <key>... : "result" [name] # comment`` are used.  C-style
    ``/* ... */`` comments are removed first, whether they sit on one line or
    span several.

    Args:
        lines: Optional iterable of file lines, each ``bytes`` or ``str``.
            When omitted, the en_US.UTF-8 ``Compose.pre`` is downloaded from
            cgit.freedesktop.org.

    Returns:
        A 4-tuple ``(char_to_sequence, char_to_name, name_to_char,
        char_to_comment)``:

        * ``char_to_sequence``: result string -> list of key-name lists
        * ``char_to_name``: result string -> set of keysym names
        * ``name_to_char``: keysym name -> result string
        * ``char_to_comment``: result string -> trailing ``#`` comment

    Raises:
        AssertionError: if one keysym name is given two different results, or
            one result is given two different comments.  Details are printed
            before raising.
        UnicodeDecodeError: if a ``bytes`` line is not valid UTF-8.
    """
    if lines is None:
        with urlopen('https://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre') as web:
            return compose_keys(web)

    compose_line = re.compile(r'^<Multi_key>((?:\s*<[a-zA-Z_0-9]+>)+)\s*:\s*"((?:[^"]|\\.)+)"\s*([a-zA-Z_0-9]*)\s*#\s*(.*)')
    key_name = re.compile(r'<([a-zA-Z_0-9]+)>')
    char_to_sequence = defaultdict(list)
    char_to_name = defaultdict(set)
    char_to_comment = {}
    name_to_char = {}

    in_comment = False
    for raw in lines:
        text = raw.decode('utf-8', 'strict') if isinstance(raw, bytes) else raw
        line, in_comment = _strip_c_comments(text.strip(), in_comment)
        line = line.strip()
        m = compose_line.match(line)
        if not m:
            continue

        sequence, char, name, comment = m.groups()
        # The pattern allows any whitespace (or none) between keys, so pull
        # the names out directly instead of splitting on single spaces.
        sequence = key_name.findall(sequence)
        # Undo backslash escapes inside the quoted result, e.g. \" -> ".
        char = re.sub(r'\\(.)', r'\1', char)
        char_to_sequence[char].append(sequence)

        if name:
            char_to_name[char].add(name)
            known_char = name_to_char.setdefault(name, char)
            if known_char != char:
                print('Line:', line)
                print('Name:', name)
                print('Had char:', known_char)
                print('Given char:', char)
                raise AssertionError(
                    f'keysym name {name!r} maps to both {known_char!r} and {char!r}'
                )

        known_comment = char_to_comment.setdefault(char, comment)
        if known_comment != comment:
            print('Line:', line)
            print('Char:', char)
            print('Had comment:', known_comment)
            print('Given comment:', comment)
            raise AssertionError(
                f'{char!r} has conflicting comments {known_comment!r} and {comment!r}'
            )

    return char_to_sequence, char_to_name, name_to_char, char_to_comment


def _strip_c_comments(line, in_comment):
    """Drop ``/* ... */`` text from *line*; return ``(remaining, still_in_comment)``."""
    kept = []
    while True:
        if in_comment:
            end = line.find('*/')
            if end < 0:
                # The comment continues onto the next line.
                break
            line = line[end + 2:]
            in_comment = False
        else:
            start = line.find('/*')
            if start < 0:
                kept.append(line)
                break
            kept.append(line[:start])
            line = line[start + 2:]
            in_comment = True
    return ''.join(kept), in_comment
def merged():
    """Combine the Compose table with the keysym table and make it displayable.

    Starts from the result of ``compose_keys()``, adds every entry of
    ``keysym_names()``, then:

    * displays the space keys as U+2423 and ``Tab`` as U+21E5,
    * resolves ``U<hex>`` key names to the code point they spell,
    * drops sequences containing a key with no known character (dead keys,
      for instance), and drops results left with no sequence at all,
    * prefixes combining characters with U+25CC so they have something to
      attach to.

    Returns:
        The same 4-tuple shape as ``compose_keys()``:
        ``(char_to_sequence, char_to_name, name_to_char, char_to_comment)``.

    Raises:
        AssertionError: if a keysym name maps to different characters in the
            two sources (details are printed first).
        ValueError: if one of the display symbols (U+2384, U+2423, U+21E5)
            collides with a character or key the tables actually use.
    """
    char_to_sequence, char_to_name, name_to_char, char_to_comment = compose_keys()

    for name, (char, comment) in keysym_names().items():
        known_char = name_to_char.setdefault(name, char)
        if known_char != char:
            print('Name:', name)
            print('Compose char:', known_char)
            print('Keysym char:', char)
            raise AssertionError(
                f'keysym name {name!r} maps to both {known_char!r} and {char!r}'
            )
        char_to_name[char].add(name)
        if char not in char_to_comment:
            char_to_comment[char] = comment
        elif char_to_comment[char] != comment:
            char_to_comment[char] += ';' + comment

    # Compose symbol:
    if '\u2384' in char_to_name:
        raise ValueError('Please use different symbol for compose key.')

    # Space symbol:
    space_names = char_to_name['\u2423']
    for name in char_to_name[' ']:
        name_to_char[name] = '\u2423'

    # Tab symbol:
    tab_sym_names = char_to_name['\u21e5']
    name_to_char['Tab'] = '\u21e5'

    # Filter out sequences that have keys we don't know how to display nicely.
    # Includes stuff like deadkeys.
    unicode_key = re.compile(r'U[0-9a-fA-F]+')
    for char in list(char_to_sequence):
        displayable = []
        for sequence in char_to_sequence[char]:
            if any(key in space_names for key in sequence):
                raise ValueError('Please switch to using \\u2420 (\u2420) for space.')
            if any(key in tab_sym_names for key in sequence):
                raise ValueError('Please switch to using \\u2409 (\u2409) for tab.')
            for key in sequence:
                if key not in name_to_char and unicode_key.fullmatch(key):
                    code_point = int(key[1:], 16)
                    # chr() rejects values beyond the Unicode range; leave
                    # such keys unknown so the sequence is filtered out.
                    if code_point <= 0x10FFFF:
                        name_to_char[key] = chr(code_point)
            if all(key in name_to_char for key in sequence):
                displayable.append(sequence)
        if displayable:
            char_to_sequence[char] = displayable
        else:
            del char_to_sequence[char]

    # Add spaces so that combining keys become more obvious as combining keys...
    # ...but this is still not obvious, but oh well.
    for name, value in list(name_to_char.items()):
        # unicodedata.combining() only accepts a single code point.
        if len(value) == 1 and unicodedata.combining(value):
            name_to_char[name] = '\u25cc' + value

    return char_to_sequence, char_to_name, name_to_char, char_to_comment
def uni(char, zf=4):
    """Return the code point of *char* as upper-case hex, zero-padded to *zf* digits."""
    code_point = ord(char)
    digits = format(code_point, 'X')
    return digits.zfill(zf)
def main(output_dir='/home/zgrep/offtopiabday/happybot/compose/'):
    """Regenerate ``comments.txt``, ``chars.txt`` and ``sequences.txt``.

    The three files are line-aligned: line *n* of each describes the same
    composed character.

    * ``comments.txt``: the keysym names, the comment and (for single code
      points) ``U+XXXX`` and ``UXXXX``, separated by ``;``.
    * ``chars.txt``: the composed character itself.
    * ``sequences.txt``: the character followed by its compose sequences,
      each introduced by U+2384 and separated by spaces.

    Args:
        output_dir: Directory the files are written to.  The process changes
            its working directory to it.
    """
    char_to_sequence, char_to_name, name_to_char, char_to_comment = merged()

    comment_lines = []
    char_lines = []
    sequence_lines = []
    for char, sequences in char_to_sequence.items():
        # Sorted so that repeated runs produce identical files; set iteration
        # order for strings changes from one interpreter run to the next.
        entry = ';'.join(sorted(char_to_name.get(char, ()))) + ';' + char_to_comment[char]
        if len(char) == 1:
            code_point = uni(char)
            entry += ';U+' + code_point + ';U' + code_point
        comment_lines.append(entry + '\n')

        char_lines.append(char + '\n')

        # NOTE(review): the original concatenated an empty string literal
        # between the character and its sequences; a separator may have been
        # lost there. Confirm against whatever reads sequences.txt.
        sequence_lines.append(char + ' '.join(
            '\u2384' + ''.join(name_to_char[name] for name in sequence)
            for sequence in sequences
        ) + '\n')

    chdir(output_dir)
    # The content is arbitrary Unicode, so do not depend on the locale's
    # default encoding.
    with open('comments.txt', 'w', encoding='utf-8') as fh:
        fh.write(''.join(comment_lines))
    with open('chars.txt', 'w', encoding='utf-8') as fh:
        fh.write(''.join(char_lines))
    with open('sequences.txt', 'w', encoding='utf-8') as fh:
        fh.write(''.join(sequence_lines))
    print('Success!')
# TODO: Deadkey combinations decoding?
# TODO: Allow deadkeys in compose-key combinations?
# Regenerate the output files only when run as a script, not on import.
if __name__ == '__main__':
    main()