happybot/happybot/compose/update.py

from urllib.request import urlopen
from collections import defaultdict
from copy import deepcopy
from os import chdir
import string
import re
import unicodedata

def keysym_names(lines=None):
    """Map X11 keysym names to their Unicode character and description.

    Recognises ``#define XK_<name> 0x<hex> /* U+XXXX DESCRIPTION */`` lines,
    as well as the parenthesised form ``/*(U+XXXX DESCRIPTION)*/``. Lines
    that do not carry a ``U+XXXX`` annotation are ignored.

    Args:
        lines: Optional iterable of header lines (``bytes`` or ``str``).
            When omitted, ``keysymdef.h`` is downloaded from freedesktop.org.

    Returns:
        A dict mapping the keysym name (without the ``XK_`` prefix) to a
        ``(character, description)`` tuple.
    """
    if lines is None:
        url = 'https://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h'
        with urlopen(url) as web:
            # Parse while the response is still open.
            return keysym_names(web)

    # Same pattern as before, except that the character following "/*"
    # (space or opening parenthesis) is captured so the matching closing
    # parenthesis can be removed from the description.
    keysym = re.compile(
        r'^#define XK_([a-zA-Z_0-9]+)\s+0x[0-9a-f]+\s*/\*([ (])U\+([0-9A-F]+) (.+?)\s*\*/\s*$'
    )

    result = {}
    for line in lines:
        if isinstance(line, bytes):
            line = line.decode('utf-8', 'ignore')
        m = keysym.match(line.strip())
        if m is None:
            continue
        name, opener, codepoint, comment = m.groups()
        if opener == '(' and comment.endswith(')'):
            # "/*(U+003C LESS-THAN SIGN)*/" would otherwise yield
            # "LESS-THAN SIGN)", which never compares equal to the
            # description of the same character found elsewhere.
            comment = comment[:-1].rstrip()
        result[name] = (chr(int(codepoint, 16)), comment)
    return result

def _strip_block_comments(line, in_comment):
    """Remove ``/* ... */`` spans from *line*.

    Returns ``(text, in_comment)`` where *text* is the stripped remainder of
    the line and *in_comment* tells whether a comment is still open at the
    end of the line.
    """
    kept = []
    while line:
        if in_comment:
            end = line.find('*/')
            if end < 0:
                # The rest of the line belongs to the open comment.
                break
            line = line[end + 2:]
            in_comment = False
        else:
            start = line.find('/*')
            if start < 0:
                kept.append(line)
                break
            kept.append(line[:start])
            line = line[start + 2:]
            in_comment = True
    return ''.join(kept).strip(), in_comment


def compose_keys(lines=None):
    """Parse ``<Multi_key>`` sequences out of an X11 Compose file.

    Only lines of the form
    ``<Multi_key> <k1> <k2> ... : "result" [keysym] # comment`` are used;
    everything else (including ``/* ... */`` comments) is skipped.

    Args:
        lines: Optional iterable of Compose-file lines (``bytes`` or
            ``str``). When omitted, ``Compose.pre`` for ``en_US.UTF-8`` is
            downloaded from freedesktop.org.

    Returns:
        A tuple ``(char_to_sequence, char_to_name, name_to_char,
        char_to_comment)``:

        * ``char_to_sequence``: result string -> list of key-name lists.
        * ``char_to_name``: result string -> set of keysym names.
        * ``name_to_char``: keysym name -> result string.
        * ``char_to_comment``: result string -> trailing ``#`` comment.

    Raises:
        AssertionError: if one keysym name is given two different results,
            or one result is given two different comments.
        UnicodeDecodeError: if a ``bytes`` line is not valid UTF-8.
    """
    if lines is None:
        url = 'https://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre'
        with urlopen(url) as web:
            # Parse while the response is still open.
            return compose_keys(web)

    compose_line = re.compile(r'^<Multi_key>((?:\s*<[a-zA-Z_0-9]+>)+)\s*:\s*"((?:[^"]|\\.)+)"\s*([a-zA-Z_0-9]*)\s*#\s*(.*)')
    key_name = re.compile(r'<([a-zA-Z_0-9]+)>')

    char_to_sequence = defaultdict(list)
    char_to_name = defaultdict(set)
    char_to_comment = dict()
    name_to_char = dict()

    in_comment = False
    for raw in lines:
        if isinstance(raw, bytes):
            # 'strict' is the real name of the handler that raises on bad
            # input; an unknown handler name only fails (with LookupError)
            # once a bad byte is actually met.
            raw = raw.decode('utf-8', 'strict')
        line, in_comment = _strip_block_comments(raw.strip(), in_comment)

        m = compose_line.match(line)
        if m is None:
            continue
        sequence, char, name, comment = m.groups()

        # The pattern allows any whitespace between keys, so pull the names
        # out by pattern rather than splitting on a single space.
        sequence = key_name.findall(sequence)
        # Undo backslash escapes inside the quoted result.
        char = re.sub(r'\\(.)', r'\1', char)

        char_to_sequence[char].append(sequence)

        if name:
            char_to_name[char].add(name)
            known_char = name_to_char.setdefault(name, char)
            if known_char != char:
                # Raised explicitly so the check survives ``python -O``.
                raise AssertionError(
                    f'Line: {line}\nName: {name}\n'
                    f'Had char: {known_char}\nGiven char: {char}'
                )

        known_comment = char_to_comment.setdefault(char, comment)
        if known_comment != comment:
            raise AssertionError(
                f'Line: {line}\nChar: {char}\n'
                f'Had comment: {known_comment}\nGiven comment: {comment}'
            )

    return char_to_sequence, char_to_name, name_to_char, char_to_comment

def merged():
    """Combine the Compose-file tables with the keysym header tables.

    Calls ``compose_keys()`` and ``keysym_names()``, merges the keysym data
    into the compose tables, substitutes visible symbols for space and Tab,
    drops sequences containing keys that have no known character, and
    prefixes combining characters with U+25CC so they have something to
    attach to.

    Returns:
        The updated ``(char_to_sequence, char_to_name, name_to_char,
        char_to_comment)`` tuple.

    Raises:
        AssertionError: if a keysym name maps to different characters in the
            Compose file and the keysym header.
        ValueError: if one of the display symbols (U+2384, U+2423, U+21E5)
            collides with a real key.
    """
    char_to_sequence, char_to_name, name_to_char, char_to_comment = compose_keys()

    for name, (char, comment) in keysym_names().items():
        known_char = name_to_char.setdefault(name, char)
        if known_char != char:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(
                f'Name: {name}\nCompose char: {known_char}\nKeysym char: {char}'
            )

        char_to_name[char].add(name)

        if char not in char_to_comment:
            char_to_comment[char] = comment
        elif char_to_comment[char] != comment:
            char_to_comment[char] += ';' + comment

    # Compose symbol: U+2384 is used to mark the compose key in the output,
    # so it must not be a character that has keysym names of its own.
    if '\u2384' in char_to_name:
        raise ValueError('Please use different symbol for compose key.')
    # Space symbol: every name of the real space is displayed as U+2423.
    space_names = char_to_name['\u2423']
    for name in char_to_name[' ']:
        name_to_char[name] = '\u2423'
    # Tab symbol: Tab is displayed as U+21E5.
    tab_sym_names = char_to_name['\u21e5']
    name_to_char['Tab'] = '\u21e5'

    # Filter out sequences that have keys we don't know how to display nicely.
    # Includes stuff like deadkeys.
    unicode_key = re.compile(r'^U[0-9a-fA-F]+$')
    for char in list(char_to_sequence):
        kept = []
        for sequence in char_to_sequence[char]:
            if any(key in space_names for key in sequence):
                raise ValueError('Please switch to using \\u2420 (\u2420) for space.')
            if any(key in tab_sym_names for key in sequence):
                # Previously repeated the space message verbatim.
                raise ValueError('Please use different symbol for tab key.')
            for key in sequence:
                # Keys written as U<hex> name a code point directly.
                if key not in name_to_char and unicode_key.match(key):
                    name_to_char[key] = chr(int(key[1:], 16))
            if all(key in name_to_char for key in sequence):
                kept.append(sequence)
        if kept:
            char_to_sequence[char] = kept
        else:
            del char_to_sequence[char]

    # Prefix combining characters with a dotted circle (U+25CC) so they are
    # more obviously combining keys... but this is still not obvious, oh well.
    for name, shown in list(name_to_char.items()):
        # unicodedata.combining() accepts exactly one character; results
        # that are longer strings are left untouched instead of raising.
        if len(shown) == 1 and unicodedata.combining(shown):
            name_to_char[name] = '\u25cc' + shown

    return char_to_sequence, char_to_name, name_to_char, char_to_comment

def uni(char, zf=4):
    """Return the code point of *char* as uppercase hex, zero-padded to *zf* digits."""
    code_point = ord(char)
    digits = format(code_point, 'X')
    return digits.zfill(zf)

def main(output_dir='/home/zgrep/offtopiabday/happybot/compose/'):
    """Regenerate ``comments.txt``, ``chars.txt`` and ``sequences.txt``.

    Each character that has at least one displayable compose sequence
    contributes one line to every file, in the same order:

    * ``comments.txt``: keysym names, description and (for single code
      points) ``U+XXXX`` / ``UXXXX``, joined with ``;``.
    * ``chars.txt``: the character itself.
    * ``sequences.txt``: ``<char> ← `` followed by its sequences, each
      prefixed with U+2384 and separated by spaces.

    Args:
        output_dir: Directory the files are written to. The process changes
            its working directory to it, as before.
    """
    char_to_sequence, char_to_name, name_to_char, char_to_comment = merged()

    comment_lines = []
    char_lines = []
    sequence_lines = []

    for char, sequences in char_to_sequence.items():
        # Sets iterate in arbitrary order; sort so reruns give the same file.
        comment = ';'.join(sorted(char_to_name[char])) + ';' + char_to_comment[char]
        if len(char) == 1:
            comment += ';U+' + uni(char) + ';U' + uni(char)
        comment_lines.append(comment + '\n')
        char_lines.append(char + '\n')
        shown = ' '.join(
            '\u2384' + ''.join(name_to_char[name] for name in sequence)
            for sequence in sequences
        )
        sequence_lines.append(char + ' ← ' + shown + '\n')

    chdir(output_dir)
    outputs = (
        ('comments.txt', comment_lines),
        ('chars.txt', char_lines),
        ('sequences.txt', sequence_lines),
    )
    for filename, lines in outputs:
        # The content is full of non-ASCII characters, so do not rely on
        # the locale's default encoding.
        with open(filename, 'w', encoding='utf-8') as fh:
            fh.writelines(lines)

    print('Success!')

# TODO: Deadkey combinations decoding?
# TODO: Allow deadkeys in compose-key combinations?

# Regenerate the output files only when run as a script, not on import.
if __name__ == '__main__':
    main()
We shall compose a sonnet or two together! 2021-07-11 03:21:12 +00:00			`from urllib.request import urlopen`
			`from collections import defaultdict`
			`from copy import deepcopy`
			`from os import chdir`
			`import string`
			`import re`
			`import unicodedata`

			`def keysym_names():`
			`result = dict()`
			`keysym = re.compile(r'^#define XK_([a-zA-Z_0-9]+)\s+0x[0-9a-f]+\s*/\*[ (]U\+([0-9A-F]+) (.+?)\s*\*/\s*$')`
			`with urlopen('https://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h') as web:`
			`for line in web:`
			`line = line.decode('utf-8', 'ignore').strip()`
			`if m := keysym.match(line):`
			`name, unicode, comment = m.groups()`
			`result[name] = (chr(int(unicode, 16)), comment)`
			`return result`

			`def compose_keys():`
			`compose_line = re.compile(r'^<Multi_key>((?:\s*<[a-zA-Z_0-9]+>)+)\s*:\s*"((?:[^"]|\\.)+)"\s*([a-zA-Z_0-9]*)\s*#\s*(.*)')`

			`char_to_sequence = defaultdict(list)`
			`char_to_name = defaultdict(set)`
			`char_to_comment = dict()`
			`name_to_char = dict()`

			`with urlopen('https://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre') as web:`
			`multilinecomment = False`
			`for line in web:`
			`line = line.decode('utf-8', 'error').strip()`
			`if multilinecomment:`
			`try:`
			`index = line.index('*/')`
			`multilinecomment = False`
			`line = line[index + 2:]`
			`except:`
			`continue`
			`else:`
			`try:`
			`index = line.index('/*')`
			`multilinecomment = True`
			`line = line[:index]`
			`except:`
			`pass`

			`if m := compose_line.match(line):`
			`sequence, char, name, comment = m.groups()`

			`sequence = [key[1:-1] for key in sequence.strip().split(' ')]`
			`char = re.sub(r'\\(.)', r'\1', char)`

			`char_to_sequence[char].append(sequence)`

			`if name:`
			`char_to_name[char].add(name)`

			`if name in name_to_char:`
			`try:`
			`assert name_to_char[name] == char`
			`except:`
			`print('Line:', line)`
			`print('Name:', name)`
			`print('Had char:', name_to_char[name])`
			`print('Given char:', char)`
			`raise`
			`else:`
			`name_to_char[name] = char`

			`if char in char_to_comment:`
			`try:`
			`assert char_to_comment[char] == comment`
			`except:`
			`print('Line:', line)`
			`print('Char:', char)`
			`print('Had comment:', char_to_comment[char])`
			`print('Given comment:', comment)`
			`raise`
			`else:`
			`char_to_comment[char] = comment`

			`return char_to_sequence, char_to_name, name_to_char, char_to_comment`

			`def merged():`
			`char_to_sequence, char_to_name, name_to_char, char_to_comment = compose_keys()`

			`for name, (char, comment) in keysym_names().items():`
			`if name in name_to_char:`
			`try:`
			`assert name_to_char[name] == char`
			`except:`
			`print('Name:', name)`
			`print('Compose char:', name_to_char[name])`
			`print('Keysym char:', char)`
			`raise`
			`else:`
			`name_to_char[name] = char`

			`char_to_name[char].add(name)`

			`if char in char_to_comment:`
			`if char_to_comment[char] != comment:`
			`char_to_comment[char] += ';' + comment`
			`else:`
			`char_to_comment[char] = comment`

			`# Compose symbol:`
			`try:`
			`assert '\u2384' not in char_to_name`
			`except:`
			`raise ValueError('Please use different symbol for compose key.')`
			`# Space symbol:`
			`space_names = char_to_name['\u2423']`
			`for name in char_to_name[' ']:`
			`name_to_char[name] = '\u2423'`
			`# Tab symbol:`
			`tab_sym_names = char_to_name['\u21e5']`
			`name_to_char['Tab'] = '\u21e5'`

			`# Filter out sequences that have keys we don't know how to display nicely.`
			`# Includes stuff like deadkeys.`
			`for char in list(char_to_sequence.keys()):`
			`sequences = char_to_sequence[char]`
			`new_sequences = []`
			`for sequence in sequences:`
			`if any(key in space_names for key in sequence):`
			`raise ValueError('Please switch to using \\u2420 (\u2420) for space.')`
			`if any(key in tab_sym_names for key in sequence):`
			`raise ValueError('Please switch to using \\u2420 (\u2420) for space.')`
			`for key in sequence:`
			`if key not in name_to_char and re.match(r'^U[0-9a-fA-F]+$', key):`
			`name_to_char[key] = chr(int(key[1:], 16))`
			`if all(key in name_to_char for key in sequence):`
			`new_sequences.append(sequence)`
			`if new_sequences:`
			`char_to_sequence[char] = new_sequences`
			`else:`
			`del char_to_sequence[char]`

			`# Add spaces so that combining keys become more obvious as combining keys...`
			`# ...but this is still not obvious, but oh well.`
			`for name in list(name_to_char.keys()):`
			`if unicodedata.combining(name_to_char[name]):`
			`name_to_char[name] = '\u25cc' + name_to_char[name]`

			`return char_to_sequence, char_to_name, name_to_char, char_to_comment`

			`def uni(char, zf=4):`
			`return hex(ord(char))[2:].upper().zfill(zf)`

			`def main():`
			`char_to_sequence, char_to_name, name_to_char, char_to_comment = merged()`

			`comments_str = ''`
			`chars_str = ''`
			`sequences_str = ''`

			`for char, sequences in char_to_sequence.items():`
			`comments_str += ';'.join(char_to_name[char]) + ';' + char_to_comment[char]`
			`if len(char) == 1:`
			`comments_str += ';U+' + uni(char) + ';U' + uni(char)`
			`comments_str += '\n'`
			`chars_str += char + '\n'`
			`sequences_str += char + ' ← ' + ' '.join(`
			`'\u2384' + ''.join(name_to_char[name] for name in sequence)`
			`for sequence in sequences`
			`) + '\n'`

			`chdir('/home/zgrep/offtopiabday/happybot/compose/')`
			`with open('comments.txt', 'w') as fh:`
			`fh.write(comments_str)`
			`with open('chars.txt', 'w') as fh:`
			`fh.write(chars_str)`
			`with open('sequences.txt', 'w') as fh:`
			`fh.write(sequences_str)`

			`print('Success!')`

			`# TODO: Deadkey combinations decoding?`
			`# TODO: Allow deadkeys in compose-key combinations?`

			`if __name__ == '__main__':`
			`main()`