We shall compose a sonnet or two together!

2021-07-10 23:21:12 -04:00 · 2021-07-10 23:21:12 -04:00 · d58a32d07d
commit d58a32d07d
parent 855375ea18
2 changed files with 207 additions and 0 deletions
--- a/happybot/compose/compose.sh
+++ b/happybot/compose/compose.sh
@ -0,0 +1,24 @@
+#!/usr/bin/env sh
+
+# IRC command handler for compose-key lookups. Reacts to messages such as
+# "!compose <query>", "!composekey <query>" or "happybot: compose <query>".
+#
+# NOTE(review): irc, hreg, m, var, zwsp and say are not defined here; they
+# presumably come from common.sh. Confirm their behaviour there.
+
+. /home/zgrep/offtopiabday/happybot/common.sh
+
+dir="happybot/compose"
+
+# Print a sed script such as "3p;17p;" that selects every line of the file
+# $dir/$2 containing the fixed string $1 (case-insensitive). Prints nothing
+# when no line matches.
+_compose_line_script() {
+	grep -Fin -e "$1" "$dir/$2" | sed 's/:.*$/p/' | tr '\n' ';'
+}
+
+irc | while read -r n m; do
+	# Ignore everything that is not a compose request.
+	hreg '^(?:@?happybot[:,] |!)compose(?:key)? (.+)$' "$m" || continue
+
+	query="$(m 1)"
+	echo "Got: $query"
+
+	# Search the characters first; only fall back to the names and
+	# comments when no character matched.
+	lines="$(_compose_line_script "$query" chars.txt)"
+	if [ -z "$lines" ]; then
+		echo "| Did not match chars.txt."
+		lines="$(_compose_line_script "$query" comments.txt)"
+	fi
+	echo "| Lines: $lines"
+
+	if [ -n "$lines" ]; then
+		# The line numbers found above are used to print the same-numbered
+		# lines of sequences.txt, joined into one line with " | ".
+		results="$(sed -n "$lines" "$dir/sequences.txt" | sed 's/$/ | /' | tr -d '\n' | sed 's/ | $/\n/')"
+		var "$n: $results" | zwsp | say
+	else
+		echo "| No results."
+		var "$n: No results." | zwsp | say
+	fi
+done;
--- a/happybot/compose/update.py
+++ b/happybot/compose/update.py
@ -0,0 +1,183 @@
+from urllib.request import urlopen
+from collections import defaultdict
+from copy import deepcopy
+from os import chdir
+import string
+import re
+import unicodedata
+
+def keysym_names():
+    """Download keysymdef.h and map keysym names to Unicode characters.
+
+    Only lines of the form ``#define XK_<name> 0x... /* U+XXXX <comment> */``
+    (the comment may also open with ``/*(``) are used; every other line is
+    skipped. If a name occurs more than once, the last occurrence wins.
+
+    Returns:
+        dict mapping the keysym name (without the ``XK_`` prefix) to a
+        ``(character, comment)`` tuple.
+    """
+    pattern = re.compile(r'^#define XK_([a-zA-Z_0-9]+)\s+0x[0-9a-f]+\s*/\*[ (]U\+([0-9A-F]+) (.+?)\s*\*/\s*$')
+    names = {}
+    with urlopen('https://cgit.freedesktop.org/xorg/proto/x11proto/plain/keysymdef.h') as response:
+        for raw in response:
+            # Bytes that are not valid UTF-8 are dropped, not treated as fatal.
+            found = pattern.match(raw.decode('utf-8', 'ignore').strip())
+            if found is None:
+                continue
+            name, codepoint, comment = found.groups()
+            names[name] = (chr(int(codepoint, 16)), comment)
+    return names
+
+def _strip_c_comments(line, in_comment):
+    """Remove ``/* ... */`` comment text from a single line.
+
+    Args:
+        line: The line to clean.
+        in_comment: True if a comment opened on an earlier line is still open.
+
+    Returns:
+        ``(text, in_comment)``: the line without comment text, and whether a
+        comment is still open once the line ends.
+    """
+    kept = []
+    while line:
+        if in_comment:
+            _, closed, line = line.partition('*/')
+            in_comment = not closed
+        else:
+            before, opened, line = line.partition('/*')
+            kept.append(before)
+            in_comment = bool(opened)
+    return ''.join(kept), in_comment
+
+
+def compose_keys():
+    """Download and parse the en_US.UTF-8 Compose table from libX11.
+
+    Only ``<Multi_key> <key>... : "result" [name] # comment`` lines are used;
+    text inside ``/* ... */`` comments is ignored.
+
+    Returns:
+        A ``(char_to_sequence, char_to_name, name_to_char, char_to_comment)``
+        tuple:
+
+        * char_to_sequence: result string -> list of key-name lists.
+        * char_to_name: result string -> set of keysym names given for it.
+        * name_to_char: keysym name -> result string.
+        * char_to_comment: result string -> trailing ``#`` comment.
+
+    Raises:
+        AssertionError: If one keysym name is given for two different results,
+            or one result carries two different comments.
+        UnicodeDecodeError: If the downloaded file is not valid UTF-8.
+    """
+    compose_line = re.compile(r'^<Multi_key>((?:\s*<[a-zA-Z_0-9]+>)+)\s*:\s*"((?:[^"]|\\.)+)"\s*([a-zA-Z_0-9]*)\s*#\s*(.*)')
+    key_name = re.compile(r'<([a-zA-Z_0-9]+)>')
+
+    char_to_sequence = defaultdict(list)
+    char_to_name = defaultdict(set)
+    char_to_comment = dict()
+    name_to_char = dict()
+
+    with urlopen('https://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre') as web:
+        multilinecomment = False
+        for line in web:
+            # 'strict' is the real name of the "raise on bad bytes" handler;
+            # an unknown handler name only fails (with LookupError) once a
+            # bad byte is actually met.
+            line = line.decode('utf-8', 'strict').strip()
+            line, multilinecomment = _strip_c_comments(line, multilinecomment)
+
+            m = compose_line.match(line)
+            if m is None:
+                continue
+            sequence, char, name, comment = m.groups()
+
+            # Pull the names out of "<a> <b> ..." whatever whitespace
+            # separates the keys.
+            sequence = key_name.findall(sequence)
+            # Undo backslash escapes inside the quoted result.
+            char = re.sub(r'\\(.)', r'\1', char)
+
+            char_to_sequence[char].append(sequence)
+
+            if name:
+                char_to_name[char].add(name)
+                # setdefault records the first result seen for a name and
+                # hands back the stored one for comparison afterwards.
+                if name_to_char.setdefault(name, char) != char:
+                    print('Line:', line)
+                    print('Name:', name)
+                    print('Had char:', name_to_char[name])
+                    print('Given char:', char)
+                    raise AssertionError('keysym name %r maps to two different results' % name)
+
+            if char_to_comment.setdefault(char, comment) != comment:
+                print('Line:', line)
+                print('Char:', char)
+                print('Had comment:', char_to_comment[char])
+                print('Given comment:', comment)
+                raise AssertionError('result %r has two different comments' % char)
+
+    return char_to_sequence, char_to_name, name_to_char, char_to_comment
+
+def merged():
+    """Merge the Compose table with keysymdef.h and prepare display symbols.
+
+    Steps, in order:
+
+    1. Add every keysym name, character and comment from ``keysym_names()``
+       to the tables returned by ``compose_keys()``.
+    2. Make the names of the space character display as U+2423 (open box)
+       and ``Tab`` display as U+21E5 (rightwards arrow to bar).
+    3. Drop every sequence containing a key with no known display character
+       (for example dead keys); ``U<hex>`` key names are resolved to the
+       code point they spell out.
+    4. Prefix combining characters with U+25CC (dotted circle) so that they
+       are visible on their own.
+
+    Returns:
+        The ``(char_to_sequence, char_to_name, name_to_char, char_to_comment)``
+        tuple, updated as described above.
+
+    Raises:
+        AssertionError: If a keysym name maps to one character in the Compose
+            table and to another one in keysymdef.h.
+        ValueError: If a display symbol used here (U+2384 for the compose
+            key, U+2423 for space, U+21E5 for tab) is itself a known
+            character and could therefore be confused with a real key.
+    """
+    char_to_sequence, char_to_name, name_to_char, char_to_comment = compose_keys()
+
+    for name, (char, comment) in keysym_names().items():
+        # setdefault stores the keysym character for new names and returns
+        # the already known one otherwise.
+        if name_to_char.setdefault(name, char) != char:
+            print('Name:', name)
+            print('Compose char:', name_to_char[name])
+            print('Keysym char:', char)
+            raise AssertionError('keysym name %r maps to two different characters' % name)
+
+        char_to_name[char].add(name)
+
+        known = char_to_comment.get(char)
+        if known is None:
+            char_to_comment[char] = comment
+        elif known != comment and comment not in known.split(';'):
+            # Append each distinct comment only once.
+            char_to_comment[char] = known + ';' + comment
+
+    # Compose symbol:
+    if '\u2384' in char_to_name:
+        raise ValueError('Please use different symbol for compose key.')
+    # Space symbol:
+    space_names = char_to_name['\u2423']
+    for name in char_to_name[' ']:
+        name_to_char[name] = '\u2423'
+    # Tab symbol:
+    tab_sym_names = char_to_name['\u21e5']
+    name_to_char['Tab'] = '\u21e5'
+
+    # Filter out sequences that have keys we don't know how to display nicely.
+    # Includes stuff like deadkeys.
+    unicode_key = re.compile(r'^U[0-9a-fA-F]+$')
+    for char in list(char_to_sequence):
+        displayable = []
+        for sequence in char_to_sequence[char]:
+            if any(key in space_names for key in sequence):
+                raise ValueError('Please switch to using \\u2420 (\u2420) for space.')
+            if any(key in tab_sym_names for key in sequence):
+                raise ValueError('Please switch to using \\u2409 (\u2409) for tab.')
+            for key in sequence:
+                # Keys written as U<hex> name a code point directly.
+                if key not in name_to_char and unicode_key.match(key):
+                    name_to_char[key] = chr(int(key[1:], 16))
+            if all(key in name_to_char for key in sequence):
+                displayable.append(sequence)
+        if displayable:
+            char_to_sequence[char] = displayable
+        else:
+            del char_to_sequence[char]
+
+    # Put a dotted circle in front of combining characters so that they
+    # become more obvious as combining keys...
+    # ...but this is still not obvious, but oh well.
+    for name in list(name_to_char):
+        if unicodedata.combining(name_to_char[name]):
+            name_to_char[name] = '\u25cc' + name_to_char[name]
+
+    return char_to_sequence, char_to_name, name_to_char, char_to_comment
+
+def uni(char, zf=4):
+    """Return the code point of ``char`` as upper-case hex.
+
+    The result carries no ``0x`` prefix and is left-padded with zeros to at
+    least ``zf`` digits.
+    """
+    digits = format(ord(char), 'X')
+    return digits.zfill(zf)
+
+def main():
+    """Regenerate the three lookup files used by the compose command.
+
+    Writes one line per composable result, in the same order in all files,
+    so that line N of each file describes the same result:
+
+    * ``comments.txt``: keysym names, comment and, for single characters,
+      the ``U+XXXX`` and ``UXXXX`` spellings, separated by ``;``.
+    * ``chars.txt``: the result itself.
+    * ``sequences.txt``: the result, ``←``, then every key sequence, each
+      introduced by U+2384 (the compose key symbol).
+
+    Side effects: changes the working directory to the happybot compose
+    directory and prints ``Success!`` when done.
+    """
+    char_to_sequence, char_to_name, name_to_char, char_to_comment = merged()
+
+    comment_lines = []
+    char_lines = []
+    sequence_lines = []
+
+    for char, sequences in char_to_sequence.items():
+        # Sets have no stable order; sorting keeps the file identical from
+        # one run to the next.
+        fields = [';'.join(sorted(char_to_name[char])), char_to_comment[char]]
+        if len(char) == 1:
+            fields.append('U+' + uni(char))
+            fields.append('U' + uni(char))
+        comment_lines.append(';'.join(fields))
+        char_lines.append(char)
+        sequence_lines.append(char + ' ← ' + ' '.join(
+                '\u2384' + ''.join(name_to_char[name] for name in sequence)
+                for sequence in sequences
+                ))
+
+    chdir('/home/zgrep/offtopiabday/happybot/compose/')
+    outputs = (
+        ('comments.txt', comment_lines),
+        ('chars.txt', char_lines),
+        ('sequences.txt', sequence_lines),
+    )
+    for filename, lines in outputs:
+        # The content is not ASCII, so do not rely on the locale's encoding.
+        with open(filename, 'w', encoding='utf-8') as fh:
+            fh.writelines(line + '\n' for line in lines)
+
+    print('Success!')
+
+# TODO: Deadkey combinations decoding?
+# TODO: Allow deadkeys in compose-key combinations?
+
+# Regenerate the data files only when run as a script, not on import.
+if __name__ == '__main__':
+    main()