From 3a9c4baa4b99d77627092c12d27a693c5cdf711a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juhani=20Krekel=C3=A4?=
Date: Thu, 30 Aug 2018 00:16:22 +0300
Subject: [PATCH] Move to a space-separated format

---
Review notes (text between "---" and the diffstat is not part of the commit
message):
- parse_header() splits with maxsplit 2 instead of 3. With 3, a comment that
  contains a space ("SSHWOT 0 Emma G. 2018") produced four fields, failed the
  len(fields) == 3 test and was silently dropped.
- parse_header() checks the version field again and raises VersionMismatch
  (the exception the pre-patch code raised) for anything other than "0".
- decode_b64_field() catches ValueError only. read_file.py no longer imports
  base64, so naming base64.binascii.Error in the except clause would raise
  NameError; binascii.Error is a ValueError subclass anyway. The offending
  field is decoded with 'replace' so building the message cannot fail.
- Comment/typo fixes: "The entry is malformed" in parse_entry(), a stray
  duplicated diagram row in base64enc(), "inside" and "the ASCII character
  encoding" in sshwot-format.text.
- Indentation is assumed to be tabs and the blank context lines were placed
  so that the hunk counts add up; verify against the tree before applying.
  The "index" hashes were left as they were.

 src/hashing.py     | 41 +++++++++++++++++++
 src/read_file.py   | 99 ++++++++++++++++++++++------------------------
 src/write_file.py  | 22 +++++++----
 sshwot-format.text | 34 ++++++++--------
 4 files changed, 119 insertions(+), 77 deletions(-)

diff --git a/src/hashing.py b/src/hashing.py
index e6a33d4..0b8ff23 100644
--- a/src/hashing.py
+++ b/src/hashing.py
@@ -1,3 +1,4 @@
+import base64
 import hashlib
 import os
 
@@ -23,3 +24,43 @@ def hash_host(host):
 	salt = generate_salt()
 	hashed_host = hash_with_salt(host, salt)
 	return salt, hashed_host
+
+def base64enc(b):
+	"""base64enc(bytes) → bytes
+	Uses no padding"""
+	# Base 64 encodes 3 bytes as 4 characters
+	# /byte 1\/byte 2\/byte 3\
+	# ABCDEFGHijklmnopQRSTUVWX
+	# \64 1/\64 2/\64 3/\64 4/
+	#
+	# If you have only one or two bytes, you don't have enough bits to
+	# fill all of the characters. The rest of the bits will be taken to
+	# be zeroes.
+	# /byte 1\
+	# ABCDEFGH0000
+	# \64 1/\64 2/
+	#
+	# /byte 1\/byte 2\
+	# ABCDEFGHijklmnop00
+	# \64 1/\64 2/\64 3/
+	#
+	# This way you end up with only 2 or 3 characters containing info.
+	# This usually gets padded into a multiple of 4 with =. However,
+	# since the amount of bytes left over mod 4 is enough to generate
+	# the padding back, we can strip it out
+	return base64.b64encode(b).replace(b'=', b'')
+
+def base64dec(b64):
+	"""base64dec(bytes) → bytes
+	Can handle lack of padding."""
+	assert type(b64) == bytes
+
+	# Padded base64 is always a multiple of 4 bytes in length. The
+	# reasoning for this is because base64 decoding operates in groups
+	# of 4 base64 characters.
+	# Since we know the length of the string minus the padding, we can
+	# just pad it to the nearest multiple of 4
+
+	missing_padding_len = (4 - len(b64)%4) % 4
+	padding = b'=' * missing_padding_len
+	return base64.b64decode(b64 + padding, validate = True)
diff --git a/src/read_file.py b/src/read_file.py
index 619a645..8135104 100644
--- a/src/read_file.py
+++ b/src/read_file.py
@@ -1,4 +1,4 @@
-import base64
+import hashing
 
 import entry
 
@@ -30,88 +30,83 @@ def parse_header(header):
 	(if any) if it is"""
 	assert type(header) == bytes
 
-	magic = header[0:6]
-	if magic != b'SSHWOT':
+	# Check that it ends in a newline
+	if len(header) == 0 or header[-1] != 0x0a:
+		raise FileFormatError('No newline after header')
+
+	# Split it into fields and make sure we have at least the magic and
+	# the version
+	fields = header[:-1].split(b' ', 2)
+	if len(fields) < 2:
+		raise FileFormatError('Too few fields in the header, expected at least magic and version')
+
+	# Check the magic
+	if fields[0] != b'SSHWOT':
 		raise FileFormatError('Invalid magic')
 
 	# Version 0 is the current one
-	version = header[6:7]
-	if version == b'':
-		raise FileFormatError('No newline after header')
-	if version != b'0':
-		raise VersionMismatch('Version %i not supported' % version[0])
+	if fields[1] != b'0':
+		raise VersionMismatch('Version %s not supported' % fields[1].decode('utf-8', 'replace'))
 
 	# See if we have a comment
-	if header[7:8] == b' ':
+	if len(fields) == 3:
 		# It says we have
-		if header[8:9] == b'\n':
+		if len(fields[2]) == 0:
 			# No, we don't, but we do have a space telling we
 			# have. The header is malformed
 			raise FileFormatError('Missing comment or spurious space in the header')
 		else:
-			# Yes, we do
-			# Check it ends with a newline
-			if header[-1] != 0x0a:
-				raise FileFormatError('Missing newline at the end of the header')
-
+			# Yes, we do. Extract it
 			try:
-				file_comment = header[8:-1].decode('utf-8')
+				file_comment = fields[2].decode('utf-8')
 			except UnicodeDecodeError:
 				raise FileFormatError('Comment is not valid utf-8')
 
-			return file_comment
-
-	elif header[7:8] == b'\n':
-		# No, we have newline
-		return ''
-
 	else:
-		# No, we have something else
-		raise FileFormatError("Expected a space or a newline but got '%s' instead" % header[7:].decode('utf-8'))
+		file_comment = ''
+
+	return file_comment
 
 def parse_entry(line):
 	"""parse_entry(bytes) → Entry"""
 	assert type(line) == bytes
 
-	def extract_b64_field(rest):
-		"""extract_b64_field(bytes) → (bytes: decoded_field, bytes:rest)"""
-		field_b64 = rest[0:44]
-		if len(field_b64) != 44:
-			raise FileFormatError('Unexpected end of line')
+	def decode_b64_field(b64):
 		try:
-			field = base64.b64decode(field_b64, validate = True)
-		except (ValueError, base64.binascii.Error) as err:
-			raise FileFormatError('Malformed base64 string: %s' % field_b64.decode('utf-8')) from err
+			return hashing.base64dec(b64)
+		except ValueError as err:
+			raise FileFormatError('Malformed base64 string: %s' % b64.decode('utf-8', 'replace')) from err
 
-		return field, rest[44:]
+	# Check that it ends in a newline
+	if len(line) == 0 or line[-1] != 0x0a:
+		raise FileFormatError('No newline after entry')
 
-	salt, rest = extract_b64_field(line)
-	hashed_host, rest = extract_b64_field(rest)
-	fingerprint, rest = extract_b64_field(rest)
+	# Split the line into fields and make sure we have at least the
+	# salt, the hashed host, and the fingerprint
+	fields = line[:-1].split(b' ', 3)
+	if len(fields) < 3:
+		raise FileFormatError('Too few fields in the entry, expected in the very least salt, hashed host, and fingerprint')
 
-	# What do we have after that?
-	if rest[0:1] == b' ':
-		# A comment?
-		if rest[1:2] == b'\n':
-			# No, but it says we have. It's malformed
+	salt = decode_b64_field(fields[0])
+	hashed_host = decode_b64_field(fields[1])
+	fingerprint = decode_b64_field(fields[2])
+
+	# See if we have a comment
+	if len(fields) == 4:
+		# It says we have
+		if len(fields[3]) == 0:
+			# No, we don't, but we do have a space telling we
+			# have. The entry is malformed
 			raise FileFormatError('Missing comment or spurious space in the entry')
 		else:
-			# Yes. Make sure it ends in a newline
-			if rest[-1] != 0x0a:
-				raise FileFormatError('No newline after entry')
-
+			# Yes, we do. Extract it
 			try:
-				comment = rest[1:-1].decode('utf-8')
+				comment = fields[3].decode('utf-8')
 			except UnicodeDecodeError:
 				raise FileFormatError('Comment is not valid utf-8')
 
-	elif rest[0:1] == b'\n':
-		# A newline
-		comment = ''
-
 	else:
-		# Something else
-		raise FileFormatError('Expected a space or a newline but got "%s" instead' % rest.decode('utf-8'))
+		comment = ''
 
 	return entry.Entry(salt, hashed_host, fingerprint, comment)
 
diff --git a/src/write_file.py b/src/write_file.py
index 60614eb..0230384 100644
--- a/src/write_file.py
+++ b/src/write_file.py
@@ -1,4 +1,4 @@
-import base64
+import hashing
 
 def write_header(f, file_comment):
 	"""write_header(file(wb), str)
@@ -6,6 +6,8 @@ def write_header(f, file_comment):
 	assert type(file_comment) == str
 	# b'SSHWOT' magic
 	f.write(b'SSHWOT')
+	# Separating space
+	f.write(b' ')
 	# Version number
 	f.write(b'0')
 	# b' ' + file_comment, if there is one
@@ -24,14 +26,20 @@ def write_entry(f, salt, hashed_host, fingerprint, comment):
 	assert type(fingerprint) == bytes and len(fingerprint) == 32
 	assert type(comment) == str
 
-	# base64 encoded (44 bytes): salt
-	f.write(base64.b64encode(salt))
+	# base64 encoded salt
+	f.write(hashing.base64enc(salt))
 
-	# base64 encoded (44 bytes): hashed_host
-	f.write(base64.b64encode(hashed_host))
+	# Separating space
+	f.write(b' ')
 
-	# base64 encoded (44 bytes): fingerprint
-	f.write(base64.b64encode(fingerprint))
+	# base64 encoded hashed_host
+	f.write(hashing.base64enc(hashed_host))
+
+	# Separating space
+	f.write(b' ')
+
+	# base64 encoded fingerprint
+	f.write(hashing.base64enc(fingerprint))
 
 	# b' ' + comment, if there is one
 	if len(comment) > 0:
diff --git a/sshwot-format.text b/sshwot-format.text
index 04e6764..5c84651 100644
--- a/sshwot-format.text
+++ b/sshwot-format.text
@@ -1,34 +1,34 @@
-Please note that all text insire quotes in the EBNF here is to be taken to
-mean bytes that would decode as that using either the ASCII or the UTF-8
-character encoding. "\n" refers specifically to the byte 0x0a, and no
-alternative newlines are acceptable.
+Please note that all text inside quotes in the EBNF here is to be taken to
+mean bytes that would decode as that using the ASCII character encoding.
+"\n" refers specifically to the byte 0x0a, and no alternative newlines are
+acceptable.
 
 The file has a header like:
 
 magic = "SSHWOT" ;
 version = "0" ;
-comment = " ", ? General comment about the file. Valid utf-8, no '\n'. ? ;
-header = magic, version, [comment], "\n" ;
+comment = ? General comment about the file. Valid utf-8, no '\n'. ? ;
+header = magic, " ", version, [" ", comment], "\n" ;
 
-Examples of valid headers would be "SSHWOT0\n" and "SSHWOT0 Emma G. 2018\n".
+Examples of valid headers would be "SSHWOT 0\n" and "SSHWOT 0 Emma G. 2018\n".
 
-"SSHWOT0 \n" is not valid, since a space marks that there will be a comment.
+"SSHWOT 0 \n" is not valid, since a space marks that there will be a comment.
 
 After the header the entries are laid out as:
 
-salt = ? base64 encoded salt, 44 bytes long ? ;
-hashed host = ? base64 encoded sha256(host concat salt), 44 bytes long ? ;
-fingerprint = ? base64 encoded sha256-fingerprint, 44 bytes long ? ;
-comment = " ", ? Comment about the host/key. Valid utf-8, no '\n'. ? ;
-entry = salt, hashed host, fingerprint, [comment], "\n" ;
+salt = ? base64(salt) ? ;
+hashed host = ? base64(sha256(host concat salt)) ? ;
+fingerprint = ? base64(sha256-fingerprint) ? ;
+comment = ? Comment about the host/key. Valid utf-8, no '\n'. ? ;
+entry = salt, " ", hashed host, " ", fingerprint, [" ", comment], "\n" ;
 
-The version of base64 used uses + for 62 and / for 63, uses = for padding,
-and contains no breaks.
+The version of base64 used uses + for 62 and / for 63, doesn't use = for
+padding, and contains no breaks.
 
 Examples of valid entries are
-"Yixx+B6zrFoubPhBddgyx0nXHmbqMW1Wzneo4JqJv0U=yPUACFC/zPt/ENoIluOuWiTXor3r7oHhac63qej637E=QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2A=\n"
+"Yixx+B6zrFoubPhBddgyx0nXHmbqMW1Wzneo4JqJv0U yPUACFC/zPt/ENoIluOuWiTXor3r7oHhac63qej637E QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2A\n"
 and
-"bd/MfFs+DMVqNQQoZGGCvpTopeS0/Jt6GS5vg7J+638=cbbdTnuIh0ZwnM+/r3sAu4iHgaN3mpkcP9kJND4vBUo=YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXp7fH1+f4A= The old one\n"
+"bd/MfFs+DMVqNQQoZGGCvpTopeS0/Jt6GS5vg7J+638 cbbdTnuIh0ZwnM+/r3sAu4iHgaN3mpkcP9kJND4vBUo YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXp7fH1+f4A The old one\n"
 
 Again, if there is a space following the necessary parts, there must also be
 a comment or else the entry is malformed.