Move to a space-separated format

2018-08-30 00:16:22 +03:00 · 2018-08-30 00:16:22 +03:00 · 3a9c4baa4b
parent 5397293f33
commit 3a9c4baa4b
4 changed files with 117 additions and 76 deletions
--- a/src/hashing.py
+++ b/src/hashing.py
@ -1,3 +1,4 @@
+import base64
 import hashlib
 import os

@ -23,3 +24,44 @@ def hash_host(host):
 	salt = generate_salt()
 	hashed_host = hash_with_salt(host, salt)
 	return salt, hashed_host
+
+def base64enc(b):
+	"""base64enc(bytes) → bytes
+	Uses no padding"""
+	# Base 64 encodes 3 bytes as 4 characters
+	# /byte 1\/byte 2\/byte 3\
+	# ABCDEFGHijklmnopQRSTUVWX
+	# \64 1/\64 2/\64 3/\64 4/
+	#
+	# If you have only one or two bytes, you don't have enough bits to
+	# fill all of the characters. The rest of the bits will be taken to
+	# be zeroes.
+	# /byte 1\
+	# ABCDEFGH0000
+	# \64 1/\64 2/
+	# /byte 1\
+	#
+	# /byte 1\/byte 2\
+	# ABCDEFGHijklmnop00
+	# \64 1/\64 2/\64 3/
+	#
+	# This way you end up with only 2 or 3 characters containing info.
+	# This usually gets padded into a multiple of 4 with =. However,
+	# since the length of the unpadded output mod 4 is enough to
+	# regenerate the padding, we can strip it out
+	return base64.b64encode(b).replace(b'=', b'')
+
+def base64dec(b64):
+	"""base64dec(bytes) → bytes
+	Can handle lack of padding."""
+	assert type(b64) == bytes
+
+	# Padded base64 is always a multiple of 4 bytes in length. The
+	# reasoning for this is because base64 decoding operates in groups
+	# of 4 base64 characters.
+	# Since we know the length of the string minus the padding, we can
+	# just pad it to the nearest multiple of 4
+
+	missing_padding_len = (4 - len(b64)%4) % 4
+	padding = b'=' * missing_padding_len
+	return base64.b64decode(b64 + padding, validate = True)
--- a/src/read_file.py
+++ b/src/read_file.py
@ -1,4 +1,4 @@
-import base64
+import hashing

 import entry

@ -30,88 +30,79 @@ def parse_header(header):
 	(if any) if it is"""
 	assert type(header) == bytes

-	magic = header[0:6]
-	if magic != b'SSHWOT':
+	# Check that it ends in a newline
+	if len(header) == 0 or header[-1] != 0x0a:
+		raise FileFormatError('No newline after header')
+
+	# Split it into fields and make sure we have at least the magic and
+	# the version
+	fields = header[:-1].split(b' ', 3)
+	if len(fields) < 2:
+		raise FileFormatError('Too few fields in the header, expected at least magic and version')
+
+	# Check the magic
+	if fields[0] != b'SSHWOT':
 		raise FileFormatError('Invalid magic')

-	# Version 0 is the current one
-	version = header[6:7]
-	if version == b'':
-		raise FileFormatError('No newline after header')
-	if version != b'0':
-		raise VersionMismatch('Version %i not supported' % version[0])
-
 	# See if we have a comment
-	if header[7:8] == b' ':
+	if len(fields) == 3:
 		# It says we have
-		if header[8:9] == b'\n':
+		if len(fields[2]) == 0:
 			# No, we don't, but we do have a space telling we
 			# have. The header is malformed
 			raise FileFormatError('Missing comment or spurious space in the header')
 		else:
-			# Yes, we do
-			# Check it ends with a newline
-			if header[-1] != 0x0a:
-				raise FileFormatError('Missing newline at the end of the header')
-
+			# Yes, we do. Extract it
 			try:
-				file_comment = header[8:-1].decode('utf-8')
+				file_comment = fields[2].decode('utf-8')
 			except UnicodeDecodeError:
 				raise FileFormatError('Comment is not valid utf-8')

-			return file_comment
-
-	elif header[7:8] == b'\n':
-		# No, we have newline
-		return ''
-
 	else:
-		# No, we have something else
-		raise FileFormatError("Expected a space or a newline but got '%s' instead" % header[7:].decode('utf-8'))
+		file_comment = ''
+
+	return file_comment

 def parse_entry(line):
 	"""parse_entry(bytes) → Entry"""
 	assert type(line) == bytes

-	def extract_b64_field(rest):
-		"""extract_b64_field(bytes) → (bytes: decoded_field, bytes:rest)"""
-		field_b64 = rest[0:44]
-		if len(field_b64) != 44:
-			raise FileFormatError('Unexpected end of line')
+	def decode_b64_field(b64):
 		try:
-			field = base64.b64decode(field_b64, validate = True)
+			return hashing.base64dec(b64)
 		except (ValueError, base64.binascii.Error) as err:
-			raise FileFormatError('Malformed base64 string: %s' % field_b64.decode('utf-8')) from err
+			raise FileFormatError('Malformed base64 string: %s' % b64.decode('utf-8')) from err

-		return field, rest[44:]
+	# Check that it ends in a newline
+	if len(line) == 0 or line[-1] != 0x0a:
+		raise FileFormatError('No newline after entry')

-	salt, rest = extract_b64_field(line)
-	hashed_host, rest = extract_b64_field(rest)
-	fingerprint, rest = extract_b64_field(rest)
+	# Split the line into fields and make sure we have at least the
+	# salt, the hashed host, and the fingerprint
+	fields = line[:-1].split(b' ', 3)
+	if len(fields) < 3:
+		raise FileFormatError('Too few fields in the entry, expected in the very least salt, hashed host, and fingerprint')

-	# What do we have after that?
-	if rest[0:1] == b' ':
-		# A comment?
-		if rest[1:2] == b'\n':
-			# No, but it says we have. It's malformed
+	salt = decode_b64_field(fields[0])
+	hashed_host = decode_b64_field(fields[1])
+	fingerprint = decode_b64_field(fields[2])
+
+	# See if we have a comment
+	if len(fields) == 4:
+		# It says we have
+		if len(fields[3]) == 0:
+			# No, we don't, but we do have a space telling we
+			# have. The entry is malformed
 			raise FileFormatError('Missing comment or spurious space in the entry')
 		else:
-			# Yes. Make sure it ends in a newline
-			if rest[-1] != 0x0a:
-				raise FileFormatError('No newline after entry')
-
+			# Yes, we do. Extract it
 			try:
-				comment = rest[1:-1].decode('utf-8')
+				comment = fields[3].decode('utf-8')
 			except UnicodeDecodeError:
 				raise FileFormatError('Comment is not valid utf-8')

-	elif rest[0:1] == b'\n':
-		# A newline
-		comment = ''
-
 	else:
-		# Something else
-		raise FileFormatError('Expected a space or a newline but got "%s" instead' % rest.decode('utf-8'))
+		comment = ''

 	return entry.Entry(salt, hashed_host, fingerprint, comment)

--- a/src/write_file.py
+++ b/src/write_file.py
@ -1,4 +1,4 @@
-import base64
+import hashing

 def write_header(f, file_comment):
 	"""write_header(file(wb), str)
@ -6,6 +6,8 @@ def write_header(f, file_comment):
 	assert type(file_comment) == str
 	# b'SSHWOT' magic
 	f.write(b'SSHWOT')
+	# Separating space
+	f.write(b' ')
 	# Version number
 	f.write(b'0')
 	# b' ' + file_comment, if there is one
@ -24,14 +26,20 @@ def write_entry(f, salt, hashed_host, fingerprint, comment):
 	assert type(fingerprint) == bytes and len(fingerprint) == 32
 	assert type(comment) == str

-	# base64 encoded (44 bytes): salt
-	f.write(base64.b64encode(salt))
+	# base64 encoded salt
+	f.write(hashing.base64enc(salt))

-	# base64 encoded (44 bytes): hashed_host
-	f.write(base64.b64encode(hashed_host))
+	# Separating space
+	f.write(b' ')

-	# base64 encoded (44 bytes): fingerprint
-	f.write(base64.b64encode(fingerprint))
+	# base64 encoded hashed_host
+	f.write(hashing.base64enc(hashed_host))
+
+	# Separating space
+	f.write(b' ')
+
+	# base64 encoded fingerprint
+	f.write(hashing.base64enc(fingerprint))

 	# b' ' + comment, if there is one
 	if len(comment) > 0:
--- a/sshwot-format.text
+++ b/sshwot-format.text
@ -1,34 +1,34 @@
 Please note that all text insire quotes in the EBNF here is to be taken to
-mean bytes that would decode as that using either the ASCII or the UTF-8
-character encoding. "\n" refers specifically to the byte 0x0a, and no
-alternative newlines are acceptable.
+mean bytes that would decode as that using the ASCII character encoding.
+"\n" refers specifically to the byte 0x0a, and no alternative newlines are
+acceptable.

 The file has a header like:

 magic   = "SSHWOT" ;
 version = "0" ;
-comment = " ", ? General comment about the file. Valid utf-8, no '\n'. ? ;
-header  = magic, version, [comment], "\n" ;
+comment = ? General comment about the file. Valid utf-8, no '\n'. ? ;
+header  = magic, " ", version, [" ", comment], "\n" ;

-Examples of valid headers would be "SSHWOT0\n" and "SSHWOT0 Emma G. 2018\n".
+Examples of valid headers would be "SSHWOT 0\n" and "SSHWOT 0 Emma G. 2018\n".

-"SSHWOT0 \n" is not valid, since a space marks that there will be a comment.
+"SSHWOT 0 \n" is not valid, since a space marks that there will be a comment.

 After the header the entries are laid out as:

-salt        = ? base64 encoded salt, 44 bytes long ? ;
-hashed host = ? base64 encoded sha256(host concat salt), 44 bytes long ? ;
-fingerprint = ? base64 encoded sha256-fingerprint, 44 bytes long ? ;
-comment     = " ", ? Comment about the host/key. Valid utf-8, no '\n'. ? ;
-entry       = salt, hashed host, fingerprint, [comment], "\n" ;
+salt        = ? base64(salt) ? ;
+hashed host = ? base64(sha256(host concat salt)) ? ;
+fingerprint = ? base64(sha256-fingerprint) ? ;
+comment     = ? Comment about the host/key. Valid utf-8, no '\n'. ? ;
+entry       = salt, " ", hashed host, " ", fingerprint, [" ", comment], "\n" ;

-The version of base64 used uses + for 62 and / for 63, uses = for padding,
-and contains no breaks.
+The version of base64 used uses + for 62 and / for 63, doesn't use = for
+padding, and contains no breaks.

 Examples of valid entries are
-"Yixx+B6zrFoubPhBddgyx0nXHmbqMW1Wzneo4JqJv0U=yPUACFC/zPt/ENoIluOuWiTXor3r7oHhac63qej637E=QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2A=\n"
+"Yixx+B6zrFoubPhBddgyx0nXHmbqMW1Wzneo4JqJv0U yPUACFC/zPt/ENoIluOuWiTXor3r7oHhac63qej637E QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2A\n"
 and
-"bd/MfFs+DMVqNQQoZGGCvpTopeS0/Jt6GS5vg7J+638=cbbdTnuIh0ZwnM+/r3sAu4iHgaN3mpkcP9kJND4vBUo=YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXp7fH1+f4A= The old one\n"
+"bd/MfFs+DMVqNQQoZGGCvpTopeS0/Jt6GS5vg7J+638 cbbdTnuIh0ZwnM+/r3sAu4iHgaN3mpkcP9kJND4vBUo YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXp7fH1+f4A The old one\n"

 Again, if there is a space following the necessary parts, there must also be
 a comment or else the entry is malformed.