Move to a space-separated format

2018-08-30 00:16:22 +03:00 · 2018-08-30 00:16:22 +03:00 · 3a9c4baa4b
parent 5397293f33
commit 3a9c4baa4b
4 changed files with 117 additions and 76 deletions
--- a/src/hashing.py
+++ b/src/hashing.py
@ -1,3 +1,4 @@
 import base64
 import hashlib
 import os
@ -23,3 +24,44 @@ def hash_host(host):
 	salt = generate_salt()
 	hashed_host = hash_with_salt(host, salt)
 	return salt, hashed_host
 def base64enc(b):
 	"""base64enc(bytes) → bytes
 	Standard base64 with the trailing = padding removed"""
 	# Base 64 maps every group of 3 bytes (24 bits) onto 4 characters
 	# of 6 bits each:
 	# /byte 1\/byte 2\/byte 3\
 	# ABCDEFGHijklmnopQRSTUVWX
 	# \64 1/\64 2/\64 3/\64 4/
 	#
 	# A trailing group of only one or two bytes doesn't have enough
 	# bits to fill its last character, so the missing bits are taken
 	# to be zeroes:
 	# /byte 1\
 	# ABCDEFGH0000
 	# \64 1/\64 2/
 	#
 	# /byte 1\/byte 2\
 	# ABCDEFGHijklmnop00
 	# \64 1/\64 2/\64 3/
 	#
 	# Such a group yields only 2 or 3 characters, which b64encode
 	# fills up to 4 with =. The output length mod 4 is enough to
 	# work out how much padding there was, so nothing is lost by
 	# dropping it.
 	padded = base64.b64encode(b)
 	# = is not part of the base64 alphabet, so it can only occur in
 	# the padding at the very end
 	return padded.rstrip(b'=')
 def base64dec(b64):
 	"""base64dec(bytes) → bytes
 	Missing = padding is added back before decoding."""
 	assert type(b64) == bytes
 	# The decoder works on groups of 4 base64 characters, so padded
 	# base64 always has a length that is a multiple of 4. We know
 	# the length without the padding, which tells us how many =
 	# are needed to reach the next multiple of 4 (none if we are
 	# already on one).
 	pad_count = -len(b64) % 4
 	padded = b64 + b'=' * pad_count
 	# validate = True makes characters outside the base64 alphabet
 	# an error instead of having them silently skipped
 	return base64.b64decode(padded, validate = True)
--- a/src/read_file.py
+++ b/src/read_file.py
@ -1,4 +1,4 @@
-import base64
+import hashing
 import entry
@ -30,88 +30,79 @@ def parse_header(header):
 	(if any) if it is"""
 	assert type(header) == bytes
-	magic = header[0:6]
+	# Check that it ends in a newline
-	if magic != b'SSHWOT':
+	if len(header) == 0 or header[-1] != 0x0a:
 		raise FileFormatError('No newline after header')
 	# Split it into fields and make sure we have at least the magic and
 	# the version
 	fields = header[:-1].split(b' ', 3)
 	if len(fields) < 2:
 		raise FileFormatError('Too few fields in the header, expected at least magic and version')
 	# Check the magic
 	if fields[0] != b'SSHWOT':
 		raise FileFormatError('Invalid magic')
 	# Version 0 is the current one
 	version = header[6:7]
 	if version == b'':
 		raise FileFormatError('No newline after header')
 	if version != b'0':
 		raise VersionMismatch('Version %i not supported' % version[0])
 	# See if we have a comment
-	if header[7:8] == b' ':
+	if len(fields) == 3:
 		# It says we have
-		if header[8:9] == b'\n':
+		if len(fields[2]) == 0:
 			# No, we don't, but we do have a space telling we
 			# have. The header is malformed
 			raise FileFormatError('Missing comment or spurious space in the header')
 		else:
-			# Yes, we do
+			# Yes, we do. Extract it
 			# Check it ends with a newline
 			if header[-1] != 0x0a:
 				raise FileFormatError('Missing newline at the end of the header')
 			try:
-				file_comment = header[8:-1].decode('utf-8')
+				file_comment = fields[2].decode('utf-8')
 			except UnicodeDecodeError:
 				raise FileFormatError('Comment is not valid utf-8')
 			return file_comment
 	elif header[7:8] == b'\n':
 		# No, we have newline
 		return ''
 	else:
-		# No, we have something else
+		file_comment = ''
-		raise FileFormatError("Expected a space or a newline but got '%s' instead" % header[7:].decode('utf-8'))
+
 	return file_comment
 def parse_entry(line):
 	"""parse_entry(bytes) → Entry"""
 	assert type(line) == bytes
-	def extract_b64_field(rest):
+	def decode_b64_field(b64):
 		"""extract_b64_field(bytes) → (bytes: decoded_field, bytes:rest)"""
 		field_b64 = rest[0:44]
 		if len(field_b64) != 44:
 			raise FileFormatError('Unexpected end of line')
 		try:
-			field = base64.b64decode(field_b64, validate = True)
+			return hashing.base64dec(b64)
 		except (ValueError, base64.binascii.Error) as err:
-			raise FileFormatError('Malformed base64 string: %s' % field_b64.decode('utf-8')) from err
+			raise FileFormatError('Malformed base64 string: %s' % b64.decode('utf-8')) from err
-		return field, rest[44:]
+	# Check that it ends in a newline
 	if len(line) == 0 or line[-1] != 0x0a:
 		raise FileFormatError('No newline after entry')
-	salt, rest = extract_b64_field(line)
+	# Split the line into fields and make sure we have at least the
-	hashed_host, rest = extract_b64_field(rest)
+	# salt, the hashed host, and the fingerprint
-	fingerprint, rest = extract_b64_field(rest)
+	fields = line[:-1].split(b' ', 3)
 	if len(fields) < 3:
 		raise FileFormatError('Too few fields in the entry, expected in the very least salt, hashed host, and fingerprint')
-	# What do we have after that?
+	salt = decode_b64_field(fields[0])
-	if rest[0:1] == b' ':
+	hashed_host = decode_b64_field(fields[1])
-		# A comment?
+	fingerprint = decode_b64_field(fields[2])
-		if rest[1:2] == b'\n':
+
-			# No, but it says we have. It's malformed
+	# See if we have a comment
 	if len(fields) == 4:
 		# It says we have
 		if len(fields[3]) == 0:
 			# No, we don't, but we do have a space telling we
 			# have. The entry is malformed
 			raise FileFormatError('Missing comment or spurious space in the entry')
 		else:
-			# Yes. Make sure it ends in a newline
+			# Yes, we do. Extract it
 			if rest[-1] != 0x0a:
 				raise FileFormatError('No newline after entry')
 			try:
-				comment = rest[1:-1].decode('utf-8')
+				comment = fields[3].decode('utf-8')
 			except UnicodeDecodeError:
 				raise FileFormatError('Comment is not valid utf-8')
 	elif rest[0:1] == b'\n':
 		# A newline
 		comment = ''
 	else:
-		# Something else
+		comment = ''
 		raise FileFormatError('Expected a space or a newline but got "%s" instead' % rest.decode('utf-8'))
 	return entry.Entry(salt, hashed_host, fingerprint, comment)
--- a/src/write_file.py
+++ b/src/write_file.py
@ -1,4 +1,4 @@
-import base64
+import hashing
 def write_header(f, file_comment):
 	"""write_header(file(wb), str)
@ -6,6 +6,8 @@ def write_header(f, file_comment):
 	assert type(file_comment) == str
 	# b'SSHWOT' magic
 	f.write(b'SSHWOT')
 	# Separating space
 	f.write(b' ')
 	# Version number
 	f.write(b'0')
 	# b' ' + file_comment, if there is one
@ -24,14 +26,20 @@ def write_entry(f, salt, hashed_host, fingerprint, comment):
 	assert type(fingerprint) == bytes and len(fingerprint) == 32
 	assert type(comment) == str
-	# base64 encoded (44 bytes): salt
+	# base64 encoded salt
-	f.write(base64.b64encode(salt))
+	f.write(hashing.base64enc(salt))
-	# base64 encoded (44 bytes): hashed_host
+	# Separating space
-	f.write(base64.b64encode(hashed_host))
+	f.write(b' ')
-	# base64 encoded (44 bytes): fingerprint
+	# base64 encoded hashed_host
-	f.write(base64.b64encode(fingerprint))
+	f.write(hashing.base64enc(hashed_host))
 	# Separating space
 	f.write(b' ')
 	# base64 encoded fingerprint
 	f.write(hashing.base64enc(fingerprint))
 	# b' ' + comment, if there is one
 	if len(comment) > 0:
--- a/sshwot-format.text
+++ b/sshwot-format.text
@ -1,34 +1,34 @@
 Please note that all text inside quotes in the EBNF here is to be taken to
-mean bytes that would decode as that using either the ASCII or the UTF-8
+mean bytes that would decode as that using the ASCII character encoding.
-character encoding. "\n" refers specifically to the byte 0x0a, and no
+"\n" refers specifically to the byte 0x0a, and no alternative newlines are
-alternative newlines are acceptable.
+acceptable.
 The file has a header like:
 magic   = "SSHWOT" ;
 version = "0" ;
-comment = " ", ? General comment about the file. Valid utf-8, no '\n'. ? ;
+comment = ? General comment about the file. Valid utf-8, no '\n'. ? ;
-header  = magic, version, [comment], "\n" ;
+header  = magic, " ", version, [" ", comment], "\n" ;
-Examples of valid headers would be "SSHWOT0\n" and "SSHWOT0 Emma G. 2018\n".
+Examples of valid headers would be "SSHWOT 0\n" and "SSHWOT 0 Emma G. 2018\n".
-"SSHWOT0 \n" is not valid, since a space marks that there will be a comment.
+"SSHWOT 0 \n" is not valid, since a space marks that there will be a comment.
 After the header the entries are laid out as:
-salt        = ? base64 encoded salt, 44 bytes long ? ;
+salt        = ? base64(salt) ? ;
-hashed host = ? base64 encoded sha256(host concat salt), 44 bytes long ? ;
+hashed host = ? base64(sha256(host concat salt)) ? ;
-fingerprint = ? base64 encoded sha256-fingerprint, 44 bytes long ? ;
+fingerprint = ? base64(sha256-fingerprint) ? ;
-comment     = " ", ? Comment about the host/key. Valid utf-8, no '\n'. ? ;
+comment     = ? Comment about the host/key. Valid utf-8, no '\n'. ? ;
-entry       = salt, hashed host, fingerprint, [comment], "\n" ;
+entry       = salt, " ", hashed host, " ", fingerprint, [" ", comment], "\n" ;
-The version of base64 used uses + for 62 and / for 63, uses = for padding,
+The version of base64 used uses + for 62 and / for 63, doesn't use = for
-and contains no breaks.
+padding, and contains no breaks.
 Examples of valid entries are
-"Yixx+B6zrFoubPhBddgyx0nXHmbqMW1Wzneo4JqJv0U=yPUACFC/zPt/ENoIluOuWiTXor3r7oHhac63qej637E=QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2A=\n"
+"Yixx+B6zrFoubPhBddgyx0nXHmbqMW1Wzneo4JqJv0U yPUACFC/zPt/ENoIluOuWiTXor3r7oHhac63qej637E QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVpbXF1eX2A\n"
 and
-"bd/MfFs+DMVqNQQoZGGCvpTopeS0/Jt6GS5vg7J+638=cbbdTnuIh0ZwnM+/r3sAu4iHgaN3mpkcP9kJND4vBUo=YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXp7fH1+f4A= The old one\n"
+"bd/MfFs+DMVqNQQoZGGCvpTopeS0/Jt6GS5vg7J+638 cbbdTnuIh0ZwnM+/r3sAu4iHgaN3mpkcP9kJND4vBUo YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXp7fH1+f4A The old one\n"
 Again, if there is a space following the necessary parts, there must also be
 a comment or else the entry is malformed.