Add checksum(1).

This commit is contained in:
Jonas 'Sortie' Termansen 2020-09-27 22:15:20 +02:00
parent f22d736633
commit 6f7162a0b1
5 changed files with 759 additions and 0 deletions

View file

@ -11,6 +11,7 @@ dispd \
libmount \
bench \
carray \
checksum \
disked \
editor \
ext \

2
checksum/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
checksum
*.o

34
checksum/Makefile Normal file
View file

@ -0,0 +1,34 @@
# Build and installation rules for the checksum utility and its sha*sum names.
SOFTWARE_MEANT_FOR_SORTIX=1
# Shared build configuration. Presumably these provide DEFAULT_OPTLEVEL, BINDIR
# and MANDIR used below -- TODO confirm against ../build-aux.
include ../build-aux/platform.mak
include ../build-aux/compiler.mak
include ../build-aux/version.mak
include ../build-aux/dirs.mak
# OPTLEVEL and CFLAGS only receive these defaults when they are not already
# set; -Wall -Wextra are appended to CFLAGS.
OPTLEVEL?=$(DEFAULT_OPTLEVEL)
CFLAGS?=$(OPTLEVEL)
CFLAGS += -Wall -Wextra
# Build the checksum program.
all: checksum
.PHONY: all install clean
# Install the program and its manual page under DESTDIR, then hard link the
# sha224sum, sha256sum, sha384sum and sha512sum names to the installed program
# and manual page (ln -f replaces any existing link of that name).
install: all
mkdir -p $(DESTDIR)$(BINDIR)
install checksum $(DESTDIR)$(BINDIR)
ln -f $(DESTDIR)$(BINDIR)/checksum $(DESTDIR)$(BINDIR)/sha224sum
ln -f $(DESTDIR)$(BINDIR)/checksum $(DESTDIR)$(BINDIR)/sha256sum
ln -f $(DESTDIR)$(BINDIR)/checksum $(DESTDIR)$(BINDIR)/sha384sum
ln -f $(DESTDIR)$(BINDIR)/checksum $(DESTDIR)$(BINDIR)/sha512sum
mkdir -p $(DESTDIR)$(MANDIR)/man1
cp checksum.1 $(DESTDIR)$(MANDIR)/man1/checksum.1
ln -f $(DESTDIR)$(MANDIR)/man1/checksum.1 $(DESTDIR)$(MANDIR)/man1/sha224sum.1
ln -f $(DESTDIR)$(MANDIR)/man1/checksum.1 $(DESTDIR)$(MANDIR)/man1/sha256sum.1
ln -f $(DESTDIR)$(MANDIR)/man1/checksum.1 $(DESTDIR)$(MANDIR)/man1/sha384sum.1
ln -f $(DESTDIR)$(MANDIR)/man1/checksum.1 $(DESTDIR)$(MANDIR)/man1/sha512sum.1
# Compile and link a program in one step from the C source file of the same
# name.
%: %.c
$(CC) -std=gnu11 $(CFLAGS) $(CPPFLAGS) $< -o $@
# Remove the built program.
clean:
rm -f checksum

216
checksum/checksum.1 Normal file
View file

@ -0,0 +1,216 @@
.Dd September 27, 2020
.Dt CHECKSUM 1
.Os
.Sh NAME
.Nm checksum ,
.Nm sha224sum ,
.Nm sha256sum ,
.Nm sha384sum ,
.Nm sha512sum
.Nd compute and check cryptographic hashes
.Sh SYNOPSIS
.Nm checksum
.Op Fl ciqs
.Fl a Ar algorithm
.Op Fl C Ar checklist
.Op Ar
.Nm sha224sum
.Op Fl ciqs
.Op Fl C Ar checklist
.Op Ar
.Nm sha256sum
.Op Fl ciqs
.Op Fl C Ar checklist
.Op Ar
.Nm sha384sum
.Op Fl ciqs
.Op Fl C Ar checklist
.Op Ar
.Nm sha512sum
.Op Fl ciqs
.Op Fl C Ar checklist
.Op Ar
.Sh DESCRIPTION
.Nm
is used to check the cryptographic integrity of files by calculating their
cryptographic hashes and later checking that the files retain the same hashes,
thus guaranteeing it would be vanishingly unlikely the files have been modified
unless the cryptographic hash algorithm has been broken.
.Pp
.Nm
uses the requested cryptographic hash
.Ar algorithm
to calculate the hashes of the input files, or the standard input if no files
are specified.
The standard input can be specified using the
.Sq -
path.
.Pp
.Nm
writes a checklist of the inputs' hashes that can later be checked using the
.Fl c
or
.Fl C
options.
Checklists have a line for each file consisting of its checksum (the
cryptographic hash) in lowercase hexadecimal followed by two spaces and the
file's path
.Sq ( -
in case of the standard input).
.Pp
If the
.Fl c
or
.Fl C
options are set,
.Nm
instead checks the files.
It writes a line for each file containing its path followed by a colon and a
space, and
.Sq OK
if the file's hash matched the checksum or
.Sq FAILED
if it did not.
After each checklist has been processed, a diagnostic is written to the standard
error with how many files couldn't be read (if any couldn't be read), and a
diagnostic is written to the standard error with how many checksums didn't match
(if any didn't match).
.Pp
The options are as follows:
.Bl -tag -width "12345678"
.It Fl a , Fl \-algorithm Ns "=" Ns Ar algorithm
Use the case-insensitive cryptographic hash
.Ar algorithm :
.Bl -bullet -compact
.It
SHA224
.It
SHA256
.It
SHA384
.It
SHA512/256
.It
SHA512
.El
.Pp
The algorithm is set by default if
.Nm
is invoked by the
.Nm sha224sum ,
.Nm sha256sum ,
.Nm sha384sum ,
or
.Nm sha512sum
names.
.It Fl c , Fl \-check
Each input is interpreted as a checklist of files to be checked.
.It Fl C , Fl \-checklist Ns "=" Ns Ar checklist
Check the inputs using the
.Ar checklist
file
.Sq ( -
for the standard input).
This option is useful for checking a subset of files in a checklist.
.It Fl i , Fl \-ignore-missing
Ignore non-existent files when checking.
.It Fl q , Fl \-quiet
Only mention files with the wrong hash when checking.
.It Fl s , Fl \-status
Don't mention any files when checking and only provide the exit status.
.El
.Sh EXIT STATUS
If
.Fl c
or
.Fl C
are set,
.Nm
will exit 1 if any error occurred or the checklist was malformed; and otherwise
exit 2 if any files had the wrong hash, and exit 0 if all files passed the
check.
.Pp
Otherwise
.Nm
will exit 0 if all files were hashed, or exit 1 if an error occurred.
.Sh EXAMPLES
Compute the SHA256 hash of a file:
.Bd -literal
$ sha256sum foo
b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c foo
.Ed
.Pp
Check the SHA256 hash of a file:
.Bd -literal
$ sha256sum foo > foo.sha256sum
$ sha256sum -c foo.sha256sum
foo: OK
.Ed
.Pp
Check every file in a checklist and only mention failures:
.Bd -literal
$ echo foo > foo
$ echo bar > bar
$ sha256sum foo bar > checklist
$ sha256sum -cq checklist
$ echo foo > bar
$ sha256sum -cq checklist
bar: FAILED
sha256sum: WARNING: 1 computed checksum did NOT match
.Ed
.Pp
Check the hash of only some files in a checklist:
.Bd -literal
$ sha256sum foo bar qux > checklist
$ sha256sum -C checklist foo qux
foo: OK
qux: OK
.Ed
.Pp
Check that the standard input has the expected contents:
.Bd -literal
$ sha256sum < reference > checklist
$ sha256sum -C checklist < input
-: OK
.Ed
.Sh SEE ALSO
.Xr cmp 1 ,
.Xr sha2 3
.Sh HISTORY
.Nm
originally appeared in Sortix 1.1.
.Pp
.Nm
is similar to a subset of GNU
.Nm sha256sum ,
mixed with the BSD
.Fl a
and
.Fl C
extensions to POSIX
.Nm cksum .
The
.Fl iqs
short options are extensions to GNU
.Nm sha256sum ,
which only provides these features through the long options.
.Nm
is always strict and errors on malformed checklists unlike GNU
.Nm sha256sum .
.Sh CAVEATS
Insecure cryptographic hash algorithms such as MD5 and SHA1 are not
implemented.
.Pp
.Nm
does not have the
.Fl b
and
.Fl t
options from GNU sha256sum to select binary/text mode.
The text mode being default is poor design but only matters on some
implementations for Windows.
This implementation removes that complexity and always operates in binary mode.
The double space checklist delimiter is used for simplicity and compatibility as
all sensible implementations are always in binary mode by default.
The space asterisk checklist delimiter to explicitly request binary mode is not
supported for simplicity.

506
checksum/checksum.c Normal file
View file

@ -0,0 +1,506 @@
/*
* Copyright (c) 2017, 2020 Jonas 'Sortie' Termansen.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* checksum.c
* Compute and check cryptographic hashes.
*/
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <sha2.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
// Digits used by printhex() to render each nibble as lowercase hexadecimal.
static const char hexchars[] = "0123456789abcdef";
// Shared read buffer that digest_fd() fills from the input file descriptor.
static uint8_t buffer[65536];
// Capacity of every digest array in this file. Sized for SHA-512, presumably
// the longest digest among the supported algorithms.
#define DIGEST_MAX_LENGTH SHA512_DIGEST_LENGTH
// Hashing state handed to the struct hash operations. All currently supported
// algorithms use the sha2 member.
union ctx
{
SHA2_CTX sha2;
};
// Description of one supported hash algorithm: its name, its digest size and
// the operations used to compute a digest.
struct hash
{
// Algorithm name, matched case-insensitively against the -a option and
// printed in diagnostics.
const char* name;
// Number of digest bytes that are printed and compared for this algorithm.
size_t digest_size;
// Prepare ctx for hashing a new input.
void (*init)(union ctx* ctx);
// Feed size bytes from buffer into the hash state in ctx.
void (*update)(union ctx* ctx, const uint8_t* buffer, size_t size);
// Finish hashing and store the resulting digest in digest.
void (*final)(uint8_t digest[], union ctx* ctx);
};
// Define the static functions Wrap<algorithm>Init, Wrap<algorithm>Update and
// Wrap<algorithm>Final, which forward to <algorithm>Init, <algorithm>Update
// and <algorithm>Final using the ctx_member member of union ctx. This gives
// every algorithm the function signatures that struct hash expects.
#define WRAP(ctx_member, algorithm) \
static void Wrap##algorithm##Init(union ctx* ctx) \
{ \
algorithm##Init(&ctx->ctx_member); \
} \
\
static void Wrap##algorithm##Update(union ctx* ctx, \
const uint8_t* buffer, \
size_t size) \
{ \
algorithm##Update(&ctx->ctx_member, buffer, size); \
} \
\
static void Wrap##algorithm##Final(uint8_t digest[], union ctx* ctx) \
{ \
algorithm##Final(digest, &ctx->ctx_member); \
}
// Instantiate the wrappers for each supported algorithm.
WRAP(sha2, SHA224)
WRAP(sha2, SHA256)
WRAP(sha2, SHA384)
WRAP(sha2, SHA512_256)
WRAP(sha2, SHA512)
// Define a static struct hash called variable for the given algorithm. The
// digest size comes from <algorithm>_DIGEST_LENGTH and the operations are the
// Wrap<algorithm>* functions generated by WRAP. The name string is what users
// pass to the -a option.
#define HASH(variable, name, algorithm) \
static struct hash variable = \
{ \
name, \
algorithm##_DIGEST_LENGTH, \
Wrap##algorithm##Init, \
Wrap##algorithm##Update, \
Wrap##algorithm##Final, \
}
// One descriptor per supported algorithm.
HASH(sha224, "SHA224", SHA224);
HASH(sha256, "SHA256", SHA256);
HASH(sha384, "SHA384", SHA384);
HASH(sha512_256, "SHA512/256", SHA512_256);
HASH(sha512, "SHA512", SHA512);
// NULL-terminated table of every supported algorithm, searched by name in
// main() when the -a option is given.
static struct hash* hashes[] =
{
&sha224,
&sha256,
&sha384,
&sha512_256,
&sha512,
NULL,
};
// The algorithm in use, selected in main() before any hashing takes place.
static struct hash* hash = NULL;
// Argument of -a / --algorithm, or NULL if the option was not given.
static const char* algorithm = NULL;
// Argument of -C / --checklist, or NULL if the option was not given.
static const char* checklist = NULL;
// Set by -c / --check: treat each input as a checklist to verify.
static bool check = false;
// Set by -i / --ignore-missing: skip files that do not exist when checking.
static bool ignore_missing = false;
// Set by -q / --quiet: only report files that did not pass when checking.
static bool quiet = false;
// Set by -s / --status: report no per-file results when checking.
static bool silent = false;
// Convert one hexadecimal digit, in either letter case, to its numeric value
// in the range 0 to 15. Returns -1 if c is not a hexadecimal digit.
int debase(char c)
{
	int value = -1;
	if ( c >= '0' && c <= '9' )
		value = c - '0';
	else if ( c >= 'a' && c <= 'f' )
		value = 10 + (c - 'a');
	else if ( c >= 'A' && c <= 'F' )
		value = 10 + (c - 'A');
	return value;
}
// Write size bytes from buffer to the standard output as lowercase
// hexadecimal, two digits per byte with the high nibble first.
static void printhex(const uint8_t* buffer, size_t size)
{
	while ( size-- )
	{
		uint8_t byte = *buffer++;
		putchar(hexchars[byte >> 4]);
		putchar(hexchars[byte & 0xF]);
	}
}
// Hash everything readable from fd with the selected algorithm and store the
// result in digest. The descriptor is not closed. Returns 0 on success. If a
// read fails, a warning naming path is printed, digest is left unwritten and
// 1 is returned.
static int digest_fd(uint8_t digest[DIGEST_MAX_LENGTH],
                     int fd,
                     const char* path)
{
	union ctx ctx;
	hash->init(&ctx);
	for ( ; ; )
	{
		ssize_t amount = read(fd, buffer, sizeof(buffer));
		if ( amount < 0 )
		{
			warn("%s", path);
			return 1;
		}
		// A zero-length read marks the end of the input.
		if ( amount == 0 )
			break;
		hash->update(&ctx, buffer, amount);
	}
	hash->final(digest, &ctx);
	return 0;
}
// Hash the file at path, or the standard input if path is "-", and store the
// result in digest. Returns 0 on success and 1 after printing a warning if the
// file could not be opened or read. Returns -1 without any warning if the file
// does not exist and missing files are being ignored.
static int digest_path(uint8_t digest[DIGEST_MAX_LENGTH], const char* path)
{
	if ( strcmp(path, "-") == 0 )
		return digest_fd(digest, 0, "-");
	int fd = open(path, O_RDONLY);
	if ( 0 <= fd )
	{
		int result = digest_fd(digest, fd, path);
		close(fd);
		return result;
	}
	if ( ignore_missing && errno == ENOENT )
		return -1;
	warn("%s", path);
	return 1;
}
// Hash the file at path and compare the result against the expected checksum.
// Unless silenced, a "path: OK" or "path: FAILED" line is written to the
// standard output; in quiet mode only failures are written. Returns 0 if the
// checksum matched, 1 if the file could not be read, and 2 if the checksum did
// not match. Returns -1, reporting nothing, if the file was missing and
// missing files are being ignored.
static int verify_path(uint8_t checksum[], const char* path)
{
	uint8_t digest[DIGEST_MAX_LENGTH];
	int status = digest_path(digest, path);
	if ( status == -1 )
		return -1;
	if ( status == 0 )
	{
		bool matches =
			timingsafe_memcmp(checksum, digest, hash->digest_size) == 0;
		if ( !matches )
			status = 2;
	}
	// Don't leave the computed digest behind on the stack.
	explicit_bzero(digest, sizeof(digest));
	bool passed = status == 0;
	bool mention = !silent && !(quiet && passed);
	if ( mention )
		printf("%s: %s\n", path, passed ? "OK" : "FAILED");
	return status;
}
// One file requested on the command line together with the checksum found for
// it in the checklist given with -C.
struct checklist
{
// Path of the file exactly as it was given on the command line.
const char* file;
// Expected checksum copied from the checklist; only meaningful once
// initialized is true.
uint8_t checksum[DIGEST_MAX_LENGTH];
// Whether the checklist contained an entry for this file.
bool initialized;
};
// qsort comparison function for an array of struct checklist pointers that
// orders the entries by their file path using strcmp.
static int compare_checklist_file(const void* a_ptr, const void* b_ptr)
{
	struct checklist* const* lhs = (struct checklist* const*) a_ptr;
	struct checklist* const* rhs = (struct checklist* const*) b_ptr;
	const char* lhs_file = (*lhs)->file;
	const char* rhs_file = (*rhs)->file;
	return strcmp(lhs_file, rhs_file);
}
// bsearch comparison function where the key is a file path string and each
// array element is a struct checklist pointer, compared by file path using
// strcmp.
static int search_checklist_file(const void* file_ptr, const void* elem_ptr)
{
	const char* wanted = (const char*) file_ptr;
	struct checklist* const* slot = (struct checklist* const*) elem_ptr;
	return strcmp(wanted, (*slot)->file);
}
// Check files against the checklist read from fp. path names the checklist in
// diagnostics and is "-" when the checklist is the standard input.
//
// Each checklist line must consist of the checksum in hexadecimal (as many
// digits as the selected algorithm's digest needs), two spaces and a non-empty
// file path, terminated by a newline. Any malformed line, a checklist without
// any lines, or a read error on fp terminates the program with exit status 1.
//
// If files is NULL, every file listed in the checklist is verified as its line
// is read. Otherwise only the files_count paths in files are verified, in the
// order given, using the checksums the checklist has for them; the program
// exits with status 1 if the checklist lists one of those paths twice or not
// at all.
//
// After processing, warnings with the number of unreadable files and of
// mismatching checksums are printed if there were any. Returns 1 if any file
// could not be read, otherwise 2 if any checksum did not match, otherwise 0.
static int checklist_fp(FILE* fp,
                        const char* path,
                        size_t files_count,
                        const char* const* files)
{
	struct checklist* checklist = NULL;
	struct checklist** checklist_sorted = NULL;
	if ( files )
	{
		// Request at least one element as calloc is allowed to return NULL
		// for a zero-sized request, which must not be mistaken for an
		// allocation failure when -C is used without any file operands.
		size_t alloc_count = files_count ? files_count : 1;
		checklist = calloc(alloc_count, sizeof(struct checklist));
		checklist_sorted = calloc(alloc_count, sizeof(struct checklist*));
		if ( !checklist || !checklist_sorted )
			err(1, "malloc");
		for ( size_t i = 0; i < files_count; i++ )
		{
			checklist[i].file = files[i];
			checklist_sorted[i] = &checklist[i];
		}
		// Sort pointers rather than the entries themselves so the entries
		// stay in command line order for the verification pass below.
		qsort(checklist_sorted, files_count, sizeof(struct checklist*),
		      compare_checklist_file);
	}
	uint8_t checksum[DIGEST_MAX_LENGTH];
	bool any = false;
	char* line = NULL;
	size_t line_size = 0;
	ssize_t line_length;
	off_t line_number = 0;
	size_t read_failures = 0;
	size_t check_failures = 0;
	while ( 0 < (line_length = getline(&line, &line_size, fp)) )
	{
		line_number++;
		if ( line[line_length - 1] != '\n' )
			errx(1, "%s:%ji: Line was not terminated with a newline",
			     path, (intmax_t) line_number);
		line[--line_length] = '\0';
		if ( (size_t) line_length < 2 * hash->digest_size )
			errx(1, "%s:%ji: Improperly formatted %s checksum line",
			     path, (intmax_t) line_number, hash->name);
		// Decode the hexadecimal checksum, two digits per digest byte.
		for ( size_t i = 0; i < hash->digest_size; i++ )
		{
			int higher = debase(line[i*2 + 0]);
			int lower = debase(line[i*2 + 1]);
			if ( higher == -1 || lower == -1 )
				errx(1, "%s:%ji: Improperly formatted %s checksum line",
				     path, (intmax_t) line_number, hash->name);
			checksum[i] = higher << 4 | lower;
		}
		// The checksum must be followed by exactly two spaces and a non-empty
		// path. The checks short-circuit at the terminating nul byte, so no
		// byte beyond the line is inspected.
		if ( line[2 * hash->digest_size + 0] != ' ' ||
		     line[2 * hash->digest_size + 1] != ' ' ||
		     line[2 * hash->digest_size + 2] == '\0' )
			errx(1, "%s:%ji: Improperly formatted %s checksum line",
			     path, (intmax_t) line_number, hash->name);
		const char* file = line + 2 * hash->digest_size + 2;
		// A checklist on the standard input cannot also list the standard
		// input as a file to check.
		if ( !strcmp(path, "-") && !strcmp(file, "-") )
			errx(1, "%s:%ji: Improperly formatted %s checksum line",
			     path, (intmax_t) line_number, hash->name);
		if ( files )
		{
			// Only remember checksums of requested files; other checklist
			// lines are validated above but otherwise ignored.
			struct checklist** entry_ptr =
				bsearch(file, checklist_sorted, files_count,
				        sizeof(struct checklist*), search_checklist_file);
			if ( entry_ptr )
			{
				struct checklist* entry = *entry_ptr;
				if ( entry->initialized )
					errx(1, "%s:%ji: Duplicate hash found for: %s", path,
					     (intmax_t) line_number, file);
				memcpy(entry->checksum, checksum, DIGEST_MAX_LENGTH);
				entry->initialized = true;
			}
		}
		else
		{
			// A status of -1 (ignored missing file) counts as neither kind
			// of failure.
			int status = verify_path(checksum, file);
			if ( status == 1 )
				read_failures++;
			else if ( status == 2 )
				check_failures++;
		}
		any = true;
	}
	free(line);
	if ( ferror(fp) )
		err(1, "%s", path);
	if ( !any )
		errx(1, "%s: No properly formatted %s checksum lines found",
		     path, hash->name);
	// Verify the requested files in command line order. files_count is zero
	// when files is NULL, so this loop does nothing in that mode.
	for ( size_t i = 0; i < files_count; i++ )
	{
		const char* file = files[i];
		struct checklist* entry = &checklist[i];
		if ( !entry->initialized )
			errx(1, "%s: No hash found for: %s", path, file);
		int status = verify_path(entry->checksum, file);
		if ( status == 1 )
			read_failures++;
		else if ( status == 2 )
			check_failures++;
	}
	explicit_bzero(checksum, sizeof(checksum));
	free(checklist);
	free(checklist_sorted);
	if ( read_failures )
		warnx("WARNING: %zu listed %s could not be read",
		      read_failures, read_failures == 1 ? "file" : "files");
	if ( check_failures )
		warnx("WARNING: %zu computed %s did NOT match",
		      check_failures, check_failures == 1 ? "checksum" : "checksums");
	return read_failures ? 1 : check_failures ? 2 : 0;
}
// Open the checklist at path, or use the standard input if path is "-", and
// process it with checklist_fp, returning its result. The program exits with
// status 1 if the checklist cannot be opened.
static int checklist_path(const char* path,
                          size_t files_count,
                          const char* const* files)
{
	if ( strcmp(path, "-") != 0 )
	{
		FILE* fp = fopen(path, "r");
		if ( fp == NULL )
			err(1, "%s", path);
		int result = checklist_fp(fp, path, files_count, files);
		fclose(fp);
		return result;
	}
	// The standard input is left open for the caller.
	return checklist_fp(stdin, "-", files_count, files);
}
// Remove the NULL entries from the first *argc elements of *argv, keeping the
// remaining entries in their original order, and store the number of entries
// kept in *argc. The vacated slots at the end are filled with the element that
// followed the original arguments, which is the terminating NULL for an
// argument vector as passed to main.
static void compact_arguments(int* argc, char*** argv)
{
	char** vector = *argv;
	int total = *argc;
	int kept = 0;
	for ( int i = 0; i < total; i++ )
		if ( vector[i] )
			vector[kept++] = vector[i];
	if ( kept < total )
	{
		char* terminator = vector[total];
		for ( int i = kept; i < total; i++ )
			vector[i] = terminator;
	}
	*argc = kept;
}
// Entry point: parse the options, select the hash algorithm, and then either
// print the hashes of the inputs or check the inputs against a checklist.
//
// Exits 1 if an input could not be read or the standard output could not be
// written, otherwise 2 if a checked file had the wrong checksum, otherwise 0.
// Usage errors and malformed checklists exit 1 through errx/err.
int main(int argc, char* argv[])
{
// The name the program was invoked as can select the default algorithm below.
char* argv0_last_slash = strrchr(argv[0], '/');
const char* argv0_basename =
argv0_last_slash ? argv0_last_slash + 1 : argv[0];
// Scan every argument so options may be mixed with operands. Consumed options
// and option arguments are replaced with NULL and removed afterwards by
// compact_arguments.
for ( int i = 1; i < argc; i++ )
{
const char* arg = argv[i];
// Operands, including a lone "-" for the standard input, are left in place.
if ( arg[0] != '-' || !arg[1] )
continue;
argv[i] = NULL;
// "--" ends option parsing; all remaining arguments are operands.
if ( !strcmp(arg, "--") )
break;
if ( arg[1] != '-' )
{
// A cluster of short options such as -cq.
char c;
while ( (c = *++arg) ) switch ( c )
{
case 'a':
// The argument is either the rest of this cluster or the next
// command line argument.
if ( !*(algorithm = arg + 1) )
{
if ( i + 1 == argc )
errx(1, "option requires an argument -- 'a'");
algorithm = argv[i+1];
argv[++i] = NULL;
}
// Point at a one-character string so the while loop stops here rather
// than parsing the option argument as further options.
arg = "a";
break;
case 'c': check = true; break;
case 'C':
// Same argument handling as for -a.
if ( !*(checklist = arg + 1) )
{
if ( i + 1 == argc )
errx(1, "option requires an argument -- 'C'");
checklist = argv[i+1];
argv[++i] = NULL;
}
arg = "C";
break;
case 'i': ignore_missing = true; break;
case 'q': quiet = true; break;
case 's': silent = true; break;
default:
errx(1, "unknown option -- '%c'", c);
}
}
// Long options that take an argument accept both "--option argument" and
// "--option=argument".
else if ( !strcmp(arg, "--algorithm") )
{
if ( i + 1 == argc )
errx(1, "option '--algorithm' requires an argument");
algorithm = argv[i+1];
argv[++i] = NULL;
}
else if ( !strncmp(arg, "--algorithm=", strlen("--algorithm=")) )
algorithm = arg + strlen("--algorithm=");
else if ( !strcmp(arg, "--check") )
check = true;
else if ( !strcmp(arg, "--checklist") )
{
if ( i + 1 == argc )
errx(1, "option '--checklist' requires an argument");
checklist = argv[i+1];
argv[++i] = NULL;
}
else if ( !strncmp(arg, "--checklist=", strlen("--checklist=")) )
checklist = arg + strlen("--checklist=");
else if ( !strcmp(arg, "--ignore-missing") )
ignore_missing = true;
else if ( !strcmp(arg, "--quiet") )
quiet = true;
else if ( !strcmp(arg, "--status") )
silent = true;
else
errx(1, "unknown option: %s", arg);
}
// Drop the consumed arguments so argv[1] through argv[argc-1] are the operands.
compact_arguments(&argc, &argv);
if ( check && checklist )
errx(1, "The -c and -C options are mutually incompatible");
if ( !(check || checklist) && (ignore_missing || quiet || silent) )
errx(1, "The -iqs options require -c or -C");
// An explicit -a takes precedence over the name the program was invoked as.
if ( algorithm )
{
for ( size_t i = 0; !hash && hashes[i]; i++ )
if ( !strcasecmp(hashes[i]->name, algorithm) )
hash = hashes[i];
if ( !hash )
errx(1, "No such hash algorithm: %s", algorithm);
}
else if ( !strcmp(argv0_basename, "sha224sum") )
hash = &sha224;
else if ( !strcmp(argv0_basename, "sha256sum") )
hash = &sha256;
else if ( !strcmp(argv0_basename, "sha384sum") )
hash = &sha384;
else if ( !strcmp(argv0_basename, "sha512sum") )
hash = &sha512;
else
errx(1, "No hash algorithm was specified with -a");
bool read_failures = false;
bool check_failures = false;
if ( checklist )
{
// -C: check the operands against the single given checklist.
int result =
checklist_path(checklist, argc - 1, (const char* const*) argv + 1);
if ( result == 1 )
read_failures = true;
else if ( result == 2 )
check_failures = true;
}
else if ( argc == 1 )
{
// No operands: operate on the standard input.
if ( check )
{
int result = checklist_fp(stdin, "-", 0, NULL);
if ( result == 1 )
read_failures = true;
else if ( result == 2 )
check_failures = true;
}
else
{
uint8_t digest[DIGEST_MAX_LENGTH];
int result = digest_fd(digest, 0, "-");
if ( result == 0 )
{
printhex(digest, hash->digest_size);
puts(" -");
explicit_bzero(digest, sizeof(digest));
}
else if ( result == 1 )
read_failures = true;
}
}
else for ( int i = 1; i < argc; i++ )
{
// With -c each operand is a checklist to process, otherwise each operand
// is a file to hash.
if ( check )
{
int result = checklist_path(argv[i], 0, NULL);
if ( result == 1 )
read_failures = true;
else if ( result == 2 )
check_failures = true;
}
else
{
uint8_t digest[DIGEST_MAX_LENGTH];
int result = digest_path(digest, argv[i]);
if ( result == 0 )
{
printhex(digest, hash->digest_size);
printf(" %s\n", argv[i]);
explicit_bzero(digest, sizeof(digest));
}
else if ( result == 1 )
read_failures = true;
}
}
// Report failure if any output could not be written.
if ( ferror(stdout) || fflush(stdout) == EOF )
return 1;
return read_failures ? 1 : check_failures ? 2 : 0;
}