diff --git a/utils/sort.c b/utils/sort.c index 5774cbb8..35fb47b8 100644 --- a/utils/sort.c +++ b/utils/sort.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, 2015, 2018 Jonas 'Sortie' Termansen. + * Copyright (c) 2014, 2015, 2018, 2021 Jonas 'Sortie' Termansen. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -17,8 +17,11 @@ * Sort, merge, or sequence check text files. */ +#include #include #include +#include +#include #include #include #include @@ -26,8 +29,386 @@ #include #include -// TODO: Implement all the features mandated by POSIX. -// TODO: Implement the useful GNU extensions. +#define MODIFIER_BLANK (1 << 0) +#define MODIFIER_DICTIONARY (1 << 1) +#define MODIFIER_IGNORE_CASE (1 << 2) +#define MODIFIER_GENERAL_NUMERIC (1 << 3) +#define MODIFIER_HUMAN (1 << 4) +#define MODIFIER_IGNORE_NONPRINTING (1 << 5) +#define MODIFIER_MONTH (1 << 6) +#define MODIFIER_NUMERIC (1 << 7) +#define MODIFIER_REVERSE (1 << 8) +#define MODIFIER_RANDOM (1 << 9) +#define MODIFIER_VERSION (1 << 10) +#define MODIFIER_UNIQUE (1 << 11) + +static char separator; +static bool separator_is_blank = true; +static int modifiers; + +struct key +{ + size_t start_field; + size_t first_character; + size_t end_field; + size_t last_character; + int start_modifiers; + int end_modifiers; +}; + +static struct key* keys; +static size_t keys_count; + +static size_t field_length(const char* string) +{ + size_t length = 0; + if ( separator_is_blank ) + { + while ( string[length] && isblank((unsigned char) string[length]) ) + length++; + } + while ( string[length] && + (separator_is_blank ? + !isblank((unsigned char) string[length]) : + string[length] != separator) ) + length++; + return length; +} + +static size_t separator_length(const char* string) +{ + if ( separator_is_blank ) + return 0; + return string[0] == separator ? 1 : 0; +} + +static int strdoublecmp(const char* a, const char* b) +{ + double ad = strtod(a, NULL); + double bd = strtod(b, NULL); + return ad < bd ? -1 : ad > bd ? 1 : 0; +} + +static int month_index(const char* string) +{ + const char* months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", + "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + for ( int i = 0; i < 12; i++ ) + if ( !strncasecmp(string, months[i], 3) ) + return i + 1; + return 0; +} + +static int strmonthcmp(const char* a, const char* b) +{ + int am = month_index(a); + int bm = month_index(b); + return am < bm ? -1 : am > bm ? 1 : 0; +} + +static int strnumcmp(const char* a, const char* b, bool human) +{ + while ( isspace((unsigned char) *a) ) + a++; + while ( isspace((unsigned char) *b) ) + b++; + bool a_negative = *a == '-'; + if ( a_negative ) + a++; + bool b_negative = *b == '-'; + if ( b_negative ) + b++; + while ( *a == '0' ) + a++; + while ( *b == '0' ) + b++; + size_t a_length = 0; + size_t b_length = 0; + while ( isdigit((unsigned char) a[a_length]) ) + a_length++; + while ( isdigit((unsigned char) b[b_length]) ) + b_length++; + size_t a_fractionals = 0; + bool a_nonzero = false; + if ( a[a_length] == '.' ) + { + a_fractionals = 1; + while ( isdigit((unsigned char) a[a_length + a_fractionals]) ) + { + if ( a[a_length + a_fractionals] != '0' ) + a_nonzero = true; + a_fractionals++; + } + } + size_t b_fractionals = 0; + bool b_nonzero = false; + if ( b[b_length] == '.' ) + { + b_fractionals = 1; + while ( isdigit((unsigned char) b[b_length + b_fractionals]) ) + { + if ( b[b_length + b_fractionals] != '0' ) + b_nonzero = true; + b_fractionals++; + } + } + // All zero representations are equal regardless of sign. + if ( !human && !a_length && !a_nonzero && !b_length && !b_nonzero ) + return 0; + if ( a_negative && !b_negative ) + return -1; + if ( !a_negative && b_negative ) + return 1; + int bigger = a_negative ? -1 : 1; + if ( human ) + { + if ( !(a_length || a_fractionals) && (b_length || b_fractionals) ) + return -bigger; + if ( (a_length || a_fractionals) && !(b_length || b_fractionals) ) + return bigger; + const char* magnitudes = "YZEPTGMK"; + unsigned char a_unit = (unsigned char) a[a_length + a_fractionals]; + unsigned char b_unit = (unsigned char) b[b_length + b_fractionals]; + if ( a_unit == 'k' ) + a_unit = 'K'; + if ( b_unit == 'k' ) + b_unit = 'K'; + const char* a_magnitude = strchrnul(magnitudes, a_unit); + const char* b_magnitude = strchrnul(magnitudes, b_unit); + if ( a_magnitude > b_magnitude ) + return -bigger; + if ( a_magnitude < b_magnitude ) + return bigger; + } + if ( a_length < b_length ) + return -bigger; + if ( a_length > b_length ) + return bigger; + for ( size_t i = 0; i < a_length; i++ ) + { + if ( a[i] < b[i] ) + return -bigger; + if ( a[i] > b[i] ) + return bigger; + } + for ( size_t i = 1; i < a_fractionals || i < b_fractionals; i++ ) + { + char a_digit = i < a_fractionals ? a[a_length + i] : '0'; + char b_digit = i < b_fractionals ? b[b_length + i] : '0'; + if ( a_digit < b_digit ) + return -bigger; + if ( a_digit > b_digit ) + return bigger; + } + return 0; +} + +static int is_dictionary(int c) +{ + return isblank(c) || + ('a' <= c && c <= 'z') || + ('A' <= c && c <= 'Z') || + ('0' <= c && c <= '9'); +} + +static int string_compare(const char* a, + const char* b, + bool dictionary, + bool uppercase, + bool printable) +{ + size_t ai = 0; + size_t bi = 0; + while ( true ) + { + unsigned char ac = (unsigned char) a[ai]; + unsigned char bc = (unsigned char) b[bi]; + if ( !ac && !bc ) + break; + if ( uppercase ) + { + ac = toupper(ac); + bc = toupper(bc); + } + if ( ac && ((dictionary && !is_dictionary(ac)) || + (printable && !isprint(ac))) ) + { + ai++; + continue; + } + if ( bc && ((dictionary && !is_dictionary(bc)) || + (printable && !isprint(bc))) ) + { + bi++; + continue; + } + if ( ac < bc ) + return -1; + if ( ac > bc ) + return 1; + if ( ac ) + ai++; + if ( bc ) + bi++; + } + return 0; +} + +static int relate_field(char* a, char* b, int field_modifiers) +{ + int rel = 0; + if ( field_modifiers & MODIFIER_HUMAN ) + rel = strnumcmp(a, b, true); + else if ( field_modifiers & MODIFIER_GENERAL_NUMERIC ) + rel = strdoublecmp(a, b); + else if ( field_modifiers & MODIFIER_MONTH ) + rel = strmonthcmp(a, b); + else if ( field_modifiers & MODIFIER_NUMERIC ) + rel = strnumcmp(a, b, false); + else if ( field_modifiers & MODIFIER_VERSION ) + rel = strverscmp(a, b); + else if ( field_modifiers & (MODIFIER_DICTIONARY | + MODIFIER_IGNORE_CASE | + MODIFIER_IGNORE_NONPRINTING) ) + rel = string_compare(a, b, field_modifiers & MODIFIER_DICTIONARY, + field_modifiers & MODIFIER_IGNORE_CASE, + field_modifiers & MODIFIER_IGNORE_NONPRINTING); + else + rel = strcoll(a, b); + if ( field_modifiers & MODIFIER_REVERSE ) + rel = rel < 0 ? 1 : 0 < rel ? -1 : 0; + return rel; +} + +static int relate(char* a, char* b, int modifiers) +{ + size_t a_len = strlen(a); + size_t b_len = strlen(b); + bool was_trivial = true; + for ( size_t key_index = 0; key_index < keys_count; key_index++ ) + { + const struct key* key = &keys[key_index]; + size_t a_start = a_len; + size_t b_start = b_len; + size_t a_end = a_len; + size_t b_end = b_len; + int start_modifiers = key->start_modifiers | key->end_modifiers ? + key->start_modifiers : modifiers; + int end_modifiers = key->start_modifiers | key->end_modifiers ? + key->end_modifiers : modifiers; + for ( size_t field = 0, a_offset = 0, b_offset = 0; + (field <= key->start_field || field <= key->end_field) && + (a_offset < a_len || b_offset < b_len); + field++ ) + { + size_t a_field = a_offset; + size_t b_field = b_offset; + size_t a_field_length = field_length(a + a_field); + size_t b_field_length = field_length(b + b_field); + a_offset += a_field_length; + b_offset += b_field_length; + a_offset += separator_length(a + a_offset); + b_offset += separator_length(b + b_offset); + if ( field == key->start_field ) + { + size_t a_start_field = a_field; + size_t a_start_field_length = a_field_length; + size_t b_start_field = b_field; + size_t b_start_field_length = b_field_length; + if ( start_modifiers & MODIFIER_BLANK ) + { + while ( a_start_field_length && + isblank((unsigned char) a[a_start_field]) ) + { + a_start_field++; + a_start_field_length--; + } + while ( b_start_field_length && + isblank((unsigned char) b[b_start_field]) ) + { + b_start_field++; + b_start_field_length--; + } + } + size_t a_left = a_len - a_start_field; + size_t b_left = b_len - b_start_field; + size_t a_first = key->first_character <= a_left ? + key->first_character : a_left; + size_t b_first = key->first_character <= b_left ? + key->first_character : b_left; + a_start = a_start_field + a_first; + b_start = b_start_field + b_first; + } + if ( field == key->end_field ) + { + size_t a_end_field = a_field; + size_t a_end_field_length = a_field_length; + size_t b_end_field = b_field; + size_t b_end_field_length = b_field_length; + if ( end_modifiers & MODIFIER_BLANK ) + { + while ( a_end_field_length && + isblank((unsigned char) a[a_end_field]) ) + { + a_end_field++; + a_end_field_length--; + } + while ( b_end_field_length && + isblank((unsigned char) b[b_end_field]) ) + { + b_end_field++; + b_end_field_length--; + } + } + size_t a_last = key->last_character <= a_end_field_length ? + key->last_character : a_end_field_length; + size_t b_last = key->last_character <= b_end_field_length ? + key->last_character : b_end_field_length; + a_end = a_end_field + a_last + 1; + b_end = b_end_field + b_last + 1; + if ( a_len < a_end ) + a_end = a_len; + if ( b_len < b_end ) + b_end = b_len; + } + } + if ( a_end < a_start ) + a_start = a_end; + if ( b_end < b_start ) + b_start = b_end; + char a_separator = a[a_end]; + char b_separator = b[b_end]; + a[a_end] = '\0'; + b[b_end] = '\0'; + int key_modifiers = start_modifiers | end_modifiers; + if ( a_start != 0 || b_start != 0 || a_end != a_len || b_end != b_len || + (key_modifiers & ~MODIFIER_RANDOM) != 0 ) + was_trivial = false; + int rel = relate_field(a + a_start, b + b_start, key_modifiers); + a[a_end] = a_separator; + b[b_end] = b_separator; + if ( rel != 0 ) + return rel; + } + if ( was_trivial || (modifiers & MODIFIER_UNIQUE) ) + return 0; + return strcoll(a, b); +} + +static int compare(char* a, char* b) +{ + int rel = relate(a, b, modifiers & ~MODIFIER_REVERSE); + if ( modifiers & MODIFIER_REVERSE ) + rel = rel < 0 ? 1 : 0 < rel ? -1 : 0; + return rel; +} + +static int indirect_compare(const void* a_ptr, const void* b_ptr) +{ + char* a = *(char* const*) a_ptr; + char* b = *(char* const*) b_ptr; + return compare(a, b); +} static size_t pick_uniform(size_t upper) { @@ -40,59 +421,6 @@ static size_t pick_uniform(size_t upper) return selection % upper; } -static int flip_comparison(int rel) -{ - return rel < 0 ? 1 : 0 < rel ? -1 : 0; -} - -static int indirect_compare(int (*compare)(const char*, const char*), - const void* a_ptr, const void* b_ptr) -{ - const char* a = *(const char* const*) a_ptr; - const char* b = *(const char* const*) b_ptr; - return compare(a, b); -} - -static int compare_line(const char* a, const char* b) -{ - return strcoll(a, b); -} - -static int indirect_compare_line(const void* a_ptr, const void* b_ptr) -{ - return indirect_compare(compare_line, a_ptr, b_ptr); -} - -static int compare_line_reverse(const char* a, const char* b) -{ - return flip_comparison(compare_line(a, b)); -} - -static int indirect_compare_line_reverse(const void* a_ptr, const void* b_ptr) -{ - return indirect_compare(compare_line_reverse, a_ptr, b_ptr); -} - -static int compare_version(const char* a, const char* b) -{ - return strverscmp(a, b); -} - -static int indirect_compare_version(const void* a_ptr, const void* b_ptr) -{ - return indirect_compare(compare_version, a_ptr, b_ptr); -} - -static int compare_version_reverse(const char* a, const char* b) -{ - return flip_comparison(compare_version(a, b)); -} - -static int indirect_compare_version_reverse(const void* a_ptr, const void* b_ptr) -{ - return indirect_compare(compare_version_reverse, a_ptr, b_ptr); -} - struct input_stream { const char* const* files; @@ -184,6 +512,155 @@ static char** read_input_stream_lines(size_t* result_num_lines, return *result_num_lines = lines_used, lines; } +static size_t parse_modifiers(const char* keystring, int* modifiers) +{ + size_t offset = 0; + while ( keystring[offset] ) + { + switch ( keystring[offset] ) + { + case 'b': *modifiers |= MODIFIER_BLANK; break; + case 'd': *modifiers |= MODIFIER_DICTIONARY; break; + case 'f': *modifiers |= MODIFIER_IGNORE_CASE; break; + case 'g': *modifiers |= MODIFIER_GENERAL_NUMERIC; break; + case 'h': *modifiers |= MODIFIER_HUMAN; break; + case 'i': *modifiers |= MODIFIER_IGNORE_NONPRINTING; break; + case 'M': *modifiers |= MODIFIER_MONTH; break; + case 'n': *modifiers |= MODIFIER_NUMERIC; break; + case 'R': *modifiers |= MODIFIER_RANDOM; break; + case 'r': *modifiers |= MODIFIER_REVERSE; break; + case 'V': *modifiers |= MODIFIER_VERSION; break; + default: return offset; + } + offset++; + } + return offset; +} + +static void check_modifiers(int modifiers) +{ + modifiers &= ~(MODIFIER_BLANK | MODIFIER_UNIQUE | MODIFIER_REVERSE); + bool incompatible = false; + if ( modifiers & (MODIFIER_DICTIONARY | + MODIFIER_IGNORE_CASE | + MODIFIER_IGNORE_NONPRINTING) ) + { + if ( modifiers & ~(MODIFIER_DICTIONARY | + MODIFIER_IGNORE_CASE | + MODIFIER_IGNORE_NONPRINTING) ) + incompatible = true; + } + else if ( modifiers & MODIFIER_NUMERIC ) + { + if ( modifiers & ~(MODIFIER_NUMERIC | MODIFIER_IGNORE_CASE) ) + incompatible = true; + } + else if ( modifiers && 1 << (ffs(modifiers) - 1) != modifiers ) + incompatible = true; + if ( incompatible ) + { + char options[sizeof(int) * CHAR_BIT]; + size_t offset = 0; + if ( modifiers & MODIFIER_DICTIONARY ) options[offset++] = 'd'; + if ( modifiers & MODIFIER_IGNORE_CASE ) options[offset++] = 'f'; + if ( modifiers & MODIFIER_GENERAL_NUMERIC ) options[offset++] = 'g'; + if ( modifiers & MODIFIER_HUMAN ) options[offset++] = 'h'; + if ( modifiers & MODIFIER_IGNORE_NONPRINTING ) options[offset++] = 'i'; + if ( modifiers & MODIFIER_MONTH ) options[offset++] = 'M'; + if ( modifiers & MODIFIER_NUMERIC ) options[offset++] = 'n'; + if ( modifiers & MODIFIER_RANDOM ) options[offset++] = 'R'; + if ( modifiers & MODIFIER_VERSION ) options[offset++] = 'V'; + options[offset] = '\0'; + errx(2, "options '-%s' are incompatible", options); + } +} + +static void parse_key(const char* keystring) +{ + + const char* str = keystring; + if ( !(keys = reallocarray(keys, sizeof(struct key), ++keys_count)) ) + err(2, "malloc"); + struct key* key = &keys[keys_count - 1]; + memset(key, 0, sizeof(*key)); + for ( size_t i = 0; str[i]; i++ ) + { + if ( isblank((unsigned char) str[i]) || + str[i] == '-' || str[i] == '+' ) + errx(2, "invalid key: %s", keystring); + } + char* end; + uintmax_t value; + errno = 0; + value = strtoumax(str, &end, 10); + if ( end == str || errno || value != (size_t) value ) + errx(2, "invalid key starting field: %s", keystring); + str += end - str; + if ( value == 0 ) + errx(2, "invalid key with zero field: %s", keystring); + key->start_field = value - 1; + if ( *str == '.' ) + { + str++; + errno = 0; + value = strtoumax(str, &end, 10); + if ( end == str || errno || value != (size_t) value ) + errx(2, "invalid key starting character offset: %s", keystring); + str += end - str; + if ( value == 0 ) + errx(2, "invalid key with zero character offset: %s", keystring); + key->first_character = value - 1; + } + else + key->first_character = 0; + str += parse_modifiers(str, &key->start_modifiers); + if ( !*str ) + { + key->end_field = SIZE_MAX - 1; + key->last_character = SIZE_MAX; + key->end_modifiers = 0; + return; + } + if ( *str != ',' ) + errx(2, "invalid key modifier '%c': %s", *str, keystring); + str++; + errno = 0; + value = strtoumax(str, &end, 10); + if ( end == str || errno || value != (size_t) value ) + errx(2, "invalid key ending field: %s", keystring); + key->end_field = value - 1; + if ( value == 0 ) + errx(2, "invalid key with zero field: %s", keystring); + str += end - str; + if ( *str == '.' ) + { + str++; + errno = 0; + value = strtoumax(str, &end, 10); + if ( end == str || errno || value != (size_t) value ) + errx(2, "invalid key ending character offset: %s", keystring); + str += end - str; + if ( value == 0 ) + key->last_character = SIZE_MAX; + else + key->last_character = value - 1; + } + else + key->last_character = SIZE_MAX; + str += parse_modifiers(str, &key->end_modifiers); + if ( *str ) + errx(2, "invalid key modifier '%c': %s", *str, keystring); + check_modifiers(key->start_modifiers | key->end_modifiers); +} + +static void parse_separator(const char* parameter) +{ + if ( strlen(parameter) != 1 ) + errx(2, "invalid separator: %s", parameter); + separator = parameter[0]; + separator_is_blank = false; +} + static void compact_arguments(int* argc, char*** argv) { for ( int i = 0; i < *argc; i++ ) @@ -205,10 +682,8 @@ int main(int argc, char* argv[]) bool check_quiet = false; bool merge = false; const char* output = NULL; - bool random = false; - bool reverse = false; + const char* parameter = NULL; bool unique = false; - bool version_sort = false; bool zero_terminated = false; for ( int i = 1; i < argc; i++ ) @@ -224,9 +699,28 @@ int main(int argc, char* argv[]) char c; while ( (c = *++arg) ) switch ( c ) { + case 'b': modifiers |= MODIFIER_BLANK; break; case 'C': check = true, check_quiet = true; break; case 'c': check = true, check_quiet = false; break; + case 'd': modifiers |= MODIFIER_DICTIONARY; break; + case 'f': modifiers |= MODIFIER_IGNORE_CASE; break; + case 'g': modifiers |= MODIFIER_GENERAL_NUMERIC; break; + case 'i': modifiers |= MODIFIER_IGNORE_NONPRINTING; break; + case 'k': + if ( !*(parameter = arg + 1) ) + { + if ( i + 1 == argc ) + errx(2, "parameter requires an argument -- 'k'"); + parameter = argv[i+1]; + argv[++i] = NULL; + } + parse_key(parameter); + arg = "k"; + break; + case 'h': modifiers |= MODIFIER_HUMAN; break; case 'm': merge = true; break; + case 'M': modifiers |= MODIFIER_MONTH; break; + case 'n': modifiers |= MODIFIER_NUMERIC; break; case 'o': if ( !*(output = arg + 1) ) { @@ -237,15 +731,85 @@ int main(int argc, char* argv[]) } arg = "o"; break; - case 'R': random = true; break; - case 'r': reverse = true; break; + case 'R': modifiers |= MODIFIER_RANDOM; break; + case 'r': modifiers |= MODIFIER_REVERSE; break; + case 't': + if ( !*(parameter = arg + 1) ) + { + if ( i + 1 == argc ) + errx(2, "parameter requires an argument -- 't'"); + parameter = argv[i+1]; + argv[++i] = NULL; + } + parse_separator(parameter); + arg = "t"; + break; case 'u': unique = true; break; - case 'V': version_sort = true; break; + case 'V': modifiers |= MODIFIER_VERSION; break; case 'z': zero_terminated = true; break; default: errx(2, "unknown option -- '%c'", c); } } + else if ( !strcmp(arg, "--ignore-leading-blanks") ) + modifiers |= MODIFIER_BLANK; + else if ( !strcmp(arg, "--dictionary-order") ) + modifiers |= MODIFIER_DICTIONARY; + else if ( !strcmp(arg, "--ignore-case") ) + modifiers |= MODIFIER_IGNORE_CASE; + else if ( !strcmp(arg, "--general-numeric-sort") ) + modifiers |= MODIFIER_GENERAL_NUMERIC; + else if ( !strcmp(arg, "--ignore-nonprinting") ) + modifiers |= MODIFIER_IGNORE_NONPRINTING; + else if ( !strcmp(arg, "--human-numeric-sort") ) + modifiers |= MODIFIER_HUMAN; + else if ( !strcmp(arg, "--month-sort") ) + modifiers |= MODIFIER_MONTH; + else if ( !strncmp(arg, "--key=", strlen("--key=")) ) + parse_key(arg + strlen("--key=")); + else if ( !strcmp(arg, "--key") ) + { + if ( i + 1 == argc ) + errx(2, "option '--key' requires an argument"); + parse_key(argv[i+1]); + argv[++i] = NULL; + } + else if ( !strncmp(arg, "--field-separator=", strlen("--field-separator=")) ) + parse_separator(arg + strlen("--field-separator=")); + else if ( !strcmp(arg, "--field-separator") ) + { + if ( i + 1 == argc ) + errx(2, "option '--field-separator' requires an argument"); + parse_separator(argv[i+1]); + argv[++i] = NULL; + } + else if ( !strcmp(arg, "--sort") || + !strncmp(arg, "--sort=", strlen("--sort=")) ) + { + if ( !strncmp(arg, "--sort=", strlen("--sort=")) ) + parameter = arg + strlen("--sort="); + else + { + if ( i + 1 == argc ) + errx(2, "option '--sort' requires an argument"); + parameter = argv[i+1]; + argv[++i] = NULL; + } + if ( !strcmp(parameter, "general-numeric") ) + modifiers |= MODIFIER_GENERAL_NUMERIC; + else if ( !strcmp(parameter, "human-numeric") ) + modifiers |= MODIFIER_HUMAN; + else if ( !strcmp(parameter, "month") ) + modifiers |= MODIFIER_MONTH; + else if ( !strcmp(parameter, "numeric") ) + modifiers |= MODIFIER_NUMERIC; + else if ( !strcmp(parameter, "random") ) + modifiers |= MODIFIER_RANDOM; + else if ( !strcmp(parameter, "version") ) + modifiers |= MODIFIER_VERSION; + else + errx(1, "invalid sort: %s", parameter); + } else if ( !strcmp(arg, "--check") || !strcmp(arg, "--check=diagnose-first") ) check = true, check_quiet = false; @@ -254,6 +818,8 @@ int main(int argc, char* argv[]) check = true, check_quiet = true; else if ( !strcmp(arg, "--merge") ) merge = true; + else if ( !strcmp(arg, "--numeric-sort") ) + modifiers |= MODIFIER_NUMERIC; else if ( !strncmp(arg, "--output=", strlen("--output=")) ) output = arg + strlen("--output="); else if ( !strcmp(arg, "--output") ) @@ -264,13 +830,13 @@ int main(int argc, char* argv[]) argv[++i] = NULL; } else if ( !strcmp(arg, "--random-sort") ) - random = true; + modifiers |= MODIFIER_RANDOM; else if ( !strcmp(arg, "--reverse") ) - reverse = true; + modifiers |= MODIFIER_REVERSE; else if ( !strcmp(arg, "--unique") ) unique = true; else if ( !strcmp(arg, "--version-sort") ) - version_sort = true; + modifiers |= MODIFIER_VERSION; else if ( !strcmp(arg, "--zero-terminated") ) zero_terminated = true; else @@ -280,31 +846,31 @@ int main(int argc, char* argv[]) compact_arguments(&argc, &argv); if ( check_quiet && output ) - errx(1, "the -C and -o options are incompatible"); + errx(2, "the -C and -o options are incompatible"); if ( check && output ) - errx(1, "the -c and -o options are incompatible"); - if ( check_quiet && random ) - errx(1, "the -C and -R options are incompatible"); - if ( check && random ) - errx(1, "the -c and -R options are incompatible"); + errx(2, "the -c and -o options are incompatible"); + if ( check_quiet && (modifiers & MODIFIER_RANDOM) ) + errx(2, "the -C and -R options are incompatible"); + if ( check && (modifiers & MODIFIER_RANDOM) ) + errx(2, "the -c and -R options are incompatible"); + + check_modifiers(modifiers); int delim = zero_terminated ? '\0' : '\n'; - int (*compare)(const char*, const char*); - int (*qsort_compare)(const void*, const void*); - - if ( version_sort && reverse ) - compare = compare_version_reverse, - qsort_compare = indirect_compare_version_reverse; - else if ( version_sort ) - compare = compare_version, - qsort_compare = indirect_compare_version; - else if ( reverse ) - compare = compare_line_reverse, - qsort_compare = indirect_compare_line_reverse; - else - compare = compare_line, - qsort_compare = indirect_compare_line; + if ( !keys_count ) + { + keys = malloc(sizeof(struct key)); + if ( !keys ) + err(2, "malloc"); + keys->start_field = 0; + keys->first_character = 0; + keys->end_field = SIZE_MAX - 1; + keys->last_character = SIZE_MAX; + keys->start_modifiers = 0; + keys->end_modifiers = 0; + keys_count = 1; + } struct input_stream is; memset(&is, 0, sizeof(is)); @@ -314,6 +880,9 @@ int main(int argc, char* argv[]) if ( check ) { + if ( unique ) + modifiers |= MODIFIER_UNIQUE; + int needed_relation = unique ? 1 : 0; char* prev_line = NULL; char* line; @@ -338,10 +907,10 @@ int main(int argc, char* argv[]) size_t lines_used = 0; char** lines = read_input_stream_lines(&lines_used, &is, delim); - if ( !random || unique ) - qsort(lines, lines_used, sizeof(*lines), qsort_compare); + if ( !(modifiers & MODIFIER_RANDOM) || unique ) + qsort(lines, lines_used, sizeof(*lines), indirect_compare); - if ( random ) + if ( (modifiers & MODIFIER_RANDOM) ) { if ( unique ) { @@ -370,6 +939,9 @@ int main(int argc, char* argv[]) if ( output && !freopen(output, "w", stdout) ) err(2, "%s", output); + if ( unique ) + modifiers |= MODIFIER_UNIQUE; + for ( size_t i = 0; i < lines_used; i++ ) { if ( unique && i && compare(lines[i-1], lines[i]) == 0 )