/*******************************************************************************

    Copyright(C) Jonas 'Sortie' Termansen 2013.

    This program is free software: you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation, either version 3 of the License, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT
    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
    FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
    more details.

    You should have received a copy of the GNU General Public License along
    with this program. If not, see <http://www.gnu.org/licenses/>.

    wc.cpp
    Counts bytes, characters, words and lines.

*******************************************************************************/

/* NOTE(review): the header names were lost from the stored copy of this file;
   the list below is reconstructed from the identifiers the code uses. Confirm
   against the upstream file. */
#include <sys/stat.h>

#include <errno.h>
#include <error.h>
#include <locale.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <wchar.h>
#include <wctype.h>

#if !defined(VERSIONSTR)
#define VERSIONSTR "unknown version"
#endif

/* Bit flags selecting which counters are printed and how they are laid out.
   An enum keeps these usable as integer constant expressions in both C and
   C++. */
enum
{
	FLAG_PRINT_NUM_BYTES = 1 << 0,
	FLAG_PRINT_NUM_CHARACTERS = 1 << 1,
	FLAG_PRINT_NUM_WORDS = 1 << 2,
	FLAG_PRINT_NUM_LINES = 1 << 3,
	/* Print the number without column padding. */
	FLAG_PRINT_COMPACT = 1 << 4,

	/* Counters printed when no selection option is given. */
	DEFAULT_FLAGS = FLAG_PRINT_NUM_BYTES |
	                FLAG_PRINT_NUM_WORDS |
	                FLAG_PRINT_NUM_LINES
};

/* The counters gathered for one input stream (or summed over several). */
struct word_count
{
	uintmax_t num_bytes;
	uintmax_t num_characters;
	uintmax_t num_words;
	uintmax_t num_lines;
};

/* Adds two counters, clamping at the largest uintmax_t instead of wrapping. */
static uintmax_t add_saturating(uintmax_t a, uintmax_t b)
{
	const uintmax_t max = (uintmax_t) -1;
	return b <= max - a ? a + b : max;
}

/* Reads fp until fgetc() returns EOF and returns the gathered counters.

   Bytes are fed one at a time to mbrtowc() so that multibyte characters are
   counted once. A byte that makes the sequence invalid resets the conversion
   state and is counted as a byte only. A final word or line that is not
   terminated by white space / a newline is still counted. The caller is
   responsible for checking ferror(fp) afterwards. */
static struct word_count count_words(FILE* fp)
{
	struct word_count stats;
	memset(&stats, 0, sizeof(stats));

	mbstate_t mbstate;
	memset(&mbstate, 0, sizeof(mbstate));

	bool word_begun = false;
	bool line_begun = false;

	int ic;
	while ( (ic = fgetc(fp)) != EOF )
	{
		stats.num_bytes++;

		char c = (char) ((unsigned char) ic);
		wchar_t wc;
		size_t num_converted = mbrtowc(&wc, &c, 1, &mbstate);

		/* Invalid sequence: start over with a fresh conversion state. */
		if ( num_converted == (size_t) -1 )
		{
			memset(&mbstate, 0, sizeof(mbstate));
			continue;
		}

		/* Incomplete sequence: more bytes are needed for this character. */
		if ( num_converted == (size_t) -2 )
			continue;

		// TODO: Is this strictly speaking needed?
		if ( !num_converted )
			wc = L'\0';

		stats.num_characters++;

		/* White space ends the current word, if one was in progress. */
		if ( iswspace(wc) )
		{
			if ( word_begun )
				stats.num_words++;
			word_begun = false;
		}
		else
			word_begun = true;

		/* A newline ends the current line; anything else starts or
		   continues one. */
		if ( wc == L'\n' )
		{
			stats.num_lines++;
			line_begun = false;
		}
		else
			line_begun = true;
	}

	/* Account for a trailing word or line that was never terminated. */
	if ( word_begun )
		stats.num_words++;
	if ( line_begun )
		stats.num_lines++;

	return stats;
}

/* Prints value to fp if the bit cond is set in flags; otherwise prints
   nothing. In compact mode the bare number is printed; otherwise values
   below 100000 are right-aligned in a six-character column and larger values
   are surrounded by single spaces. */
static void print_stat(FILE* fp, uintmax_t value, int flags, int cond)
{
	if ( !(flags & cond) )
		return;

	if ( flags & FLAG_PRINT_COMPACT )
		fprintf(fp, "%ju", value);
	else if ( value < 100000 )
		fprintf(fp, "%6ju", value);
	else
		fprintf(fp, " %ju ", value);
}

/* Prints one row of counters to fp, followed by " path" when path is not
   NULL, and a newline. The column order is lines, words, characters, bytes,
   which is the order promised by usage(). */
static void print_stats(struct word_count stats, FILE* fp, int flags,
                        const char* path)
{
	// TODO: Proper columnization of large values will require knowing all the
	//       row values in advance - so we'll have to remember the statistics
	//       for every file we process before printing!
	print_stat(fp, stats.num_lines, flags, FLAG_PRINT_NUM_LINES);
	print_stat(fp, stats.num_words, flags, FLAG_PRINT_NUM_WORDS);
	print_stat(fp, stats.num_characters, flags, FLAG_PRINT_NUM_CHARACTERS);
	print_stat(fp, stats.num_bytes, flags, FLAG_PRINT_NUM_BYTES);
	if ( path )
		fprintf(fp, " %s", path);
	fprintf(fp, "\n");
}

/* Writes the usage synopsis and option summary to fp. */
static void usage(FILE* fp, const char* argv0)
{
	fprintf(fp, "Usage: %s [OPTION]... [FILE]...\n", argv0);
	fprintf(fp, "Print newline, word, and byte counts for each FILE, and a total line if\n");
	fprintf(fp, "more than one FILE is specified.  With no FILE, or when FILE is -,\n");
	fprintf(fp, "read standard input.  A word is a non-zero-length sequence of characters\n");
	fprintf(fp, "delimited by white space.\n");
	fprintf(fp, "The options below may be used to select which counts are printed, always in\n");
	fprintf(fp, "the following order: newline, word, character, byte.\n");
	fprintf(fp, "\n");
	fprintf(fp, "  -c, --bytes            print the byte counts\n");
	fprintf(fp, "  -m, --chars            print the character counts\n");
	fprintf(fp, "  -l, --lines            print the newline counts\n");
	fprintf(fp, "  -w, --words            print the word counts\n");
	fprintf(fp, "      --help             display this help and exit\n");
	fprintf(fp, "      --usage            display this help and exit\n");
	fprintf(fp, "      --version          output version information and exit\n");
}

/* Writes the help text to fp; currently identical to usage(). */
static void help(FILE* fp, const char* argv0)
{
	usage(fp, argv0);
}

/* Writes the program name, version string and license notice to fp. */
static void version(FILE* fp, const char* argv0)
{
	fprintf(fp, "%s (Sortix) %s\n", argv0, VERSIONSTR);
	fprintf(fp, "License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>.\n");
	fprintf(fp, "This is free software: you are free to change and redistribute it.\n");
	fprintf(fp, "There is NO WARRANTY, to the extent permitted by law.\n");
}

/* Removes the NULL entries from (*argv)[0 .. *argc-1], preserving the order of
   the remaining entries, and updates *argc to the new count. The slot just
   past the kept entries is set to NULL; this relies on (*argv)[*argc] being a
   writable slot, as it is for the argv passed to main(). */
static void compact_arguments(int* argc, char*** argv)
{
	char** args = *argv;
	int kept = 0;
	for ( int i = 0; i < *argc; i++ )
		if ( args[i] )
			args[kept++] = args[i];
	args[kept] = NULL;
	*argc = kept;
}

/* Counts the contents of fp and prints one row labelled with path to stdout.

   If total is not NULL, the counters are added to it (saturating rather than
   wrapping on overflow). Returns false, after reporting through error(), if
   fp refers to a directory or if the stream's error indicator is set after
   reading; a row is printed in both cases (all zeroes for a directory).
   Returns true otherwise. */
bool word_count_file(FILE* fp, const char* path, int flags,
                     struct word_count* total)
{
	struct stat st;
	if ( fstat(fileno(fp), &st) == 0 && S_ISDIR(st.st_mode) )
	{
		struct word_count empty;
		memset(&empty, 0, sizeof(empty));
		error(0, EISDIR, "`%s'", path);
		print_stats(empty, stdout, flags, path);
		return false;
	}

	struct word_count word_count = count_words(fp);

	if ( total )
	{
		total->num_bytes =
			add_saturating(total->num_bytes, word_count.num_bytes);
		total->num_characters =
			add_saturating(total->num_characters, word_count.num_characters);
		total->num_words =
			add_saturating(total->num_words, word_count.num_words);
		total->num_lines =
			add_saturating(total->num_lines, word_count.num_lines);
	}

	/* Whatever was read before the failure is still reported. */
	bool read_failed = ferror(fp) != 0;
	if ( read_failed )
		error(0, errno, "`%s'", path);

	print_stats(word_count, stdout, flags, path);
	return !read_failed;
}

/* Processes the operands argv[1 .. argc-1] in order, printing one row per
   operand and, when there are two or more operands, a final "total" row.

   With no operands, standard input is counted and printed without a label.
   The operand "-" means standard input and contributes to the total like any
   other operand. An operand that cannot be opened is reported through error()
   and gets an all-zero row. Returns non-zero if every operand was processed
   successfully and zero otherwise. */
int word_count_files(int argc, char* argv[], int flags)
{
	if ( argc <= 1 )
		return word_count_file(stdin, NULL, flags, NULL);

	struct word_count total_count;
	memset(&total_count, 0, sizeof(total_count));

	bool success = true;

	for ( int i = 1; i < argc; i++ )
	{
		const char* path = argv[i];

		if ( !strcmp(path, "-") )
		{
			if ( !word_count_file(stdin, "-", flags, &total_count) )
				success = false;
			continue;
		}

		FILE* fp = fopen(path, "r");
		if ( !fp )
		{
			error(0, errno, "`%s'", path);
			struct word_count empty;
			memset(&empty, 0, sizeof(empty));
			print_stats(empty, stdout, flags, path);
			success = false;
			continue;
		}

		if ( !word_count_file(fp, path, flags, &total_count) )
			success = false;

		fclose(fp);
	}

	if ( 3 <= argc )
		print_stats(total_count, stdout, flags, "total");

	return success;
}

/* Parses the options, removes them from argv, and counts the remaining
   operands. Exits with status 0 on success and 1 on failure or on an unknown
   option. */
int main(int argc, char* argv[])
{
	setlocale(LC_ALL, "");

	int flags = 0;

	const char* argv0 = argv[0];
	for ( int i = 1; i < argc; i++ )
	{
		const char* arg = argv[i];

		/* Operands (including a lone "-") are left in place. */
		if ( arg[0] != '-' || !arg[1] )
			continue;

		/* Options are blanked out and removed by compact_arguments() below. */
		argv[i] = NULL;

		/* "--" ends option parsing; everything after it is an operand. */
		if ( !strcmp(arg, "--") )
			break;

		if ( arg[1] != '-' )
		{
			/* A cluster of short options, e.g. "-lw". */
			for ( const char* opt = arg + 1; *opt; opt++ )
			{
				switch ( *opt )
				{
				case 'c': flags |= FLAG_PRINT_NUM_BYTES; break;
				case 'l': flags |= FLAG_PRINT_NUM_LINES; break;
				case 'm': flags |= FLAG_PRINT_NUM_CHARACTERS; break;
				case 'w': flags |= FLAG_PRINT_NUM_WORDS; break;
				default:
					fprintf(stderr, "%s: unknown option -- '%c'\n", argv0, *opt);
					usage(stderr, argv0);
					exit(1);
				}
			}
		}
		else if ( !strcmp(arg, "--help") )
		{
			help(stdout, argv0);
			exit(0);
		}
		else if ( !strcmp(arg, "--usage") )
		{
			usage(stdout, argv0);
			exit(0);
		}
		else if ( !strcmp(arg, "--version") )
		{
			version(stdout, argv0);
			exit(0);
		}
		else if ( !strcmp(arg, "--bytes") )
			flags |= FLAG_PRINT_NUM_BYTES;
		else if ( !strcmp(arg, "--chars") )
			flags |= FLAG_PRINT_NUM_CHARACTERS;
		else if ( !strcmp(arg, "--lines") )
			flags |= FLAG_PRINT_NUM_LINES;
		else if ( !strcmp(arg, "--words") )
			flags |= FLAG_PRINT_NUM_WORDS;
		else
		{
			fprintf(stderr, "%s: unknown option: %s\n", argv0, arg);
			usage(stderr, argv0);
			exit(1);
		}
	}

	compact_arguments(&argc, &argv);

	if ( !flags )
		flags = DEFAULT_FLAGS;

	/* With exactly one counter selected and at most one operand there is only
	   a single number to print, so skip the column padding. */
	bool single_counter = (flags & (flags - 1)) == 0;
	if ( single_counter && argc <= 2 )
		flags |= FLAG_PRINT_COMPACT;

	return word_count_files(argc, argv, flags) ? 0 : 1;
}