diff --git a/libc/inttypes/strtoimax.cpp b/libc/inttypes/strtoimax.cpp
index 0f8e7f86..f397b5a6 100644
--- a/libc/inttypes/strtoimax.cpp
+++ b/libc/inttypes/strtoimax.cpp
@@ -24,6 +24,9 @@

 #define STRTOL strtoimax
 #define STRTOL_INT intmax_t
+#define STRTOL_UNSIGNED_INT uintmax_t
+#define STRTOL_INT_MIN INTMAX_MIN
+#define STRTOL_INT_MAX INTMAX_MAX
 #define STRTOL_INT_IS_UNSIGNED false

 #include "../stdlib/strtol.cpp"
diff --git a/libc/inttypes/strtoumax.cpp b/libc/inttypes/strtoumax.cpp
index cc8d3e80..b5f4d57e 100644
--- a/libc/inttypes/strtoumax.cpp
+++ b/libc/inttypes/strtoumax.cpp
@@ -24,6 +24,9 @@

 #define STRTOL strtoumax
 #define STRTOL_INT uintmax_t
+#define STRTOL_UNSIGNED_INT uintmax_t
+#define STRTOL_INT_MIN 0
+#define STRTOL_INT_MAX UINTMAX_MAX
 #define STRTOL_INT_IS_UNSIGNED true

 #include "../stdlib/strtol.cpp"
diff --git a/libc/stdlib/strtol.cpp b/libc/stdlib/strtol.cpp
index e7dcfc5d..eda7a68f 100644
--- a/libc/stdlib/strtol.cpp
+++ b/libc/stdlib/strtol.cpp
@@ -25,15 +25,26 @@
 #ifndef STRTOL
 #define STRTOL strtol
 #define STRTOL_INT long
+#define STRTOL_UNSIGNED_INT unsigned long
+#define STRTOL_INT_MIN LONG_MIN
+#define STRTOL_INT_MAX LONG_MAX
 #define STRTOL_INT_IS_UNSIGNED false
 #endif

+#include <assert.h>
 #include <ctype.h>
+#include <errno.h>
 #include <inttypes.h>
 #include <stddef.h>
+#include <stdint.h>
 #include <stdlib.h>

-static int Debase(char c)
+// Nasty, nasty hack to get LLONG_* macros.
+#define __STDC_VERSION__ 199901L
+#include <limits.h>
+
+// Convert a character into a digit.
+static int debase(char c)
 {
 	if ( '0' <= c && c <= '9' )
 		return c - '0';
@@ -44,59 +55,181 @@ static int Debase(char c)
 	return -1;
 }

-template <class INT, bool UNSIGNED>
-INT ParseInteger(const char* str, char** endptr, int base)
+// Determine whether a multiplication of two integers would overflow/underflow.
+// This is easy if we have a larger integer type, otherwise we'll be creative.
+template <class T_INT, class T_UNSIGNED_INT, T_INT T_INT_MIN, T_INT T_INT_MAX,
+          bool T_INT_IS_UNSIGNED>
+static bool would_multiplication_overflow(T_INT a, T_INT b)
+{
+	// Prevent accidental divisions by zero in this simple case.
+	if ( !a || !b )
+		return false;
+
+	// Check if we have a 64-bit integer that is large enough.
+	if ( sizeof(T_INT)*2 <= sizeof(int64_t) )
+	{
+		int64_t ret_large = (int64_t) a * (int64_t) b;
+		return ret_large < (int64_t) T_INT_MIN ||
+		       (int64_t) T_INT_MAX < ret_large;
+	}
+
+	// Check if we have a 128-bit integer that is large enough.
+#if 64 <= __WORDSIZE
+	if ( sizeof(T_INT)*2 <= sizeof(__int128) )
+	{
+		__int128 ret_large = (__int128) a * (__int128) b;
+		return ret_large < (__int128) T_INT_MIN ||
+		       (__int128) T_INT_MAX < ret_large;
+	}
+#endif
+
+	// The fallback strategy is to determine the largest b given a that will not
+	// overflow, which is the limit divided by a, and then see if b is within
+	// range. This is trivial in the unsigned integer case.
+	if ( T_INT_IS_UNSIGNED )
+	{
+		T_INT max_b = T_INT_MAX / a;
+		return max_b < b;
+	}
+
+	// We have to deal with some cases for signed integers. We'll assume signed
+	// integers are in two's complement and use - (unsigned int) value to take
+	// the absolute value of a negative value as an unsigned integer in a manner
+	// that is defined in C. Whether we use the smallest or largest value depends
+	// on whether the signs of a and b are identical.
+	else
+	{
+		T_UNSIGNED_INT a_abs = a < 0 ? - (T_UNSIGNED_INT) a : a;
+		T_UNSIGNED_INT b_abs = b < 0 ? - (T_UNSIGNED_INT) b : b;
+		T_UNSIGNED_INT min_abs = - (T_UNSIGNED_INT) T_INT_MIN;
+		T_UNSIGNED_INT max_abs = T_INT_MAX;
+		bool limit_pos = (0 <= a && 0 <= b) || ( a < 0 && b < 0 );
+		T_UNSIGNED_INT limit = limit_pos ? max_abs : min_abs;
+		T_UNSIGNED_INT max_b = limit / a_abs;
+		return max_b < b_abs;
+	}
+}
+
+extern "C"
+STRTOL_INT STRTOL(const char* restrict str, char** restrict endptr, int base)
 {
 	const char* origstr = str;
 	int origbase = base;
-	while ( isspace(*str) ) { str++; }
+
+	// Skip any leading white space. The cast keeps isspace defined when char is
+	// signed and the character is negative.
+	while ( isspace((unsigned char) *str) )
+		str++;
+
+	// Reject bad bases: only 0 (autodetect) and 2 through 36 are valid. Nothing
+	// was converted, so the caller gets the original string back.
-	if ( base < 0 || 36 < base )
+	if ( base < 0 || base == 1 || 36 < base )
 	{
 		if ( endptr )
-			*endptr = (char*) str;
-		return 0;
+			*endptr = (char*) origstr;
+		return errno = EINVAL, 0;
 	}
-	INT result = 0;
+
 	bool negative = false;
 	char c = *str;
-	if ( !UNSIGNED && c == '-' )
+
+	// Handle a leading sign character.
+	if ( c == '-' )
 		str++, negative = true;
-	if ( !UNSIGNED && c == '+' )
+	if ( c == '+' )
 		str++, negative = false;
+
+	// Autodetect base 8 or base 16. The '0x' prefix only counts if a hex digit
+	// follows it, otherwise the leading zero is an octal number on its own. The
+	// leading zero is left in place for octal as it is a valid digit.
 	if ( !base && str[0] == '0' )
 	{
-		if ( str[1] == 'x' || str[1] == 'X' )
+		if ( (str[1] == 'x' || str[1] == 'X') &&
+		     0 <= debase(str[2]) && debase(str[2]) < 16 )
 			str += 2, base = 16;
-		else if ( 0 <= Debase(str[1]) && Debase(str[1]) < 8 )
-			str++, base = 8;
+		else
+			base = 8;
 	}
+
+	// Default to base 10.
 	if ( !base )
 		base = 10;
-	if ( origbase == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X') )
+
+	// Skip the leading '0x' prefix in base 16 for hexadecimal integers, but only
+	// if a hexadecimal digit follows it.
+	if ( origbase == 16 && str[0] == '0' && (str[1] == 'x' || str[1] == 'X') &&
+	     0 <= debase(str[2]) && debase(str[2]) < 16 )
 		str += 2;
+
+	// Determine what value will be returned on overflow/underflow. Negative
+	// signed values are accumulated downwards towards the minimum value.
+	bool accumulate_negative = negative && !STRTOL_INT_IS_UNSIGNED;
+	STRTOL_INT overflow_value = accumulate_negative ?
+	                            STRTOL_INT_MIN :
+	                            STRTOL_INT_MAX;
+
+	// Convert a single character at a time.
+	STRTOL_INT result = 0;
 	size_t numconvertedchars = 0;
+	bool overflow_occured = false;
 	while ( (c = *str ) )
 	{
-		int val = Debase(c);
-		if ( val < 0 )
+		// Stop if we encountered a character that doesn't fit in this base.
+		int val = debase(c);
+		if ( val < 0 || base <= val )
 			break;
-		if ( base <= val )
-			break;
-		if ( !UNSIGNED && negative )
-			val = -val;
-		// TODO: Detect overflow!
-		result = result * (INT) base + (INT) val;
+		STRTOL_INT digit = (STRTOL_INT) val;
+
+		// Attempt to multiply the accumulator with the current base.
+		if ( would_multiplication_overflow<STRTOL_INT, STRTOL_UNSIGNED_INT,
+		                                   STRTOL_INT_MIN, STRTOL_INT_MAX,
+		                                   STRTOL_INT_IS_UNSIGNED>
+		                                  (result, (STRTOL_INT) base) )
+			overflow_occured = true, result = overflow_value;
+		else
+		{
+			STRTOL_INT new_result = result * (STRTOL_INT) base;
+			// The accumulator moves away from zero in the direction of the sign.
+			assert(accumulate_negative ? new_result <= result :
+			                             result <= new_result);
+			result = new_result;
+		}
+
+		// Attempt to add the latest digit to the accumulator (positive). Comparing
+		// against the limit minus the digit cannot itself overflow.
+		if ( !accumulate_negative && result <= STRTOL_INT_MAX - digit )
+			result += digit;
+
+		// Attempt to subtract the latest digit from the accumulator (negative).
+		else if ( accumulate_negative && STRTOL_INT_MIN + digit <= result )
+			result -= digit;
+
+		// Handle the case where the addition/subtraction would overflow/underflow.
+		else
+			overflow_occured = true, result = overflow_value;
+
 		str++;
 		numconvertedchars++;
 	}
+
+	// If no characters were successfully converted, rewind to the start, also
+	// rewinding past skipped whitespace and sign characters and such.
 	if ( !numconvertedchars )
 		str = origstr, result = 0;
+
+	// Let the caller know where we got to.
 	if ( endptr )
 		*endptr = (char*) str;
+
+	// Report that the value was out of range and was clamped.
+	if ( overflow_occured )
+		errno = ERANGE;
+
+	// Handle the special case where we are creating an unsigned integer and the
+	// string was negative and non-zero and no overflow occurred, then we treat
+	// it as (the maximum value+1) minus (the negative string as integer).
+	if ( STRTOL_INT_IS_UNSIGNED && negative && result && !overflow_occured )
+		result = STRTOL_INT_MAX - (result-1);
+
 	return result;
 }
-
-extern "C" STRTOL_INT STRTOL(const char* str, char** endptr, int base)
-{
-	return ParseInteger<STRTOL_INT, STRTOL_INT_IS_UNSIGNED>(str, endptr, base);
-}
diff --git a/libc/stdlib/strtoll.cpp b/libc/stdlib/strtoll.cpp
index 7e1e34d9..8a395eb7 100644
--- a/libc/stdlib/strtoll.cpp
+++ b/libc/stdlib/strtoll.cpp
@@ -24,6 +24,9 @@

 #define STRTOL strtoll
 #define STRTOL_INT long long
-#define STRTOL_INT_IS_UNSIGNED true
+#define STRTOL_UNSIGNED_INT unsigned long long
+#define STRTOL_INT_MIN LLONG_MIN
+#define STRTOL_INT_MAX LLONG_MAX
+#define STRTOL_INT_IS_UNSIGNED false

 #include "strtol.cpp"
diff --git a/libc/stdlib/strtoul.cpp b/libc/stdlib/strtoul.cpp
index da0d0d11..24280b88 100644
--- a/libc/stdlib/strtoul.cpp
+++ b/libc/stdlib/strtoul.cpp
@@ -24,6 +24,9 @@

 #define STRTOL strtoul
 #define STRTOL_INT unsigned long
+#define STRTOL_UNSIGNED_INT unsigned long
+#define STRTOL_INT_MIN 0
+#define STRTOL_INT_MAX ULONG_MAX
 #define STRTOL_INT_IS_UNSIGNED true

 #include "strtol.cpp"
diff --git a/libc/stdlib/strtoull.cpp b/libc/stdlib/strtoull.cpp
index 07092358..5b2b8c58 100644
--- a/libc/stdlib/strtoull.cpp
+++ b/libc/stdlib/strtoull.cpp
@@ -24,6 +24,9 @@

 #define STRTOL strtoull
 #define STRTOL_INT unsigned long long
+#define STRTOL_UNSIGNED_INT unsigned long long
+#define STRTOL_INT_MIN 0
+#define STRTOL_INT_MAX ULLONG_MAX
 #define STRTOL_INT_IS_UNSIGNED true

 #include "strtol.cpp"