From 1986bc2ba2d78f07c122a2bce5011c424bfad920 Mon Sep 17 00:00:00 2001 From: Jonas 'Sortie' Termansen Date: Sun, 19 Jan 2014 17:27:36 +0100 Subject: [PATCH] Add vscanf_callback(3). --- libc/Makefile | 1 + libc/include/stdio.h | 5 + libc/stdio/vfscanf.cpp | 291 +---------------------------- libc/stdio/vscanf_callback.cpp | 322 +++++++++++++++++++++++++++++++++ libc/stdio/vsscanf.cpp | 54 ++++-- 5 files changed, 370 insertions(+), 303 deletions(-) create mode 100644 libc/stdio/vscanf_callback.cpp diff --git a/libc/Makefile b/libc/Makefile index 3bdc55cd..0008e95d 100644 --- a/libc/Makefile +++ b/libc/Makefile @@ -115,6 +115,7 @@ stdio/ungetc.o \ stdio/vdprintf.o \ stdio/vfscanf.o \ stdio/vprintf_callback.o \ +stdio/vscanf_callback.o \ stdio/vsnprintf.o \ stdio/vsprintf.o \ stdio/vsscanf.o \ diff --git a/libc/include/stdio.h b/libc/include/stdio.h index f19c5250..0e27237c 100644 --- a/libc/include/stdio.h +++ b/libc/include/stdio.h @@ -207,6 +207,11 @@ size_t vprintf_callback(size_t (*callback)(void*, const char*, size_t), void* user, const char* __restrict format, __gnuc_va_list ap); +int vscanf_callback(void* fp, + int (*fgetc)(void*), + int (*ungetc)(int, void*), + const char* __restrict format, + __gnuc_va_list ap); #endif __END_DECLS diff --git a/libc/stdio/vfscanf.cpp b/libc/stdio/vfscanf.cpp index 6c614b20..51898680 100644 --- a/libc/stdio/vfscanf.cpp +++ b/libc/stdio/vfscanf.cpp @@ -1,6 +1,6 @@ /******************************************************************************* - Copyright(C) Jonas 'Sortie' Termansen 2012. + Copyright(C) Jonas 'Sortie' Termansen 2012, 2014. This file is part of the Sortix C Library. 
@@ -22,296 +22,19 @@ *******************************************************************************/ -#define __STDC_LIMIT_MACROS -#include -#include -#include -#include #include -#include -enum scanmode +static int wrap_fgetc(void* fp) { - MODE_INIT, - MODE_CONVSPEC, - MODE_SCANINT, - MODE_SCANINT_REAL, - MODE_SCANSTRING, - MODE_SCANSTRING_REAL, -}; - -enum scantype -{ - TYPE_SHORT, - TYPE_SHORTSHORT, - TYPE_INT, - TYPE_LONG, - TYPE_LONGLONG, - TYPE_SIZE, - TYPE_PTRDIFF, - TYPE_MAX, -}; - -static bool IsTypeModifier(char c) -{ - return c == 'h' || c == 'j' || c == 'l' || c == 'L' || c == 't' || c == 'z'; + return fgetc((FILE*) fp); } -static int debase(char c, int base) +static int wrap_ungetc(int c, void* fp) { - if ( c == '0' ) - return 0; - int ret = -1; - if ( '0' <= c && c <= '9' ) { ret = c - '0' + 0; } - if ( 'a' <= c && c <= 'f' ) { ret = c - 'a' + 10; } - if ( 'A' <= c && c <= 'F' ) { ret = c - 'A' + 10; } - if ( base <= ret ) - return -1; - return ret; + return ungetc(c, (FILE*) fp); } -extern "C" int vfscanf(FILE* fp, const char* origformat, va_list ap) +extern "C" int vfscanf(FILE* fp, const char* format, va_list ap) { - union { const char* format; const unsigned char* formatuc; }; - format = origformat; - int matcheditems = 0; - size_t fieldwidth = 0; - bool escaped = false; - bool discard = false; - bool negint = false; - bool intunsigned = false; - bool leadingzero = false; - bool hasprefix = false; - bool string = false; - size_t intparsed = 0; - uintmax_t intvalue = 0; - int ic; - int base = 0; - int cval; - const size_t UNDO_MAX = 4; - int undodata[UNDO_MAX]; - size_t undoable = 0; - size_t strwritten = 0; - char* strdest = NULL; - enum scantype scantype = TYPE_INT; - enum scanmode scanmode = MODE_INIT; - while ( true ) - { - ic = fgetc(fp); - unsigned char uc = ic; char c = uc; - switch (scanmode) - { - case MODE_INIT: - if ( !*format ) - goto break_loop; - if ( isspace(*formatuc) ) - { - if ( isspace(ic) ) - continue; - else - do format++; - 
while ( isspace(*formatuc) ); - } - if ( *format == '%' && !escaped ) - { - format++; - scanmode = MODE_CONVSPEC; - ungetc(ic, fp); - continue; - } - escaped = false; - if ( *format != c ) { ungetc(ic, fp); goto break_loop; } - format++; - break; - case MODE_CONVSPEC: - discard = false; - if ( *format == '*' ) { discard = true; format++; } - fieldwidth = 0; - while ( '0'<= *format && *format <= '9' ) - fieldwidth = fieldwidth * 10 + *format++ - '0'; - scantype = TYPE_INT; - while ( IsTypeModifier(*format) ) - switch ( *format++ ) - { - case 'h': scantype = scantype == TYPE_SHORT ? TYPE_SHORTSHORT - : TYPE_SHORT; break; - case 'j': scantype = TYPE_MAX; break; - case 'l': scantype = scantype == TYPE_LONG ? TYPE_LONGLONG - : TYPE_LONG; break; - case 'L': scantype = TYPE_LONGLONG; break; - case 't': scantype = TYPE_PTRDIFF; break; - case 'z': scantype = TYPE_SIZE; break; - } - - switch ( char convc = *format++ ) - { - case '%': - escaped = true; - default: - fprintf(stderr, "Warning: scanf does not support %c (%i)\n", - convc, convc); - fprintf(stderr, "Bailing out to prevent problems.\n"); - errno = ENOTSUP; - return -1; - continue; - case 'd': - base = 10; scanmode = MODE_SCANINT; intunsigned = false; break; - case 'i': - base = 0; scanmode = MODE_SCANINT; intunsigned = false; break; - case 'o': - base = 0; scanmode = MODE_SCANINT; intunsigned = true; break; - case 'u': - base = 10; scanmode = MODE_SCANINT; intunsigned = true; break; - case 'x': - case 'X': - base = 16; scanmode = MODE_SCANINT; intunsigned = true; break; - case 'c': - string = false; scanmode = MODE_SCANSTRING; break; - case 's': - string = true; scanmode = MODE_SCANSTRING; break; - } - ungetc(ic, fp); - continue; - case MODE_SCANINT: - intparsed = 0; - intvalue = 0; - leadingzero = false; - negint = false; - hasprefix = false; - undoable = 0; - scanmode = MODE_SCANINT_REAL; - case MODE_SCANINT_REAL: - if ( fieldwidth ) - { - fprintf(stderr, "Error: field width not supported for integers in 
scanf.\n"); - errno = ENOTSUP; - return -1; - } - if ( !undoable && isspace(ic) ) - continue; - if ( undoable < UNDO_MAX ) - undodata[undoable++] = ic; - if ( c == '-' && !intunsigned && !negint ) - { - negint = true; - continue; - } - if ( !intparsed && c == '0' && !hasprefix && - (!base || base == 8 || base == 16) && !leadingzero ) - leadingzero = true; - if ( intparsed == 1 && (c == 'x' || c == 'X') && !hasprefix && - (!base || base == 16) && leadingzero ) - { - base = 16; - leadingzero = false; - hasprefix = true; - intparsed = 0; - continue; - } - else if ( intparsed == 1 && '1' <= c && c <= '7' && !hasprefix && - (!base || base == 8) && leadingzero ) - { - base = 8; - hasprefix = true; - leadingzero = false; - } - else if ( !intparsed && '0' <= c && c <= '9' && !hasprefix && - (!base || base == 10) && !leadingzero ) - { - base = 10; - leadingzero = false; - hasprefix = true; - } - cval = debase(c, base); - if ( cval < 0 ) - { - if ( !intparsed ) - { - while ( undoable ) - ungetc(undodata[--undoable], fp); - goto break_loop; - } - scanmode = MODE_INIT; - undoable = 0; - ungetc(ic, fp); - if ( discard ) { discard = false; continue; } - uintmax_t uintmaxval = intvalue; - // TODO: Possible truncation of INTMAX_MIN! 
- intmax_t intmaxval = uintmaxval; - if ( negint ) intmaxval = -intmaxval; - bool un = intunsigned; - switch ( scantype ) - { - case TYPE_SHORTSHORT: - if ( un ) *va_arg(ap, unsigned char*) = uintmaxval; - else *va_arg(ap, signed char*) = intmaxval; - break; - case TYPE_SHORT: - if ( un ) *va_arg(ap, unsigned short*) = uintmaxval; - else *va_arg(ap, signed short*) = intmaxval; - break; - case TYPE_INT: - if ( un ) *va_arg(ap, unsigned int*) = uintmaxval; - else *va_arg(ap, signed int*) = intmaxval; - break; - case TYPE_LONG: - if ( un ) *va_arg(ap, unsigned long*) = uintmaxval; - else *va_arg(ap, signed long*) = intmaxval; - break; - case TYPE_LONGLONG: - if ( un ) *va_arg(ap, unsigned long long*) = uintmaxval; - else *va_arg(ap, signed long long*) = intmaxval; - break; - case TYPE_PTRDIFF: - *va_arg(ap, ptrdiff_t*) = intmaxval; - break; - case TYPE_SIZE: - if ( un ) *va_arg(ap, size_t*) = uintmaxval; - else *va_arg(ap, ssize_t*) = intmaxval; - break; - case TYPE_MAX: - if ( un ) *va_arg(ap, uintmax_t*) = uintmaxval; - else *va_arg(ap, intmax_t*) = intmaxval; - break; - } - matcheditems++; - continue; - } - intvalue = intvalue * (uintmax_t) base + (uintmax_t) cval; - intparsed++; - continue; - case MODE_SCANSTRING: - if ( !fieldwidth ) - fieldwidth = string ? SIZE_MAX : 1; - scanmode = MODE_SCANSTRING_REAL; - strwritten = 0; - strdest = discard ? 
NULL : va_arg(ap, char*); - case MODE_SCANSTRING_REAL: - if ( string && !strwritten && isspace(ic) ) - continue; - if ( string && strwritten && - (ic == EOF || isspace(ic) || strwritten == fieldwidth) ) - { - ungetc(ic, fp); - if ( !discard ) - strdest[strwritten] = '\0'; - matcheditems++; - scanmode = MODE_INIT; - continue; - } - if ( !string && strwritten == fieldwidth ) - { - ungetc(ic, fp); - scanmode = MODE_INIT; - continue; - } - if ( ic == EOF ) - goto break_loop; - if ( !discard ) - strdest[strwritten++] = c; - continue; - } - } -break_loop: - return matcheditems; + return vscanf_callback(fp, wrap_fgetc, wrap_ungetc, format, ap); } diff --git a/libc/stdio/vscanf_callback.cpp b/libc/stdio/vscanf_callback.cpp new file mode 100644 index 00000000..3adbd48e --- /dev/null +++ b/libc/stdio/vscanf_callback.cpp @@ -0,0 +1,322 @@ +/******************************************************************************* + + Copyright(C) Jonas 'Sortie' Termansen 2012, 2014. + + This file is part of the Sortix C Library. + + The Sortix C Library is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + The Sortix C Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>. + + stdio/vscanf_callback.cpp + Input format conversion. 
+ +*******************************************************************************/ + +#define __STDC_LIMIT_MACROS +#include +#include +#include +#include +#include +#include + +enum scanmode +{ + MODE_INIT, + MODE_CONVSPEC, + MODE_SCANINT, + MODE_SCANINT_REAL, + MODE_SCANSTRING, + MODE_SCANSTRING_REAL, +}; + +enum scantype +{ + TYPE_SHORT, + TYPE_SHORTSHORT, + TYPE_INT, + TYPE_LONG, + TYPE_LONGLONG, + TYPE_SIZE, + TYPE_PTRDIFF, + TYPE_MAX, +}; + +static bool IsTypeModifier(char c) +{ + return c == 'h' || c == 'j' || c == 'l' || c == 'L' || c == 't' || c == 'z'; +} + +static int debase(char c, int base) +{ + if ( c == '0' ) + return 0; + int ret = -1; + if ( '0' <= c && c <= '9' ) { ret = c - '0' + 0; } + if ( 'a' <= c && c <= 'f' ) { ret = c - 'a' + 10; } + if ( 'A' <= c && c <= 'F' ) { ret = c - 'A' + 10; } + if ( base <= ret ) + return -1; + return ret; +} + +extern "C" +int vscanf_callback(void* fp, + int (*fgetc)(void*), + int (*ungetc)(int, void*), + const char* restrict origformat, + va_list ap) +{ + union { const char* format; const unsigned char* formatuc; }; + format = origformat; + int matcheditems = 0; + size_t fieldwidth = 0; + bool escaped = false; + bool discard = false; + bool negint = false; + bool intunsigned = false; + bool leadingzero = false; + bool hasprefix = false; + bool string = false; + size_t intparsed = 0; + uintmax_t intvalue = 0; + int ic; + int base = 0; + int cval; + const size_t UNDO_MAX = 4; + int undodata[UNDO_MAX]; + size_t undoable = 0; + size_t strwritten = 0; + char* strdest = NULL; + enum scantype scantype = TYPE_INT; + enum scanmode scanmode = MODE_INIT; + while ( true ) + { + ic = fgetc(fp); + unsigned char uc = ic; char c = uc; + switch (scanmode) + { + case MODE_INIT: + if ( !*format ) + goto break_loop; + if ( isspace(*formatuc) ) + { + if ( isspace(ic) ) + continue; + else + do format++; + while ( isspace(*formatuc) ); + } + if ( *format == '%' && !escaped ) + { + format++; + scanmode = MODE_CONVSPEC; + ungetc(ic, fp); 
+ continue; + } + escaped = false; + if ( *format != c ) { ungetc(ic, fp); goto break_loop; } + format++; + break; + case MODE_CONVSPEC: + discard = false; + if ( *format == '*' ) { discard = true; format++; } + fieldwidth = 0; + while ( '0'<= *format && *format <= '9' ) + fieldwidth = fieldwidth * 10 + *format++ - '0'; + scantype = TYPE_INT; + while ( IsTypeModifier(*format) ) + switch ( *format++ ) + { + case 'h': scantype = scantype == TYPE_SHORT ? TYPE_SHORTSHORT + : TYPE_SHORT; break; + case 'j': scantype = TYPE_MAX; break; + case 'l': scantype = scantype == TYPE_LONG ? TYPE_LONGLONG + : TYPE_LONG; break; + case 'L': scantype = TYPE_LONGLONG; break; + case 't': scantype = TYPE_PTRDIFF; break; + case 'z': scantype = TYPE_SIZE; break; + } + + switch ( char convc = *format++ ) + { + case '%': + escaped = true; + default: + fprintf(stderr, "Warning: scanf does not support %c (%i)\n", + convc, convc); + fprintf(stderr, "Bailing out to prevent problems.\n"); + errno = ENOTSUP; + return -1; + continue; + case 'd': + base = 10; scanmode = MODE_SCANINT; intunsigned = false; break; + case 'i': + base = 0; scanmode = MODE_SCANINT; intunsigned = false; break; + case 'o': + base = 0; scanmode = MODE_SCANINT; intunsigned = true; break; + case 'u': + base = 10; scanmode = MODE_SCANINT; intunsigned = true; break; + case 'x': + case 'X': + base = 16; scanmode = MODE_SCANINT; intunsigned = true; break; + case 'c': + string = false; scanmode = MODE_SCANSTRING; break; + case 's': + string = true; scanmode = MODE_SCANSTRING; break; + } + ungetc(ic, fp); + continue; + case MODE_SCANINT: + intparsed = 0; + intvalue = 0; + leadingzero = false; + negint = false; + hasprefix = false; + undoable = 0; + scanmode = MODE_SCANINT_REAL; + case MODE_SCANINT_REAL: + if ( fieldwidth ) + { + fprintf(stderr, "Error: field width not supported for integers in scanf.\n"); + errno = ENOTSUP; + return -1; + } + if ( !undoable && isspace(ic) ) + continue; + if ( undoable < UNDO_MAX ) + 
undodata[undoable++] = ic; + if ( c == '-' && !intunsigned && !negint ) + { + negint = true; + continue; + } + if ( !intparsed && c == '0' && !hasprefix && + (!base || base == 8 || base == 16) && !leadingzero ) + leadingzero = true; + if ( intparsed == 1 && (c == 'x' || c == 'X') && !hasprefix && + (!base || base == 16) && leadingzero ) + { + base = 16; + leadingzero = false; + hasprefix = true; + intparsed = 0; + continue; + } + else if ( intparsed == 1 && '1' <= c && c <= '7' && !hasprefix && + (!base || base == 8) && leadingzero ) + { + base = 8; + hasprefix = true; + leadingzero = false; + } + else if ( !intparsed && '0' <= c && c <= '9' && !hasprefix && + (!base || base == 10) && !leadingzero ) + { + base = 10; + leadingzero = false; + hasprefix = true; + } + cval = debase(c, base); + if ( cval < 0 ) + { + if ( !intparsed ) + { + while ( undoable ) + ungetc(undodata[--undoable], fp); + goto break_loop; + } + scanmode = MODE_INIT; + undoable = 0; + ungetc(ic, fp); + if ( discard ) { discard = false; continue; } + uintmax_t uintmaxval = intvalue; + // TODO: Possible truncation of INTMAX_MIN! 
+ intmax_t intmaxval = uintmaxval; + if ( negint ) intmaxval = -intmaxval; + bool un = intunsigned; + switch ( scantype ) + { + case TYPE_SHORTSHORT: + if ( un ) *va_arg(ap, unsigned char*) = uintmaxval; + else *va_arg(ap, signed char*) = intmaxval; + break; + case TYPE_SHORT: + if ( un ) *va_arg(ap, unsigned short*) = uintmaxval; + else *va_arg(ap, signed short*) = intmaxval; + break; + case TYPE_INT: + if ( un ) *va_arg(ap, unsigned int*) = uintmaxval; + else *va_arg(ap, signed int*) = intmaxval; + break; + case TYPE_LONG: + if ( un ) *va_arg(ap, unsigned long*) = uintmaxval; + else *va_arg(ap, signed long*) = intmaxval; + break; + case TYPE_LONGLONG: + if ( un ) *va_arg(ap, unsigned long long*) = uintmaxval; + else *va_arg(ap, signed long long*) = intmaxval; + break; + case TYPE_PTRDIFF: + *va_arg(ap, ptrdiff_t*) = intmaxval; + break; + case TYPE_SIZE: + if ( un ) *va_arg(ap, size_t*) = uintmaxval; + else *va_arg(ap, ssize_t*) = intmaxval; + break; + case TYPE_MAX: + if ( un ) *va_arg(ap, uintmax_t*) = uintmaxval; + else *va_arg(ap, intmax_t*) = intmaxval; + break; + } + matcheditems++; + continue; + } + intvalue = intvalue * (uintmax_t) base + (uintmax_t) cval; + intparsed++; + continue; + case MODE_SCANSTRING: + if ( !fieldwidth ) + fieldwidth = string ? SIZE_MAX : 1; + scanmode = MODE_SCANSTRING_REAL; + strwritten = 0; + strdest = discard ? 
NULL : va_arg(ap, char*); + case MODE_SCANSTRING_REAL: + if ( string && !strwritten && isspace(ic) ) + continue; + if ( string && strwritten && + (ic == EOF || isspace(ic) || strwritten == fieldwidth) ) + { + ungetc(ic, fp); + if ( !discard ) + strdest[strwritten] = '\0'; + matcheditems++; + scanmode = MODE_INIT; + continue; + } + if ( !string && strwritten == fieldwidth ) + { + ungetc(ic, fp); + scanmode = MODE_INIT; + continue; + } + if ( ic == EOF ) + goto break_loop; + if ( !discard ) + strdest[strwritten++] = c; + continue; + } + } +break_loop: + return matcheditems; +} diff --git a/libc/stdio/vsscanf.cpp b/libc/stdio/vsscanf.cpp index 49ef305c..6061b273 100644 --- a/libc/stdio/vsscanf.cpp +++ b/libc/stdio/vsscanf.cpp @@ -1,6 +1,6 @@ /******************************************************************************* - Copyright(C) Jonas 'Sortie' Termansen 2012. + Copyright(C) Jonas 'Sortie' Termansen 2012, 2014. This file is part of the Sortix C Library. @@ -22,26 +22,42 @@ *******************************************************************************/ -#include -#include +#include #include -#include -#include + +struct vsscanf_input +{ + union + { + const char* str; + const unsigned char* ustr; + }; + size_t offset; +}; + +static int vsscanf_fgetc(void* fp) +{ + struct vsscanf_input* input = (struct vsscanf_input*) fp; + if ( !input->ustr[input->offset] ) + return EOF; + return (int) input->ustr[input->offset++]; +} + +static int vsscanf_ungetc(int c, void* fp) +{ + struct vsscanf_input* input = (struct vsscanf_input*) fp; + if ( c == EOF && !input->ustr[input->offset] ) + return c; + assert(input->offset); + input->offset--; + assert(input->ustr[input->offset] == (unsigned char) c); + return c; +} extern "C" int vsscanf(const char* str, const char* format, va_list ap) { - const char* filename = "/ugly-vsscanf-hack"; - FILE* fp = fopen(filename, "w+"); - if ( !fp ) - return -1; - int ret = -1; - size_t len = strlen(str); - if ( fwrite(str, sizeof(char), len, 
fp) == len ) - if ( fseek(fp, 0, SEEK_SET) == 0 ) - ret = vfscanf(fp, format, ap); - int savederrno = errno; - fclose(fp); - unlink(filename); - errno = savederrno; - return ret; + struct vsscanf_input input; + input.str = str; + input.offset = 0; + return vscanf_callback(&input, vsscanf_fgetc, vsscanf_ungetc, format, ap); }