Add mbrtowc(3), mbtowc(3), wcrtomb(3), wctomb(3).

This commit is contained in:
Jonas 'Sortie' Termansen 2012-12-20 14:42:28 +01:00
parent e52c6c0966
commit 05219a27f2
7 changed files with 228 additions and 7 deletions

View File

@ -71,6 +71,7 @@ heap.o \
integer.o \
localtime.o \
localtime_r.o \
mbrtowc.o \
mbtowc.o \
memccpy.o \
memchr.o \
@ -114,6 +115,8 @@ timespec.o \
ungetc.o \
vfscanf.o \
vsscanf.o \
wcrtomb.o \
wctomb.o \
HOSTEDOBJS=\
access.o \

View File

@ -60,6 +60,7 @@ void free(void*);
long labs(long);
long long llabs(long long);
void* malloc(size_t);
int mbtowc(wchar_t *restrict, const char* restrict, size_t);
#if !defined(_SORTIX_SOURCE)
char* mktemp(char* templ);
#endif
@ -74,6 +75,7 @@ unsigned long strtoul(const char* restrict, char** restrict, int);
unsigned long long strtoull(const char* restrict, char** restrict, int);
long long strtoll(const char* restrict, char** restrict, int);
int unsetenv(const char*);
int wctomb(char*, wchar_t);
#if defined(_SORTIX_SOURCE) || defined(_WANT_SORTIX_ENV)
const char* const* getenviron(void);
@ -110,7 +112,6 @@ lldiv_t lldiv(long long, long long);
long lrand48(void);
int mblen(const char*, size_t);
size_t mbstowcs(wchar_t *restrict, const char* restrict, size_t);
int mbtowc(wchar_t *restrict, const char* restrict, size_t);
char* mkdtemp(char*);
int mkstemp(char*);
long mrand48(void);
@ -132,7 +133,6 @@ long double strtold(const char* restrict, char** restrict);
int system(const char*);
int unlockpt(int);
size_t wcstombs(char* restrict, const wchar_t *restrict, size_t);
int wctomb(char*, wchar_t);
#if __POSIX_OBSOLETE <= 200801
int rand_r(unsigned *);

View File

@ -61,6 +61,9 @@ __BEGIN_DECLS
struct tm;
size_t wcrtomb(char* restrict, wchar_t, mbstate_t* restrict);
size_t mbrtowc(wchar_t* restrict, const char* restrict, size_t, mbstate_t* restrict);
/* TODO: These are not implemented in sortix libc yet. */
#if defined(__SORTIX_SHOW_UNIMPLEMENTED)
double wcstod(const wchar_t* restrict, wchar_t** restrict);
@ -92,9 +95,7 @@ long double wcstold(const wchar_t* restrict, wchar_t** restrict);
long long wcstoll(const wchar_t* restrict, wchar_t** restrict, int);
long wcstol(const wchar_t* restrict, wchar_t** restrict, int);
size_t mbrlen(const char* restrict, size_t, mbstate_t* restrict);
size_t mbrtowc(wchar_t* restrict, const char* restrict, size_t, mbstate_t* restrict);
size_t mbsrtowcs(wchar_t* restrict, const char** restrict, size_t, mbstate_t* restrict);
size_t wcrtomb(char* restrict, wchar_t, mbstate_t* restrict);
size_t wcscspn(const wchar_t*, const wchar_t*);
size_t wcsftime(wchar_t* restrict, size_t, const wchar_t* restrict, const struct tm* restrict);
size_t wcslen(const wchar_t*);

105
libc/mbrtowc.cpp Normal file
View File

@ -0,0 +1,105 @@
/*******************************************************************************
Copyright(C) Jonas 'Sortie' Termansen 2012.
This file is part of the Sortix C Library.
The Sortix C Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
The Sortix C Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
mbrtowc.cpp
Convert a multibyte sequence to a wide character.
*******************************************************************************/
#include <errno.h>
#include <stdint.h>
#include <wchar.h>
// Decode a single UTF-8 multibyte sequence into a wide character.
//
// The decoder accepts the original 1 to 6 byte form of UTF-8 (31 bits of
// payload), which is the same range of sequences wcrtomb(3) produces.
//
// pwc: Where to store the decoded wide character, or NULL to discard it.
// s:   The bytes to decode, or NULL to only query/reset the shift state.
// n:   The maximum number of bytes that may be read from s.
// ps:  Conversion state. Currently ignored, so a sequence that is split
//      across several calls cannot be resumed.
//
// Returns:
//   0            if s is NULL or the decoded character is the null character.
//   (size_t) -2  if the first n bytes are a valid but incomplete sequence.
//   (size_t) -1  with errno set to EILSEQ if the bytes are not valid UTF-8
//                (stray continuation byte, bad lead byte, missing
//                continuation byte or overlong encoding).
//   otherwise    the number of bytes that make up the decoded character.
extern "C"
size_t mbrtowc(wchar_t* restrict pwc, const char* restrict s, size_t n,
               mbstate_t* restrict /*ps*/)
{
	if ( !s )
	{
		// TODO: Restore ps to initial state if currently valid.
		return 0;
	}

	if ( !n )
	{
		// TODO: Support restore through the mbstate_t!
		return (size_t) -2;
	}

	const uint8_t* buf = (const uint8_t*) s;

	// The lead byte decides the total length of the sequence and donates the
	// most significant payload bits.
	uint8_t lead = buf[0];
	size_t sequence_len;
	uint32_t value;
	if ( lead < 0x80 )
		sequence_len = 1, value = lead;
	else if ( (lead & 0xE0) == 0xC0 )
		sequence_len = 2, value = lead & 0x1F;
	else if ( (lead & 0xF0) == 0xE0 )
		sequence_len = 3, value = lead & 0x0F;
	else if ( (lead & 0xF8) == 0xF0 )
		sequence_len = 4, value = lead & 0x07;
	else if ( (lead & 0xFC) == 0xF8 )
		sequence_len = 5, value = lead & 0x03;
	else if ( (lead & 0xFE) == 0xFC )
		sequence_len = 6, value = lead & 0x01;
	else
	{
		// Either a continuation byte (10xxxxxx) in lead position or one of
		// the bytes 0xFE/0xFF that never occur in UTF-8.
		return errno = EILSEQ, (size_t) -1;
	}

	// Every following byte must be a continuation byte carrying 6 more bits.
	for ( size_t i = 1; i < sequence_len; i++ )
	{
		if ( i == n )
		{
			// TODO: Support restore through the mbstate_t!
			return (size_t) -2;
		}
		uint8_t b = buf[i];
		if ( (b & 0xC0) != 0x80 )
			return errno = EILSEQ, (size_t) -1;
		value = value << 6 | (b & 0x3F);
	}

	// Reject overlong encodings: the value must need this many bytes. This is
	// checked before the null test so that the overlong NUL (C0 80) is an
	// error rather than a terminator.
	static const uint32_t min_value[] =
	{
		0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000,
	};
	if ( value < min_value[sequence_len] )
		return errno = EILSEQ, (size_t) -1;

	if ( pwc )
		*pwc = (wchar_t) value;

	if ( !value )
	{
		// TODO: Reset ps to initial state.
		return 0;
	}

	return sequence_len;
}

View File

@ -24,9 +24,10 @@
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
extern "C" int mbtowc(wchar_t* /*pwd*/, const char* /*s*/, size_t /*n*/)
// TODO: This function is impure and should be removed.
extern "C" int mbtowc(wchar_t* pwd, const char* s, size_t n)
{
fprintf(stderr, "mbtowc(3) is not implemented\n");
abort();
return mbrtowc(pwd, s, n, NULL);
}

79
libc/wcrtomb.cpp Normal file
View File

@ -0,0 +1,79 @@
/*******************************************************************************
Copyright(C) Jonas 'Sortie' Termansen 2012.
This file is part of the Sortix C Library.
The Sortix C Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
The Sortix C Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
wcrtomb.cpp
Convert a wide character to a multibyte sequence.
*******************************************************************************/
#include <errno.h>
#include <stdint.h>
#include <wchar.h>
// Encode a wide character as a UTF-8 multibyte sequence.
//
// The encoder emits the original 1 to 6 byte form of UTF-8, so every value
// below 2^31 can be represented.
//
// s:  Destination buffer that must have room for the whole sequence (up to
//     6 bytes), or NULL.
// wc: The wide character to encode. Ignored if s is NULL.
// ps: Conversion state. Currently ignored as UTF-8 needs no shift state.
//
// Returns the number of bytes stored in s. If s is NULL the call behaves like
// encoding the null character into an internal buffer and returns 1. If wc
// cannot be encoded (it does not fit in 31 bits), errno is set to EILSEQ and
// (size_t) -1 is returned.
extern "C"
size_t wcrtomb(char* restrict s, wchar_t wc, mbstate_t* restrict /*ps*/)
{
	// A null destination is defined as wcrtomb(internal_buf, L'\0', ps):
	// wc plays no role and the result is the single terminator byte.
	if ( !s )
	{
		// TODO: Reset ps to initial state.
		return 1;
	}

	if ( !wc )
	{
		*s = '\0';
		return 1;
	}

	// Negative wchar_t values wrap to the top of the unsigned range and are
	// rejected below together with everything else that needs 32 bits.
	uint32_t unicode = wc;

	unsigned bytes;
	if ( unicode < (1U << 7) )
		bytes = 1;
	else if ( unicode < (1U << 11) )
		bytes = 2;
	else if ( unicode < (1U << 16) )
		bytes = 3;
	else if ( unicode < (1U << 21) )
		bytes = 4;
	else if ( unicode < (1U << 26) )
		bytes = 5;
	else if ( unicode < (1U << 31) )
		bytes = 6;
	else
		return errno = EILSEQ, (size_t) -1;

	// Marker bits of the lead byte for each sequence length (index bytes-1):
	// 0xxxxxxx, 110xxxxx, 1110xxxx, 11110xxx, 111110xx, 1111110x.
	static const uint8_t lead_marker[] = { 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

	uint8_t* buf = (uint8_t*) s;

	// Each continuation byte carries 6 bits; whatever is above them goes into
	// the free low bits of the lead byte.
	unsigned shift = 6 * (bytes - 1);
	buf[0] = lead_marker[bytes - 1] | (uint8_t) (unicode >> shift);
	for ( unsigned i = 1; i < bytes; i++ )
	{
		shift -= 6;
		buf[i] = 0x80 | ((unicode >> shift) & 0x3F);
	}

	return bytes;
}

32
libc/wctomb.cpp Normal file
View File

@ -0,0 +1,32 @@
/*******************************************************************************
Copyright(C) Jonas 'Sortie' Termansen 2012.
This file is part of the Sortix C Library.
The Sortix C Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
The Sortix C Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
wctomb.cpp
Convert a wide character to a multibyte sequence.
*******************************************************************************/
#include <stdlib.h>
#include <wchar.h>
// TODO: This function is impure and should be removed.
// Encode a wide character as a multibyte sequence by delegating to
// wcrtomb(3) without an explicit conversion state.
//
// s:  Destination buffer, or NULL to ask whether the encoding is
//     state-dependent.
// wc: The wide character to encode.
//
// Returns 0 if s is NULL (no shift states are reported), -1 if wcrtomb(3)
// fails, and otherwise the number of bytes wcrtomb(3) stored in s.
extern "C" int wctomb(char* s, wchar_t wc)
{
	// A null s is a query for state-dependence, not a length request.
	if ( !s )
		return 0;
	size_t result = wcrtomb(s, wc, NULL);
	if ( result == (size_t) -1 )
		return -1;
	return (int) result;
}