Add mbrlen(3).

This commit is contained in:
Jonas 'Sortie' Termansen 2013-04-22 10:06:44 +02:00
parent 12947b0bdf
commit bf66d5bb76
3 changed files with 88 additions and 2 deletions

View File

@ -78,6 +78,7 @@ ldiv.o \
lldiv.o \
localtime.o \
localtime_r.o \
mbrlen.o \
mbrtowc.o \
mbsrtowcs.o \
mbstowcs.o \
@ -94,8 +95,8 @@ setbuf.o \
setvbuf.o \
sigaddset.o \
sigdelset.o \
sigfillset.o \
sigemptyset.o \
sigfillset.o \
sigismember.o \
sort.o \
sprint.o \

View File

@ -63,6 +63,7 @@ struct tm;
size_t mbsrtowcs(wchar_t* __restrict, const char** __restrict, size_t, mbstate_t* __restrict);
size_t wcrtomb(char* __restrict, wchar_t, mbstate_t* __restrict);
size_t mbrlen(const char* __restrict, size_t, mbstate_t* __restrict);
size_t mbrtowc(wchar_t* __restrict, const char* __restrict, size_t, mbstate_t* __restrict);
wchar_t* wcscat(wchar_t* __restrict, const wchar_t* __restrict);
wchar_t* wcschr(const wchar_t*, wchar_t);
@ -107,7 +108,6 @@ int wscanf(const wchar_t* __restrict, ...);
long double wcstold(const wchar_t* __restrict, wchar_t** __restrict);
long long wcstoll(const wchar_t* __restrict, wchar_t** __restrict, int);
long wcstol(const wchar_t* __restrict, wchar_t** __restrict, int);
size_t mbrlen(const char* __restrict, size_t, mbstate_t* __restrict);
size_t wcsftime(wchar_t* __restrict, size_t, const wchar_t* __restrict, const struct tm* __restrict);
size_t wcsxfrm(wchar_t* __restrict, const wchar_t* __restrict, size_t);
unsigned long long wcstoull(const wchar_t* __restrict, wchar_t** __restrict, int);

85
libc/mbrlen.cpp Normal file
View File

@ -0,0 +1,85 @@
/*******************************************************************************
Copyright(C) Jonas 'Sortie' Termansen 2013.
This file is part of the Sortix C Library.
The Sortix C Library is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
The Sortix C Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
mbrlen.cpp
Determine number of bytes in next multibyte character.
*******************************************************************************/
#include <errno.h>
#include <string.h>
#include <wchar.h>
// Classify a byte by the number of leading one bits it carries, which is how
// UTF-8 marks the role of a byte within a multibyte sequence.
//
// Returns:
//   1           for a single-byte character (0xxxxxxx),
//   0           for a continuation byte (10xxxxxx), which cannot start a
//               sequence,
//   2 through 6 for a lead byte announcing a sequence of that many bytes,
//   (size_t) -1 for bytes with seven or eight leading one bits (0xFE, 0xFF),
//               which never occur in UTF-8.
static size_t utf8_header_length(unsigned char uc)
{
	size_t leading_ones = 0;
	for ( unsigned int bit = 0x80; bit != 0 && (uc & bit) != 0; bit >>= 1 )
		leading_ones++;

	switch ( leading_ones )
	{
	case 0:
		// Plain 7-bit character.
		return 1;
	case 1:
		// Byte from the middle of a sequence.
		return 0;
	case 7:
	case 8:
		// Not a valid UTF-8 byte at all.
		return (size_t) -1;
	default:
		// The count of leading ones is the total sequence length.
		return leading_ones;
	}
}
// Determine the number of bytes making up the next multibyte (UTF-8) character
// in the buffer s, inspecting at most n bytes.
//
// Parameters:
//   s  - Bytes to inspect. If NULL, the call behaves as mbrlen("", 1, ps).
//   n  - Maximum number of bytes of s to examine.
//   ps - Conversion state. If NULL, a state object private to this function
//        is used instead, as the C standard requires.
//
// Returns:
//   0           if the next character is the null character; *ps is reset to
//               the initial state.
//   1 through 6 the number of bytes in the complete character found, as
//               announced by its lead byte (see utf8_header_length).
//   (size_t) -2 if the n bytes form an incomplete but so far valid sequence
//               (this includes n == 0).
//   (size_t) -1 if an invalid byte is encountered; errno is set to EILSEQ.
//
// TODO: Use the shift state. Currently nothing is recorded in *ps, so a
//       sequence reported as incomplete cannot be resumed by a later call.
extern "C"
size_t mbrlen(const char* restrict s, size_t n, mbstate_t* restrict ps)
{
	// Callers are allowed to pass a NULL state, in which case the function
	// must fall back on its own internal state object.
	static mbstate_t internal_ps;
	if ( !ps )
		ps = &internal_ps;

	// A NULL string is defined to be equivalent to mbrlen("", 1, ps).
	if ( !s )
	{
		s = "";
		n = 1;
	}

	size_t expected_length = 0;
	for ( size_t i = 0; i < n; i++ )
	{
		unsigned char uc = (unsigned char) s[i];
		if ( i == 0 )
		{
			// The null character terminates conversion and resets the state.
			if ( !uc )
			{
				memset(ps, 0, sizeof(*ps));
				return 0;
			}
			if ( (expected_length = utf8_header_length(uc)) == (size_t) -1 )
				return errno = EILSEQ, (size_t) -1;
			// Check if we encountered an unexpected character claiming to be
			// in the middle of a UTF-8 multibyte sequence (10xxxxxx).
			if ( expected_length == 0 )
				// TODO: Should we play catch up with the partial sequence?
				return errno = EILSEQ, (size_t) -1;
		}
		// All non-header bytes must be of the form 10xxxxxx. This is checked
		// for every trailing byte we are allowed to look at, regardless of
		// whether the whole sequence fits within n bytes, so that a malformed
		// sequence is never reported as complete or merely incomplete.
		else if ( (uc & 0b11000000) != 0b10000000 )
			return errno = EILSEQ, (size_t) -1;
		if ( i + 1 == expected_length )
			return i + 1;
	}
	// Ran out of input before the character was complete.
	return (size_t) -2;
}