Add mbrlen(3).

2013-04-22 10:06:44 +02:00 · 2013-04-22 10:06:44 +02:00 · bf66d5bb76
parent 12947b0bdf
commit bf66d5bb76
3 changed files with 88 additions and 2 deletions
--- a/libc/Makefile
+++ b/libc/Makefile
@ -78,6 +78,7 @@ ldiv.o \
 lldiv.o \
 localtime.o \
 localtime_r.o \
 mbrlen.o \
 mbrtowc.o \
 mbsrtowcs.o \
 mbstowcs.o \
@ -94,8 +95,8 @@ setbuf.o \
 setvbuf.o \
 sigaddset.o \
 sigdelset.o \
 sigfillset.o \
 sigemptyset.o \
 sigfillset.o \
 sigismember.o \
 sort.o \
 sprint.o \
--- a/libc/include/wchar.h
+++ b/libc/include/wchar.h
@ -63,6 +63,7 @@ struct tm;
 size_t mbsrtowcs(wchar_t* __restrict, const char** __restrict, size_t, mbstate_t* __restrict);
 size_t wcrtomb(char* __restrict, wchar_t, mbstate_t* __restrict);
 size_t mbrlen(const char* __restrict, size_t, mbstate_t* __restrict);
 size_t mbrtowc(wchar_t* __restrict, const char* __restrict, size_t, mbstate_t* __restrict);
 wchar_t* wcscat(wchar_t* __restrict, const wchar_t* __restrict);
 wchar_t* wcschr(const wchar_t*, wchar_t);
@ -107,7 +108,6 @@ int wscanf(const wchar_t* __restrict, ...);
 long double wcstold(const wchar_t* __restrict, wchar_t** __restrict);
 long long wcstoll(const wchar_t* __restrict, wchar_t** __restrict, int);
 long wcstol(const wchar_t* __restrict, wchar_t** __restrict, int);
 size_t mbrlen(const char* __restrict, size_t, mbstate_t* __restrict);
 size_t wcsftime(wchar_t* __restrict, size_t, const wchar_t* __restrict, const struct tm* __restrict);
 size_t wcsxfrm(wchar_t* __restrict, const wchar_t* __restrict, size_t);
 unsigned long long wcstoull(const wchar_t* __restrict, wchar_t** __restrict, int);
--- a/libc/mbrlen.cpp
+++ b/libc/mbrlen.cpp
@ -0,0 +1,85 @@
 /*******************************************************************************
    Copyright(C) Jonas 'Sortie' Termansen 2013.
    This file is part of the Sortix C Library.
    The Sortix C Library is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 3 of the License, or (at your
    option) any later version.
    The Sortix C Library is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
    License for more details.
    You should have received a copy of the GNU Lesser General Public License
    along with the Sortix C Library. If not, see <http://www.gnu.org/licenses/>.
    mbrlen.cpp
    Determine number of bytes in next multibyte character.
 *******************************************************************************/
 #include <errno.h>
 #include <string.h>
 #include <wchar.h>
 // Classify a byte by the run of one bits at its most significant end, which
 // is how a UTF-8 lead byte announces the length of its sequence.
 //
 // Returns 1 for 0xxxxxxx, 0 for 10xxxxxx (a continuation byte that cannot
 // begin a sequence), 2 through 6 for 110xxxxx through 1111110x, and
 // (size_t) -1 for the remaining values 0xFE and 0xFF.
 static size_t utf8_header_length(unsigned char uc)
 {
 	// Count the consecutive one bits starting at bit 7.
 	size_t leading_ones = 0;
 	for ( unsigned int mask = 0x80; mask != 0 && (uc & mask) != 0; mask >>= 1 )
 		leading_ones++;

 	// No leading one bit: the byte is a complete character on its own.
 	if ( leading_ones == 0 )
 		return 1;

 	// Exactly one leading one bit: this is a continuation byte.
 	if ( leading_ones == 1 )
 		return 0;

 	// Two to six leading one bits give the sequence length directly.
 	if ( leading_ones <= 6 )
 		return leading_ones;

 	// Seven or eight leading one bits do not match any header pattern.
 	return (size_t) -1;
 }
 // Determine the number of bytes in the next multibyte character.
 //
 // Inspects at most n bytes starting at s, interpreting them as UTF-8.
 //
 // Returns:
 //   0            if the next character is the null character; the conversion
 //                state pointed to by ps is reset to all zero bytes.
 //   1 to 6       the length of the complete sequence found at s.
 //   (size_t) -2  if the n bytes form an incomplete sequence whose bytes so
 //                far are all acceptable (this includes n == 0).
 //   (size_t) -1  with errno set to EILSEQ if the bytes cannot start or
 //                continue a sequence.
 //
 // If s is NULL the call behaves like mbrlen("", 1, ps). If ps is NULL a state
 // object private to this function is used instead, as ISO C specifies.
 //
 // TODO: Use the shift state. An incomplete sequence is not recorded in *ps,
 //       so a follow-up call that supplies only the remaining bytes starts on
 //       a continuation byte and fails with EILSEQ.
 extern "C"
 size_t mbrlen(const char* restrict s, size_t n, mbstate_t* restrict ps)
 {
 	// Fallback conversion state for callers that pass a null ps.
 	static mbstate_t internal_state;
 	if ( !ps )
 		ps = &internal_state;

 	// A null s is defined to mean the empty string with a length of one.
 	if ( !s )
 	{
 		s = "";
 		n = 1;
 	}

 	// Without any input nothing can be decided yet.
 	if ( n == 0 )
 		return (size_t) -2;

 	unsigned char lead = (unsigned char) s[0];
 	if ( !lead )
 	{
 		memset(ps, 0, sizeof(*ps));
 		return 0;
 	}

 	size_t expected_length = utf8_header_length(lead);

 	// The byte matches no header pattern at all.
 	if ( expected_length == (size_t) -1 )
 		return errno = EILSEQ, (size_t) -1;

 	// The byte claims to be in the middle of a sequence (10xxxxxx) even though
 	// a new character was expected here.
 	// TODO: Should we play catch up with the partial sequence?
 	if ( expected_length == 0 )
 		return errno = EILSEQ, (size_t) -1;

 	// Every byte after the header must be of the form 10xxxxxx. This is
 	// checked for each byte that is available, no matter how the sequence
 	// length compares to n, so a malformed sequence that exactly fills the
 	// input is rejected rather than reported as complete.
 	for ( size_t i = 1; i < expected_length; i++ )
 	{
 		// Ran out of input before the sequence ended.
 		if ( i == n )
 			return (size_t) -2;
 		if ( ((unsigned char) s[i] & 0xC0) != 0x80 )
 			return errno = EILSEQ, (size_t) -1;
 	}

 	return expected_length;
 }