From ffe59b9c7c695d19a865bacf46cc3e11081094df Mon Sep 17 00:00:00 2001 From: Jonas 'Sortie' Termansen Date: Sun, 27 Dec 2015 21:41:52 +0100 Subject: [PATCH] Fix mbrtowc decoding surrogates. --- libc/wchar/mbrtowc.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libc/wchar/mbrtowc.cpp b/libc/wchar/mbrtowc.cpp index ac663178..985d68e1 100644 --- a/libc/wchar/mbrtowc.cpp +++ b/libc/wchar/mbrtowc.cpp @@ -1,6 +1,6 @@ /******************************************************************************* - Copyright(C) Jonas 'Sortie' Termansen 2012, 2014. + Copyright(C) Jonas 'Sortie' Termansen 2012, 2014, 2015. This file is part of the Sortix C Library. @@ -114,6 +114,11 @@ size_t utf8_mbrtowc(wchar_t* restrict pwc, return errno = EILSEQ, (size_t) -1; #endif + // The definition of UTF-8 prohibits encoding character numbers between + // U+D800 and U+DFFF, which are reserved for use with the UTF-16 encoding + // form (as surrogate pairs) and do not directly represent characters. + if ( 0xD800 <= ps->wch && ps->wch <= 0xDFFF ) + return errno = EILSEQ, (size_t) -1; // RFC 3629 limits UTF-8 to 0x0 through 0x10FFFF. if ( 0x10FFFF <= ps->wch ) return errno = EILSEQ, (size_t) -1;