Optimize memcpy(3).
This commit is contained in:
parent
3116161f5d
commit
1b0ddadab3
127
libc/memcpy.cpp
127
libc/memcpy.cpp
|
@@ -25,37 +25,106 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
// NOTE(review): this span looks like a side-by-side diff rendering. Lines
// tagged [pre-commit] belong to the old memcpy_aligned(); lines tagged
// [post-commit] are additions. The `|` and `||||||` lines appear to be
// table-cell markers rather than code -- confirm against the repository.
// [pre-commit] memcpy_aligned: copies length / sizeof(unsigned long) whole
// words from src to dest in ascending index order and returns dest. Any
// remainder of length smaller than one word is not copied by this helper.
static void* memcpy_aligned(unsigned long* dest,
|
// [post-commit] Discard any existing definition of the `restrict` macro ...
#undef restrict
|
||||||
const unsigned long* src,
|
// [post-commit] ... and make `restrict` expand to `__restrict__` in this file.
#define restrict __restrict__
|
||||||
size_t length)
|
|
||||||
{
|
|
||||||
size_t numcopies = length / sizeof(unsigned long);
|
|
||||||
for ( size_t i = 0; i < numcopies; i++ )
|
|
||||||
dest[i] = src[i];
|
|
||||||
return dest;
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE(review): this span looks like a side-by-side diff rendering. Lines
// tagged [pre-commit] belong to the old IsWordAligned(); lines tagged
// [post-commit] are additions. The `|` and `||||||` lines appear to be
// table-cell markers rather than code -- confirm against the repository.
// [pre-commit] IsWordAligned: returns true when addr is a multiple of
// sizeof(unsigned long). It rounds addr down to a multiple of WORDSIZE using
// integer division and compares the result with addr.
static inline bool IsWordAligned(uintptr_t addr)
|
// [post-commit] Compile-time guard: the build stops with the #error below
// when __SIZEOF_LONG__ is greater than 8.
#if 8 < __SIZEOF_LONG__
|
||||||
{
|
#error unsigned long is bigger than expected, please add support to this file.
|
||||||
const size_t WORDSIZE = sizeof(unsigned long);
|
#endif
|
||||||
return (addr / WORDSIZE * WORDSIZE) == addr;
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE(review): this span looks like a side-by-side diff rendering. Lines
// tagged [pre-commit] belong to the old memcpy(); lines tagged [post-commit]
// belong to the new memcpy_slow() and memcpy(). The `|` and `||||||` lines
// appear to be table-cell markers rather than code -- confirm against the
// repository.
// [pre-commit] memcpy: when destptr, srcptr and length are all multiples of
// sizeof(unsigned long) the copy is delegated to memcpy_aligned(); otherwise
// the bytes are copied one at a time. Returns the destination pointer.
extern "C" void* memcpy(void* destptr, const void* srcptr, size_t length)
|
// [post-commit] memcpy_slow: copies length bytes from srcptr to dstptr one
// byte at a time, in ascending order, and returns dstptr.
// NOTE(review): an optimizing compiler may recognise a plain copy loop as a
// memcpy idiom and emit a call to memcpy itself -- confirm the build flags
// rule that out.
inline static void* memcpy_slow(void* restrict dstptr,
|
||||||
|
const void* restrict srcptr, size_t length)
|
||||||
{
|
{
|
||||||
if ( IsWordAligned((uintptr_t) destptr) &&
|
uint8_t* restrict dst = (uint8_t* restrict) dstptr;
|
||||||
IsWordAligned((uintptr_t) srcptr) &&
|
const uint8_t* restrict src = (const uint8_t* restrict) srcptr;
|
||||||
IsWordAligned(length) )
|
|
||||||
{
|
|
||||||
unsigned long* dest = (unsigned long*) destptr;
|
|
||||||
const unsigned long* src = (const unsigned long*) srcptr;
|
|
||||||
return memcpy_aligned(dest, src, length);
|
|
||||||
}
|
|
||||||
uint8_t* dest = (uint8_t*) destptr;
|
|
||||||
const uint8_t* src = (const uint8_t*) srcptr;
|
|
||||||
for ( size_t i = 0; i < length; i += sizeof(uint8_t) )
|
for ( size_t i = 0; i < length; i += sizeof(uint8_t) )
|
||||||
{
|
dst[i] = src[i];
|
||||||
dest[i] = src[i];
|
return dstptr;
|
||||||
}
|
}
|
||||||
return dest;
|
|
||||||
|
// [post-commit] memcpy: copies length bytes from srcptr to dstptr and returns
// dstptr. Steps, in the order they appear below:
//   1. If source and destination have different offsets within an unsigned
//      long, hand the whole copy to memcpy_slow().
//   2. Head: while the destination is misaligned and enough bytes remain,
//      copy a 1-byte, a 2-byte and (only when unsigned long is 8 bytes) a
//      4-byte piece.
//   3. Bulk: copy length / sizeof(unsigned long) whole words, using
//      `rep movsq` / `rep movsd` on x86 and a plain loop elsewhere.
//   4. Tail: copy what is left as 4-, 2- and 1-byte pieces.
extern "C" void* memcpy(void* restrict dstptr, const void* restrict srcptr,
|
||||||
|
size_t length)
|
||||||
|
{
|
||||||
|
// Mask selecting the address bits that are nonzero exactly when an address
// is not a multiple of sizeof(unsigned long).
const unsigned long unalignmask = sizeof(unsigned long) - 1;
|
||||||
|
const unsigned long srcunalign = (unsigned long) srcptr & unalignmask;
|
||||||
|
const unsigned long dstunalign = (unsigned long) dstptr & unalignmask;
|
||||||
|
// With different misalignments the same sequence of steps cannot bring both
// pointers to a word boundary, so the byte-wise copy is used instead.
if ( srcunalign != dstunalign )
|
||||||
|
return memcpy_slow(dstptr, srcptr, length);
|
||||||
|
|
||||||
|
// Source cursor. All members share storage, so the same position can be read
// as an integer (srcval) or as a pointer to 8/16/32/64-bit or unsigned long
// elements; incrementing any pointer member advances the shared position.
// NOTE(review): assumes unsigned long and these pointer types have the same
// size and representation -- confirm for every supported target.
union
|
||||||
|
{
|
||||||
|
unsigned long srcval;
|
||||||
|
const uint8_t* restrict src8;
|
||||||
|
const uint16_t* restrict src16;
|
||||||
|
const uint32_t* restrict src32;
|
||||||
|
const uint64_t* restrict src64;
|
||||||
|
const unsigned long* restrict srcul;
|
||||||
|
};
|
||||||
|
srcval = (unsigned long) srcptr;
|
||||||
|
|
||||||
|
// Destination cursor, laid out the same way as the source cursor above.
union
|
||||||
|
{
|
||||||
|
unsigned long dstval;
|
||||||
|
uint8_t* restrict dst8;
|
||||||
|
uint16_t* restrict dst16;
|
||||||
|
uint32_t* restrict dst32;
|
||||||
|
uint64_t* restrict dst64;
|
||||||
|
unsigned long* restrict dstul;
|
||||||
|
};
|
||||||
|
dstval = (unsigned long) dstptr;
|
||||||
|
|
||||||
|
// Head: only runs when the destination starts off a word boundary. Only
// dstval is tested; the source has the same low address bits because
// srcunalign == dstunalign on this path.
if ( dstunalign )
|
||||||
|
{
|
||||||
|
// Copy one byte when the address is odd. (dstval & (1-1)) is always zero, so
// that term never blocks the copy. The comma operator keeps the copy and the
// length update in one statement under the if.
if ( 1 <= length && !(dstval & (1-1)) && (dstval & (2-1)) )
|
||||||
|
*dst8++ = *src8++,
|
||||||
|
length -= 1;
|
||||||
|
|
||||||
|
// Copy two bytes when the address is a multiple of 2 but not of 4.
if ( 2 <= length && !(dstval & (2-1)) && (dstval & (4-1)) )
|
||||||
|
*dst16++ = *src16++,
|
||||||
|
length -= 2;
|
||||||
|
|
||||||
|
// Only compiled when unsigned long is at least 8 bytes: copy four bytes when
// the address is a multiple of 4 but not of 8.
#if 8 <= __SIZEOF_LONG__
|
||||||
|
if ( 4 <= length && !(dstval & (4-1)) && (dstval & (8-1)) )
|
||||||
|
*dst32++ = *src32++,
|
||||||
|
length -= 4;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bulk: number of whole unsigned long words still to copy.
size_t numcopies = length / sizeof(unsigned long);
|
||||||
|
// x86 path: a repeated string-move instruction. The count is passed in the
// "c" register and the cursors in the "S" and "D" registers; the outputs
// store the advanced cursors back into srcul and dstul.
// NOTE(review): rep movs depends on the CPU direction flag being clear --
// confirm the platform ABI guarantees that on entry.
#if defined(__x86_64__) || defined(__i386__)
|
||||||
|
// Receives the count register after the instruction (presumably zero, going
// by the name); it is not read again in this function.
unsigned long zeroed_numcopies;
|
||||||
|
#if defined(__x86_64__)
|
||||||
|
asm volatile ("rep movsq" : "=c"(zeroed_numcopies), "=S"(srcul), "=D"(dstul)
|
||||||
|
: "c"(numcopies), "S"(srcul), "D"(dstul)
|
||||||
|
: "memory");
|
||||||
|
#elif defined(__i386__)
|
||||||
|
asm volatile ("rep movsd" : "=c"(zeroed_numcopies), "=S"(srcul), "=D"(dstul)
|
||||||
|
: "c"(numcopies), "S"(srcul), "D"(dstul)
|
||||||
|
: "memory");
|
||||||
|
#endif
|
||||||
|
// Other architectures: copy the words with an ordinary loop.
// NOTE(review): an optimizing compiler may turn this loop into a call to
// memcpy itself -- confirm the build flags rule that out.
#else
|
||||||
|
for ( size_t i = 0; i < numcopies; i++ )
|
||||||
|
*dstul++ = *srcul++;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Bytes that remain after the word-sized part.
length -= numcopies * sizeof(unsigned long);
|
||||||
|
|
||||||
|
// Tail: copy the remainder, largest piece first.
if ( length )
|
||||||
|
{
|
||||||
|
// The 4-byte piece is only compiled when unsigned long is at least 8 bytes.
#if 8 <= __SIZEOF_LONG__
|
||||||
|
if ( 4 <= length )
|
||||||
|
*dst32++ = *src32++,
|
||||||
|
length -= 4;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ( 2 <= length )
|
||||||
|
*dst16++ = *src16++,
|
||||||
|
length -= 2;
|
||||||
|
|
||||||
|
if ( 1 <= length )
|
||||||
|
*dst8++ = *src8++,
|
||||||
|
length -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The original destination pointer is returned, not the advanced cursor.
return dstptr;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue