kern/misc: Implement faster grub_memcpy() for aligned buffers

When both "dest" and "src" are aligned, copying the data in grub_addr_t
sized chunks is more efficient than a byte-by-byte copy.

Also tweak __aeabi_memcpy(), __aeabi_memcpy4(), and __aeabi_memcpy8(),
since grub_memcpy() is not inline anymore.

Optimization for unaligned buffers was omitted to maintain code
simplicity and readability. The current chunk-copy optimization
for aligned buffers already provides a noticeable performance
improvement (*) for Argon2 keyslot decryption.

  (*) On my system, for a LUKS2 keyslot configured with a 1 GB Argon2
      memory requirement, this patch reduces the decryption time from
      22 seconds to 12 seconds.

Signed-off-by: Gary Lin <glin@suse.com>
Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
This commit is contained in:
Gary Lin 2025-10-15 11:00:28 +08:00 committed by Daniel Kiper
parent da01eb0c55
commit 28dbe8a3b6
4 changed files with 42 additions and 11 deletions

View File

@ -50,6 +50,12 @@ grub_memmove (void *dest, const void *src, grub_size_t n)
return dest;
}
/* Copy N bytes from SRC to DEST.  This build has no dedicated copy
   routine: the request is handed straight to grub_memmove() and its
   return value is passed back to the caller unchanged.  */
void *
grub_memcpy (void *dest, const void *src, grub_size_t n)
{
  void *copied = grub_memmove (dest, src, n);

  return copied;
}
int
grub_memcmp (const void *s1, const void *s2, grub_size_t n)
{

View File

@ -24,7 +24,7 @@
/* memcpy() symbol with the standard C signature, tagged with
   GRUB_BUILTIN_ATTR.  It forwards DEST, SRC and N to GRUB's own copy
   routine and returns that routine's result.  */
void * GRUB_BUILTIN_ATTR
memcpy (void *dest, const void *src, grub_size_t n)
{
/* NOTE(review): the two return statements below look like the removed
   and the added line of this diff hunk (grub_memmove -> grub_memcpy);
   only one of them exists in the real source file - confirm against
   the raw patch.  */
return grub_memmove (dest, src, n);
return grub_memcpy (dest, src, n);
}
void * GRUB_BUILTIN_ATTR
memmove (void *dest, const void *src, grub_size_t n)
@ -372,11 +372,11 @@ grub_int32_t
__aeabi_idiv (grub_int32_t a, grub_int32_t b)
__attribute__ ((alias ("__divsi3")));
/* __aeabi_memcpy, __aeabi_memcpy4 and __aeabi_memcpy8 are not given
   bodies of their own; each one is declared as an alias of an existing
   copy function with the same (dest, src, n) signature.
   NOTE(review): every declaration below is followed by two alias
   attributes.  These look like the old ("grub_memcpy") and the new
   ("memcpy") line of the diff hunk; per the commit message the target
   changed because grub_memcpy() is no longer an inline function.
   Presumably only the "memcpy" line remains in the real file - confirm
   against the raw patch.  */
void *__aeabi_memcpy (void *dest, const void *src, grub_size_t n)
__attribute__ ((alias ("grub_memcpy")));
__attribute__ ((alias ("memcpy")));
void *__aeabi_memcpy4 (void *dest, const void *src, grub_size_t n)
__attribute__ ((alias ("grub_memcpy")));
__attribute__ ((alias ("memcpy")));
void *__aeabi_memcpy8 (void *dest, const void *src, grub_size_t n)
__attribute__ ((alias ("grub_memcpy")));
__attribute__ ((alias ("memcpy")));
void *__aeabi_memset (void *s, int c, grub_size_t n)
__attribute__ ((alias ("memset")));

View File

@ -99,6 +99,37 @@ grub_memmove (void *dest, const void *src, grub_size_t n)
return dest;
}
/* Copy N bytes from SRC to DEST, one grub_addr_t-sized word at a time
   for as long as a whole word remains, then byte by byte for the tail.
   The copy always runs from the lowest address upwards.  Both pointers
   are dereferenced as grub_addr_t, so the caller is responsible for
   passing suitably aligned buffers.  Returns DEST.  */
static void *
__memcpy_aligned (void *dest, const void *src, grub_size_t n)
{
  const grub_size_t word_size = sizeof (grub_addr_t);
  grub_addr_t *word_out = (grub_addr_t *) dest;
  const grub_addr_t *word_in = (const grub_addr_t *) src;
  grub_uint8_t *byte_out;
  const grub_uint8_t *byte_in;

  /* Bulk phase: move whole words.  */
  while (n >= word_size)
    {
      *word_out++ = *word_in++;
      n -= word_size;
    }

  /* Tail phase: fewer than word_size bytes are left; continue from
     where the word copy stopped.  */
  byte_out = (grub_uint8_t *) word_out;
  byte_in = (const grub_uint8_t *) word_in;
  while (n > 0)
    {
      *byte_out++ = *byte_in++;
      n--;
    }

  return dest;
}
/* Copy N bytes from SRC to DEST and return the result of the routine
   that performed the copy.

   When both pointers are multiples of sizeof (grub_addr_t) and at least
   one full word is requested, the word-wise helper __memcpy_aligned()
   does the work.  Every other request is passed to grub_memmove().  */
void *
grub_memcpy (void *dest, const void *src, grub_size_t n)
{
  const grub_addr_t align_mask = sizeof (grub_addr_t) - 1;
  /* OR-ing the two addresses lets a single mask test cover both: a low
     bit set in either pointer is also set in the combined value.  */
  const grub_addr_t addr_bits = (grub_addr_t) dest | (grub_addr_t) src;

  /* Too short for a word copy, or at least one buffer is unaligned.  */
  if (n < sizeof (grub_addr_t) || (addr_bits & align_mask) != 0)
    return grub_memmove (dest, src, n);

  return __memcpy_aligned (dest, src, n);
}
char *
grub_strcpy (char *dest, const char *src)
{

View File

@ -38,6 +38,7 @@
#define grub_dprintf(condition, ...) grub_real_dprintf(GRUB_FILE, __FUNCTION__, __LINE__, condition, __VA_ARGS__)
void *EXPORT_FUNC(grub_memmove) (void *dest, const void *src, grub_size_t n);
void *EXPORT_FUNC(grub_memcpy) (void *dest, const void *src, grub_size_t n);
char *EXPORT_FUNC(grub_strcpy) (char *dest, const char *src);
static inline char *
@ -103,13 +104,6 @@ grub_strlcpy (char *dest, const char *src, grub_size_t size)
return res;
}
/* XXX: If grub_memmove is too slow, we must implement grub_memcpy. */
/* Header-level inline wrapper: copies N bytes from SRC to DEST by
   calling grub_memmove() and returns its result.
   NOTE(review): the hunk header above shows this region shrinking from
   13 to 6 lines, so this definition appears to be the one this commit
   deletes in favour of the exported grub_memcpy() prototype - confirm
   against the raw patch.  */
static inline void *
grub_memcpy (void *dest, const void *src, grub_size_t n)
{
return grub_memmove (dest, src, n);
}
#if defined(__x86_64__) && !defined (GRUB_UTIL)
#if defined (__MINGW32__) || defined (__CYGWIN__) || defined (__MINGW64__)
#define GRUB_ASM_ATTR __attribute__ ((sysv_abi))