We currently use an old version of libgcrypt, which leaves us with fewer ciphers and missing out on many other improvements.

Signed-off-by: Vladimir Serbinenko <phcoder@gmail.com>
Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
 * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
 * Copyright (C) 2019-2020, 2022-2023 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This file is part of Libgcrypt.
 *
 * Libgcrypt is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * Libgcrypt is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
 * and Cryptogams by Andy Polyakov, and if made part of a release of either
 * or both projects, is thereafter dual-licensed under the license said project
 * is released under.
 */

unsigned int PPC_OPT_ATTR
ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out,
                    const unsigned char *in)
{
  const block bige_const = asm_load_be_const();
  const u128_t *rk = (u128_t *)&ctx->keyschenc;
  int rounds = ctx->rounds;
  ROUND_KEY_VARIABLES;
  block b;

  b = VEC_LOAD_BE (in, 0, bige_const);

  PRELOAD_ROUND_KEYS (rounds);

  AES_ENCRYPT (b, rounds);
  VEC_STORE_BE (out, 0, b, bige_const);

  return 0; /* does not use stack */
}

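/* Note: the zero return value above is the stack-burn depth reported back to
 * the caller; the vector implementation keeps all cipher state in registers,
 * so no sensitive data is spilled to the stack that would need wiping. */
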
unsigned int PPC_OPT_ATTR
DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx, unsigned char *out,
                    const unsigned char *in)
{
  const block bige_const = asm_load_be_const();
  const u128_t *rk = (u128_t *)&ctx->keyschdec;
  int rounds = ctx->rounds;
  ROUND_KEY_VARIABLES;
  block b;

  b = VEC_LOAD_BE (in, 0, bige_const);

  PRELOAD_ROUND_KEYS (rounds);

  AES_DECRYPT (b, rounds);
  VEC_STORE_BE (out, 0, b, bige_const);

  return 0; /* does not use stack */
}

void PPC_OPT_ATTR
CFB_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
              const void *inbuf_arg, size_t nblocks)
{
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = context;
  const u128_t *rk = (u128_t *)&ctx->keyschenc;
  const u128_t *in = (const u128_t *)inbuf_arg;
  u128_t *out = (u128_t *)outbuf_arg;
  int rounds = ctx->rounds;
  ROUND_KEY_VARIABLES_ALL;
  block key0_xor_keylast;
  block iv, outiv;

  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
  outiv = iv;

  PRELOAD_ROUND_KEYS_ALL (rounds);
  key0_xor_keylast = rkey0 ^ rkeylast;
  iv ^= rkey0;

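  /* vcipherlast XORs its round-key operand into the state, so the loop
   * below passes (rkey0 ^ rkeylast ^ P_i) as that operand.  Each iteration
   * then leaves iv = C_i ^ rkey0, which is exactly the pre-whitened state
   * needed for the next block, and the true ciphertext C_i is recovered
   * with the single XOR outiv = rkey0 ^ iv.  This keeps the serial CFB
   * dependency chain at one AES operation per block. */
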
  for (; nblocks; nblocks--)
    {
      rkeylast = key0_xor_keylast ^ VEC_LOAD_BE (in++, 0, bige_const);

      iv = asm_cipher_be (iv, rkey1);
      iv = asm_cipher_be (iv, rkey2);
      iv = asm_cipher_be (iv, rkey3);
      iv = asm_cipher_be (iv, rkey4);
      iv = asm_cipher_be (iv, rkey5);
      iv = asm_cipher_be (iv, rkey6);
      iv = asm_cipher_be (iv, rkey7);
      iv = asm_cipher_be (iv, rkey8);
      iv = asm_cipher_be (iv, rkey9);
      if (rounds >= 12)
        {
          iv = asm_cipher_be (iv, rkey10);
          iv = asm_cipher_be (iv, rkey11);
          if (rounds > 12)
            {
              iv = asm_cipher_be (iv, rkey12);
              iv = asm_cipher_be (iv, rkey13);
            }
        }
      iv = asm_cipherlast_be (iv, rkeylast);

      outiv = rkey0 ^ iv;
      VEC_STORE_BE (out++, 0, outiv, bige_const);
    }

  VEC_STORE_BE (iv_arg, 0, outiv, bige_const);
}

void PPC_OPT_ATTR
ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg,
                size_t nblocks, int encrypt)
{
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = context;
  const u128_t *rk = encrypt ? (u128_t *)&ctx->keyschenc
                             : (u128_t *)&ctx->keyschdec;
  const u128_t *in = (const u128_t *)inbuf_arg;
  u128_t *out = (u128_t *)outbuf_arg;
  int rounds = ctx->rounds;
  ROUND_KEY_VARIABLES;
  block b0, b1, b2, b3, b4, b5, b6, b7;
  block rkey;

  if (!encrypt && !ctx->decryption_prepared)
    {
      internal_aes_ppc_prepare_decryption (ctx);
      ctx->decryption_prepared = 1;
    }

  PRELOAD_ROUND_KEYS (rounds);

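  /* Blocks are handled in tiers of eight, then four, then one, so the wide
   * paths keep the vcipher/vncipher units busy; loads are interleaved with
   * the rkey0 pre-whitening XORs to hide load-to-use latency. */
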
  for (; nblocks >= 8; nblocks -= 8)
    {
      b0 = VEC_LOAD_BE (in, 0, bige_const);
      b1 = VEC_LOAD_BE (in, 1, bige_const);
      b2 = VEC_LOAD_BE (in, 2, bige_const);
      b3 = VEC_LOAD_BE (in, 3, bige_const);
      b0 = asm_xor (rkey0, b0);
      b1 = asm_xor (rkey0, b1);
      b4 = VEC_LOAD_BE (in, 4, bige_const);
      b5 = VEC_LOAD_BE (in, 5, bige_const);
      b2 = asm_xor (rkey0, b2);
      b3 = asm_xor (rkey0, b3);
      b6 = VEC_LOAD_BE (in, 6, bige_const);
      b7 = VEC_LOAD_BE (in, 7, bige_const);
      in += 8;
      b4 = asm_xor (rkey0, b4);
      b5 = asm_xor (rkey0, b5);
      b6 = asm_xor (rkey0, b6);
      b7 = asm_xor (rkey0, b7);

      if (encrypt)
        {
#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_cipher_be (b0, rkey); \
          b1 = asm_cipher_be (b1, rkey); \
          b2 = asm_cipher_be (b2, rkey); \
          b3 = asm_cipher_be (b3, rkey); \
          b4 = asm_cipher_be (b4, rkey); \
          b5 = asm_cipher_be (b5, rkey); \
          b6 = asm_cipher_be (b6, rkey); \
          b7 = asm_cipher_be (b7, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
          DO_ROUND(8);
          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          b0 = asm_cipherlast_be (b0, rkeylast);
          b1 = asm_cipherlast_be (b1, rkeylast);
          b2 = asm_cipherlast_be (b2, rkeylast);
          b3 = asm_cipherlast_be (b3, rkeylast);
          b4 = asm_cipherlast_be (b4, rkeylast);
          b5 = asm_cipherlast_be (b5, rkeylast);
          b6 = asm_cipherlast_be (b6, rkeylast);
          b7 = asm_cipherlast_be (b7, rkeylast);
        }
      else
        {
#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_ncipher_be (b0, rkey); \
          b1 = asm_ncipher_be (b1, rkey); \
          b2 = asm_ncipher_be (b2, rkey); \
          b3 = asm_ncipher_be (b3, rkey); \
          b4 = asm_ncipher_be (b4, rkey); \
          b5 = asm_ncipher_be (b5, rkey); \
          b6 = asm_ncipher_be (b6, rkey); \
          b7 = asm_ncipher_be (b7, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
          DO_ROUND(8);
          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          b0 = asm_ncipherlast_be (b0, rkeylast);
          b1 = asm_ncipherlast_be (b1, rkeylast);
          b2 = asm_ncipherlast_be (b2, rkeylast);
          b3 = asm_ncipherlast_be (b3, rkeylast);
          b4 = asm_ncipherlast_be (b4, rkeylast);
          b5 = asm_ncipherlast_be (b5, rkeylast);
          b6 = asm_ncipherlast_be (b6, rkeylast);
          b7 = asm_ncipherlast_be (b7, rkeylast);
        }

      VEC_STORE_BE (out, 0, b0, bige_const);
      VEC_STORE_BE (out, 1, b1, bige_const);
      VEC_STORE_BE (out, 2, b2, bige_const);
      VEC_STORE_BE (out, 3, b3, bige_const);
      VEC_STORE_BE (out, 4, b4, bige_const);
      VEC_STORE_BE (out, 5, b5, bige_const);
      VEC_STORE_BE (out, 6, b6, bige_const);
      VEC_STORE_BE (out, 7, b7, bige_const);
      out += 8;
    }

  if (nblocks >= 4)
    {
      b0 = VEC_LOAD_BE (in, 0, bige_const);
      b1 = VEC_LOAD_BE (in, 1, bige_const);
      b2 = VEC_LOAD_BE (in, 2, bige_const);
      b3 = VEC_LOAD_BE (in, 3, bige_const);

      b0 = asm_xor (rkey0, b0);
      b1 = asm_xor (rkey0, b1);
      b2 = asm_xor (rkey0, b2);
      b3 = asm_xor (rkey0, b3);

      if (encrypt)
        {
#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_cipher_be (b0, rkey); \
          b1 = asm_cipher_be (b1, rkey); \
          b2 = asm_cipher_be (b2, rkey); \
          b3 = asm_cipher_be (b3, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
          DO_ROUND(8);
          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }
#undef DO_ROUND

          b0 = asm_cipherlast_be (b0, rkeylast);
          b1 = asm_cipherlast_be (b1, rkeylast);
          b2 = asm_cipherlast_be (b2, rkeylast);
          b3 = asm_cipherlast_be (b3, rkeylast);
        }
      else
        {
#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_ncipher_be (b0, rkey); \
          b1 = asm_ncipher_be (b1, rkey); \
          b2 = asm_ncipher_be (b2, rkey); \
          b3 = asm_ncipher_be (b3, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
          DO_ROUND(8);
          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }
#undef DO_ROUND

          b0 = asm_ncipherlast_be (b0, rkeylast);
          b1 = asm_ncipherlast_be (b1, rkeylast);
          b2 = asm_ncipherlast_be (b2, rkeylast);
          b3 = asm_ncipherlast_be (b3, rkeylast);
        }

      VEC_STORE_BE (out, 0, b0, bige_const);
      VEC_STORE_BE (out, 1, b1, bige_const);
      VEC_STORE_BE (out, 2, b2, bige_const);
      VEC_STORE_BE (out, 3, b3, bige_const);

      in += 4;
      out += 4;
      nblocks -= 4;
    }

  for (; nblocks; nblocks--)
    {
      b0 = VEC_LOAD_BE (in, 0, bige_const);

      if (encrypt)
        {
          AES_ENCRYPT (b0, rounds);
        }
      else
        {
          AES_DECRYPT (b0, rounds);
        }

      VEC_STORE_BE (out, 0, b0, bige_const);

      out++;
      in++;
    }
}

void PPC_OPT_ATTR
CFB_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
              const void *inbuf_arg, size_t nblocks)
{
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = context;
  const u128_t *rk = (u128_t *)&ctx->keyschenc;
  const u128_t *in = (const u128_t *)inbuf_arg;
  u128_t *out = (u128_t *)outbuf_arg;
  int rounds = ctx->rounds;
  ROUND_KEY_VARIABLES;
  block rkeylast_orig;
  block iv, b, bin;
  block in0, in1, in2, in3, in4, in5, in6, in7;
  block b0, b1, b2, b3, b4, b5, b6, b7;
  block rkey;

  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);

  PRELOAD_ROUND_KEYS (rounds);
  rkeylast_orig = rkeylast;

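  /* CFB decryption parallelizes, since P_i = E_K(C_{i-1}) ^ C_i depends
   * only on ciphertext blocks: eight blocks are enciphered at once, and
   * each C_i is folded into the vcipherlast operand (rkeylast ^ C_i) so
   * the final chaining XOR costs no extra instruction. */
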
  for (; nblocks >= 8; nblocks -= 8)
    {
      in0 = iv;
      in1 = VEC_LOAD_BE_NOSWAP (in, 0);
      in2 = VEC_LOAD_BE_NOSWAP (in, 1);
      in3 = VEC_LOAD_BE_NOSWAP (in, 2);
      in4 = VEC_LOAD_BE_NOSWAP (in, 3);
      in1 = VEC_BE_SWAP (in1, bige_const);
      in2 = VEC_BE_SWAP (in2, bige_const);
      in5 = VEC_LOAD_BE_NOSWAP (in, 4);
      in6 = VEC_LOAD_BE_NOSWAP (in, 5);
      in3 = VEC_BE_SWAP (in3, bige_const);
      in4 = VEC_BE_SWAP (in4, bige_const);
      in7 = VEC_LOAD_BE_NOSWAP (in, 6);
      iv = VEC_LOAD_BE_NOSWAP (in, 7);
      in += 8;
      in5 = VEC_BE_SWAP (in5, bige_const);
      in6 = VEC_BE_SWAP (in6, bige_const);
      b0 = asm_xor (rkey0, in0);
      b1 = asm_xor (rkey0, in1);
      in7 = VEC_BE_SWAP (in7, bige_const);
      iv = VEC_BE_SWAP (iv, bige_const);
      b2 = asm_xor (rkey0, in2);
      b3 = asm_xor (rkey0, in3);
      b4 = asm_xor (rkey0, in4);
      b5 = asm_xor (rkey0, in5);
      b6 = asm_xor (rkey0, in6);
      b7 = asm_xor (rkey0, in7);

#define DO_ROUND(r) \
      rkey = ALIGNED_LOAD (rk, r); \
      b0 = asm_cipher_be (b0, rkey); \
      b1 = asm_cipher_be (b1, rkey); \
      b2 = asm_cipher_be (b2, rkey); \
      b3 = asm_cipher_be (b3, rkey); \
      b4 = asm_cipher_be (b4, rkey); \
      b5 = asm_cipher_be (b5, rkey); \
      b6 = asm_cipher_be (b6, rkey); \
      b7 = asm_cipher_be (b7, rkey);

      DO_ROUND(1);
      DO_ROUND(2);
      DO_ROUND(3);
      DO_ROUND(4);
      DO_ROUND(5);
      DO_ROUND(6);
      DO_ROUND(7);
      DO_ROUND(8);
      DO_ROUND(9);
      if (rounds >= 12)
        {
          DO_ROUND(10);
          DO_ROUND(11);
          if (rounds > 12)
            {
              DO_ROUND(12);
              DO_ROUND(13);
            }
        }

#undef DO_ROUND

      in1 = asm_xor (rkeylast, in1);
      in2 = asm_xor (rkeylast, in2);
      in3 = asm_xor (rkeylast, in3);
      in4 = asm_xor (rkeylast, in4);
      b0 = asm_cipherlast_be (b0, in1);
      b1 = asm_cipherlast_be (b1, in2);
      in5 = asm_xor (rkeylast, in5);
      in6 = asm_xor (rkeylast, in6);
      b2 = asm_cipherlast_be (b2, in3);
      b3 = asm_cipherlast_be (b3, in4);
      in7 = asm_xor (rkeylast, in7);
      in0 = asm_xor (rkeylast, iv);
      b0 = VEC_BE_SWAP (b0, bige_const);
      b1 = VEC_BE_SWAP (b1, bige_const);
      b4 = asm_cipherlast_be (b4, in5);
      b5 = asm_cipherlast_be (b5, in6);
      b2 = VEC_BE_SWAP (b2, bige_const);
      b3 = VEC_BE_SWAP (b3, bige_const);
      b6 = asm_cipherlast_be (b6, in7);
      b7 = asm_cipherlast_be (b7, in0);
      b4 = VEC_BE_SWAP (b4, bige_const);
      b5 = VEC_BE_SWAP (b5, bige_const);
      b6 = VEC_BE_SWAP (b6, bige_const);
      b7 = VEC_BE_SWAP (b7, bige_const);
      VEC_STORE_BE_NOSWAP (out, 0, b0);
      VEC_STORE_BE_NOSWAP (out, 1, b1);
      VEC_STORE_BE_NOSWAP (out, 2, b2);
      VEC_STORE_BE_NOSWAP (out, 3, b3);
      VEC_STORE_BE_NOSWAP (out, 4, b4);
      VEC_STORE_BE_NOSWAP (out, 5, b5);
      VEC_STORE_BE_NOSWAP (out, 6, b6);
      VEC_STORE_BE_NOSWAP (out, 7, b7);
      out += 8;
    }

  if (nblocks >= 4)
    {
      in0 = iv;
      in1 = VEC_LOAD_BE (in, 0, bige_const);
      in2 = VEC_LOAD_BE (in, 1, bige_const);
      in3 = VEC_LOAD_BE (in, 2, bige_const);
      iv = VEC_LOAD_BE (in, 3, bige_const);

      b0 = asm_xor (rkey0, in0);
      b1 = asm_xor (rkey0, in1);
      b2 = asm_xor (rkey0, in2);
      b3 = asm_xor (rkey0, in3);

#define DO_ROUND(r) \
      rkey = ALIGNED_LOAD (rk, r); \
      b0 = asm_cipher_be (b0, rkey); \
      b1 = asm_cipher_be (b1, rkey); \
      b2 = asm_cipher_be (b2, rkey); \
      b3 = asm_cipher_be (b3, rkey);

      DO_ROUND(1);
      DO_ROUND(2);
      DO_ROUND(3);
      DO_ROUND(4);
      DO_ROUND(5);
      DO_ROUND(6);
      DO_ROUND(7);
      DO_ROUND(8);
      DO_ROUND(9);
      if (rounds >= 12)
        {
          DO_ROUND(10);
          DO_ROUND(11);
          if (rounds > 12)
            {
              DO_ROUND(12);
              DO_ROUND(13);
            }
        }

#undef DO_ROUND

      in1 = asm_xor (rkeylast, in1);
      in2 = asm_xor (rkeylast, in2);
      in3 = asm_xor (rkeylast, in3);
      in0 = asm_xor (rkeylast, iv);
      b0 = asm_cipherlast_be (b0, in1);
      b1 = asm_cipherlast_be (b1, in2);
      b2 = asm_cipherlast_be (b2, in3);
      b3 = asm_cipherlast_be (b3, in0);
      VEC_STORE_BE (out, 0, b0, bige_const);
      VEC_STORE_BE (out, 1, b1, bige_const);
      VEC_STORE_BE (out, 2, b2, bige_const);
      VEC_STORE_BE (out, 3, b3, bige_const);

      in += 4;
      out += 4;
      nblocks -= 4;
    }

  for (; nblocks; nblocks--)
    {
      bin = VEC_LOAD_BE (in, 0, bige_const);
      rkeylast = rkeylast_orig ^ bin;
      b = iv;
      iv = bin;

      AES_ENCRYPT (b, rounds);

      VEC_STORE_BE (out, 0, b, bige_const);

      out++;
      in++;
    }

  VEC_STORE_BE (iv_arg, 0, iv, bige_const);
}

void PPC_OPT_ATTR
CBC_ENC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
              const void *inbuf_arg, size_t nblocks, int cbc_mac)
{
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = context;
  const u128_t *rk = (u128_t *)&ctx->keyschenc;
  const u128_t *in = (const u128_t *)inbuf_arg;
  byte *out = (byte *)outbuf_arg;
  int rounds = ctx->rounds;
  ROUND_KEY_VARIABLES_ALL;
  block iv, key0_xor_keylast, nextiv, outiv;
  unsigned int outadd = -(!cbc_mac) & 16;

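  /* Branchless output-pointer step: outadd is 16 when producing ciphertext
   * and 0 in CBC-MAC mode, so the store in the loop below can always run
   * and simply overwrites the same 16-byte slot when only the final MAC
   * value is wanted. */
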
  if (nblocks == 0) /* CMAC may call with nblocks 0. */
    return;

  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);

  PRELOAD_ROUND_KEYS_ALL (rounds);
  key0_xor_keylast = rkey0 ^ rkeylast;

  nextiv = VEC_LOAD_BE (in++, 0, bige_const);
  iv ^= rkey0 ^ nextiv;

  do
    {
      if (--nblocks)
        {
          nextiv = key0_xor_keylast ^ VEC_LOAD_BE (in++, 0, bige_const);
        }

      iv = asm_cipher_be (iv, rkey1);
      iv = asm_cipher_be (iv, rkey2);
      iv = asm_cipher_be (iv, rkey3);
      iv = asm_cipher_be (iv, rkey4);
      iv = asm_cipher_be (iv, rkey5);
      iv = asm_cipher_be (iv, rkey6);
      iv = asm_cipher_be (iv, rkey7);
      iv = asm_cipher_be (iv, rkey8);
      iv = asm_cipher_be (iv, rkey9);
      if (rounds >= 12)
        {
          iv = asm_cipher_be (iv, rkey10);
          iv = asm_cipher_be (iv, rkey11);
          if (rounds > 12)
            {
              iv = asm_cipher_be (iv, rkey12);
              iv = asm_cipher_be (iv, rkey13);
            }
        }
      outiv = iv;
      /* The order of the following instructions is important for best
       * performance on POWER8: the output-path vcipherlast needs to be
       * the last one. */
      __asm__ volatile ("vcipherlast %0, %0, %2\n\t"
                        "vcipherlast %1, %1, %3\n\t"
                        : "+v" (iv), "+v" (outiv)
                        : "v" (nextiv), "v" (rkeylast));

      VEC_STORE_BE ((u128_t *)out, 0, outiv, bige_const);
      out += outadd;
    }
  while (nblocks);

  VEC_STORE_BE (iv_arg, 0, outiv, bige_const);
}

void PPC_OPT_ATTR
CBC_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg,
              const void *inbuf_arg, size_t nblocks)
{
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = context;
  const u128_t *rk = (u128_t *)&ctx->keyschdec;
  const u128_t *in = (const u128_t *)inbuf_arg;
  u128_t *out = (u128_t *)outbuf_arg;
  int rounds = ctx->rounds;
  ROUND_KEY_VARIABLES;
  block rkeylast_orig;
  block in0, in1, in2, in3, in4, in5, in6, in7;
  block b0, b1, b2, b3, b4, b5, b6, b7;
  block rkey;
  block iv, b;

  if (!ctx->decryption_prepared)
    {
      internal_aes_ppc_prepare_decryption (ctx);
      ctx->decryption_prepared = 1;
    }

  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);

  PRELOAD_ROUND_KEYS (rounds);
  rkeylast_orig = rkeylast;

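  /* CBC decryption is parallel: P_i = D_K(C_i) ^ C_{i-1}.  The previous
   * ciphertext block is folded into the vncipherlast operand
   * (rkeylast ^ C_{i-1}), merging the chaining XOR into the last round. */
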
  for (; nblocks >= 8; nblocks -= 8)
    {
      in0 = VEC_LOAD_BE_NOSWAP (in, 0);
      in1 = VEC_LOAD_BE_NOSWAP (in, 1);
      in2 = VEC_LOAD_BE_NOSWAP (in, 2);
      in3 = VEC_LOAD_BE_NOSWAP (in, 3);
      in0 = VEC_BE_SWAP (in0, bige_const);
      in1 = VEC_BE_SWAP (in1, bige_const);
      in4 = VEC_LOAD_BE_NOSWAP (in, 4);
      in5 = VEC_LOAD_BE_NOSWAP (in, 5);
      in2 = VEC_BE_SWAP (in2, bige_const);
      in3 = VEC_BE_SWAP (in3, bige_const);
      in6 = VEC_LOAD_BE_NOSWAP (in, 6);
      in7 = VEC_LOAD_BE_NOSWAP (in, 7);
      in += 8;
      b0 = asm_xor (rkey0, in0);
      b1 = asm_xor (rkey0, in1);
      in4 = VEC_BE_SWAP (in4, bige_const);
      in5 = VEC_BE_SWAP (in5, bige_const);
      b2 = asm_xor (rkey0, in2);
      b3 = asm_xor (rkey0, in3);
      in6 = VEC_BE_SWAP (in6, bige_const);
      in7 = VEC_BE_SWAP (in7, bige_const);
      b4 = asm_xor (rkey0, in4);
      b5 = asm_xor (rkey0, in5);
      b6 = asm_xor (rkey0, in6);
      b7 = asm_xor (rkey0, in7);

#define DO_ROUND(r) \
      rkey = ALIGNED_LOAD (rk, r); \
      b0 = asm_ncipher_be (b0, rkey); \
      b1 = asm_ncipher_be (b1, rkey); \
      b2 = asm_ncipher_be (b2, rkey); \
      b3 = asm_ncipher_be (b3, rkey); \
      b4 = asm_ncipher_be (b4, rkey); \
      b5 = asm_ncipher_be (b5, rkey); \
      b6 = asm_ncipher_be (b6, rkey); \
      b7 = asm_ncipher_be (b7, rkey);

      DO_ROUND(1);
      DO_ROUND(2);
      DO_ROUND(3);
      DO_ROUND(4);
      DO_ROUND(5);
      DO_ROUND(6);
      DO_ROUND(7);
      DO_ROUND(8);
      DO_ROUND(9);
      if (rounds >= 12)
        {
          DO_ROUND(10);
          DO_ROUND(11);
          if (rounds > 12)
            {
              DO_ROUND(12);
              DO_ROUND(13);
            }
        }

#undef DO_ROUND

      iv = asm_xor (rkeylast, iv);
      in0 = asm_xor (rkeylast, in0);
      in1 = asm_xor (rkeylast, in1);
      in2 = asm_xor (rkeylast, in2);
      b0 = asm_ncipherlast_be (b0, iv);
      iv = in7;
      b1 = asm_ncipherlast_be (b1, in0);
      in3 = asm_xor (rkeylast, in3);
      in4 = asm_xor (rkeylast, in4);
      b2 = asm_ncipherlast_be (b2, in1);
      b3 = asm_ncipherlast_be (b3, in2);
      in5 = asm_xor (rkeylast, in5);
      in6 = asm_xor (rkeylast, in6);
      b0 = VEC_BE_SWAP (b0, bige_const);
      b1 = VEC_BE_SWAP (b1, bige_const);
      b4 = asm_ncipherlast_be (b4, in3);
      b5 = asm_ncipherlast_be (b5, in4);
      b2 = VEC_BE_SWAP (b2, bige_const);
      b3 = VEC_BE_SWAP (b3, bige_const);
      b6 = asm_ncipherlast_be (b6, in5);
      b7 = asm_ncipherlast_be (b7, in6);
      b4 = VEC_BE_SWAP (b4, bige_const);
      b5 = VEC_BE_SWAP (b5, bige_const);
      b6 = VEC_BE_SWAP (b6, bige_const);
      b7 = VEC_BE_SWAP (b7, bige_const);
      VEC_STORE_BE_NOSWAP (out, 0, b0);
      VEC_STORE_BE_NOSWAP (out, 1, b1);
      VEC_STORE_BE_NOSWAP (out, 2, b2);
      VEC_STORE_BE_NOSWAP (out, 3, b3);
      VEC_STORE_BE_NOSWAP (out, 4, b4);
      VEC_STORE_BE_NOSWAP (out, 5, b5);
      VEC_STORE_BE_NOSWAP (out, 6, b6);
      VEC_STORE_BE_NOSWAP (out, 7, b7);
      out += 8;
    }

  if (nblocks >= 4)
    {
      in0 = VEC_LOAD_BE (in, 0, bige_const);
      in1 = VEC_LOAD_BE (in, 1, bige_const);
      in2 = VEC_LOAD_BE (in, 2, bige_const);
      in3 = VEC_LOAD_BE (in, 3, bige_const);

      b0 = asm_xor (rkey0, in0);
      b1 = asm_xor (rkey0, in1);
      b2 = asm_xor (rkey0, in2);
      b3 = asm_xor (rkey0, in3);

#define DO_ROUND(r) \
      rkey = ALIGNED_LOAD (rk, r); \
      b0 = asm_ncipher_be (b0, rkey); \
      b1 = asm_ncipher_be (b1, rkey); \
      b2 = asm_ncipher_be (b2, rkey); \
      b3 = asm_ncipher_be (b3, rkey);

      DO_ROUND(1);
      DO_ROUND(2);
      DO_ROUND(3);
      DO_ROUND(4);
      DO_ROUND(5);
      DO_ROUND(6);
      DO_ROUND(7);
      DO_ROUND(8);
      DO_ROUND(9);
      if (rounds >= 12)
        {
          DO_ROUND(10);
          DO_ROUND(11);
          if (rounds > 12)
            {
              DO_ROUND(12);
              DO_ROUND(13);
            }
        }

#undef DO_ROUND

      iv = asm_xor (rkeylast, iv);
      in0 = asm_xor (rkeylast, in0);
      in1 = asm_xor (rkeylast, in1);
      in2 = asm_xor (rkeylast, in2);

      b0 = asm_ncipherlast_be (b0, iv);
      iv = in3;
      b1 = asm_ncipherlast_be (b1, in0);
      b2 = asm_ncipherlast_be (b2, in1);
      b3 = asm_ncipherlast_be (b3, in2);

      VEC_STORE_BE (out, 0, b0, bige_const);
      VEC_STORE_BE (out, 1, b1, bige_const);
      VEC_STORE_BE (out, 2, b2, bige_const);
      VEC_STORE_BE (out, 3, b3, bige_const);

      in += 4;
      out += 4;
      nblocks -= 4;
    }

  for (; nblocks; nblocks--)
    {
      rkeylast = rkeylast_orig ^ iv;

      iv = VEC_LOAD_BE (in, 0, bige_const);
      b = iv;
      AES_DECRYPT (b, rounds);

      VEC_STORE_BE (out, 0, b, bige_const);

      in++;
      out++;
    }

  VEC_STORE_BE (iv_arg, 0, iv, bige_const);
}

void PPC_OPT_ATTR
CTR_ENC_FUNC (void *context, unsigned char *ctr_arg, void *outbuf_arg,
              const void *inbuf_arg, size_t nblocks)
{
  static const unsigned char vec_one_const[16] =
    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = context;
  const u128_t *rk = (u128_t *)&ctx->keyschenc;
  const u128_t *in = (const u128_t *)inbuf_arg;
  u128_t *out = (u128_t *)outbuf_arg;
  int rounds = ctx->rounds;
  ROUND_KEY_VARIABLES;
  block rkeylast_orig;
  block ctr, b, one;

  ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const);
  one = VEC_LOAD_BE (&vec_one_const, 0, bige_const);

  PRELOAD_ROUND_KEYS (rounds);
  rkeylast_orig = rkeylast;

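  /* The counter is a full 128-bit big-endian integer: asm_add_uint128
   * propagates carries across the whole block.  In the wide path below,
   * precomputed +1, +2, +3 and +4 constants let eight consecutive counter
   * values be formed from short add chains instead of one serial increment
   * per block. */
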
  if (nblocks >= 4)
    {
      block in0, in1, in2, in3, in4, in5, in6, in7;
      block b0, b1, b2, b3, b4, b5, b6, b7;
      block two, three, four;
      block rkey;

      two = asm_add_uint128 (one, one);
      three = asm_add_uint128 (two, one);
      four = asm_add_uint128 (two, two);

      for (; nblocks >= 8; nblocks -= 8)
        {
          b1 = asm_add_uint128 (ctr, one);
          b2 = asm_add_uint128 (ctr, two);
          b3 = asm_add_uint128 (ctr, three);
          b4 = asm_add_uint128 (ctr, four);
          b5 = asm_add_uint128 (b1, four);
          b6 = asm_add_uint128 (b2, four);
          b7 = asm_add_uint128 (b3, four);
          b0 = asm_xor (rkey0, ctr);
          rkey = ALIGNED_LOAD (rk, 1);
          ctr = asm_add_uint128 (b4, four);
          b1 = asm_xor (rkey0, b1);
          b2 = asm_xor (rkey0, b2);
          b3 = asm_xor (rkey0, b3);
          b0 = asm_cipher_be (b0, rkey);
          b1 = asm_cipher_be (b1, rkey);
          b2 = asm_cipher_be (b2, rkey);
          b3 = asm_cipher_be (b3, rkey);
          b4 = asm_xor (rkey0, b4);
          b5 = asm_xor (rkey0, b5);
          b6 = asm_xor (rkey0, b6);
          b7 = asm_xor (rkey0, b7);
          b4 = asm_cipher_be (b4, rkey);
          b5 = asm_cipher_be (b5, rkey);
          b6 = asm_cipher_be (b6, rkey);
          b7 = asm_cipher_be (b7, rkey);

#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_cipher_be (b0, rkey); \
          b1 = asm_cipher_be (b1, rkey); \
          b2 = asm_cipher_be (b2, rkey); \
          b3 = asm_cipher_be (b3, rkey); \
          b4 = asm_cipher_be (b4, rkey); \
          b5 = asm_cipher_be (b5, rkey); \
          b6 = asm_cipher_be (b6, rkey); \
          b7 = asm_cipher_be (b7, rkey);

          in0 = VEC_LOAD_BE_NOSWAP (in, 0);
          DO_ROUND(2);
          in1 = VEC_LOAD_BE_NOSWAP (in, 1);
          DO_ROUND(3);
          in2 = VEC_LOAD_BE_NOSWAP (in, 2);
          DO_ROUND(4);
          in3 = VEC_LOAD_BE_NOSWAP (in, 3);
          DO_ROUND(5);
          in4 = VEC_LOAD_BE_NOSWAP (in, 4);
          DO_ROUND(6);
          in5 = VEC_LOAD_BE_NOSWAP (in, 5);
          DO_ROUND(7);
          in6 = VEC_LOAD_BE_NOSWAP (in, 6);
          DO_ROUND(8);
          in7 = VEC_LOAD_BE_NOSWAP (in, 7);
          in += 8;
          DO_ROUND(9);

          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          in0 = VEC_BE_SWAP (in0, bige_const);
          in1 = VEC_BE_SWAP (in1, bige_const);
          in2 = VEC_BE_SWAP (in2, bige_const);
          in3 = VEC_BE_SWAP (in3, bige_const);
          in4 = VEC_BE_SWAP (in4, bige_const);
          in5 = VEC_BE_SWAP (in5, bige_const);
          in6 = VEC_BE_SWAP (in6, bige_const);
          in7 = VEC_BE_SWAP (in7, bige_const);

          in0 = asm_xor (rkeylast, in0);
          in1 = asm_xor (rkeylast, in1);
          in2 = asm_xor (rkeylast, in2);
          in3 = asm_xor (rkeylast, in3);
          b0 = asm_cipherlast_be (b0, in0);
          b1 = asm_cipherlast_be (b1, in1);
          in4 = asm_xor (rkeylast, in4);
          in5 = asm_xor (rkeylast, in5);
          b2 = asm_cipherlast_be (b2, in2);
          b3 = asm_cipherlast_be (b3, in3);
          in6 = asm_xor (rkeylast, in6);
          in7 = asm_xor (rkeylast, in7);
          b4 = asm_cipherlast_be (b4, in4);
          b5 = asm_cipherlast_be (b5, in5);
          b6 = asm_cipherlast_be (b6, in6);
          b7 = asm_cipherlast_be (b7, in7);

          b0 = VEC_BE_SWAP (b0, bige_const);
          b1 = VEC_BE_SWAP (b1, bige_const);
          b2 = VEC_BE_SWAP (b2, bige_const);
          b3 = VEC_BE_SWAP (b3, bige_const);
          b4 = VEC_BE_SWAP (b4, bige_const);
          b5 = VEC_BE_SWAP (b5, bige_const);
          b6 = VEC_BE_SWAP (b6, bige_const);
          b7 = VEC_BE_SWAP (b7, bige_const);
          VEC_STORE_BE_NOSWAP (out, 0, b0);
          VEC_STORE_BE_NOSWAP (out, 1, b1);
          VEC_STORE_BE_NOSWAP (out, 2, b2);
          VEC_STORE_BE_NOSWAP (out, 3, b3);
          VEC_STORE_BE_NOSWAP (out, 4, b4);
          VEC_STORE_BE_NOSWAP (out, 5, b5);
          VEC_STORE_BE_NOSWAP (out, 6, b6);
          VEC_STORE_BE_NOSWAP (out, 7, b7);
          out += 8;
        }

      if (nblocks >= 4)
        {
          b1 = asm_add_uint128 (ctr, one);
          b2 = asm_add_uint128 (ctr, two);
          b3 = asm_add_uint128 (ctr, three);
          b0 = asm_xor (rkey0, ctr);
          ctr = asm_add_uint128 (ctr, four);
          b1 = asm_xor (rkey0, b1);
          b2 = asm_xor (rkey0, b2);
          b3 = asm_xor (rkey0, b3);

#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_cipher_be (b0, rkey); \
          b1 = asm_cipher_be (b1, rkey); \
          b2 = asm_cipher_be (b2, rkey); \
          b3 = asm_cipher_be (b3, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
          DO_ROUND(8);

          in0 = VEC_LOAD_BE (in, 0, bige_const);
          in1 = VEC_LOAD_BE (in, 1, bige_const);
          in2 = VEC_LOAD_BE (in, 2, bige_const);
          in3 = VEC_LOAD_BE (in, 3, bige_const);

          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          in0 = asm_xor (rkeylast, in0);
          in1 = asm_xor (rkeylast, in1);
          in2 = asm_xor (rkeylast, in2);
          in3 = asm_xor (rkeylast, in3);

          b0 = asm_cipherlast_be (b0, in0);
          b1 = asm_cipherlast_be (b1, in1);
          b2 = asm_cipherlast_be (b2, in2);
          b3 = asm_cipherlast_be (b3, in3);

          VEC_STORE_BE (out, 0, b0, bige_const);
          VEC_STORE_BE (out, 1, b1, bige_const);
          VEC_STORE_BE (out, 2, b2, bige_const);
          VEC_STORE_BE (out, 3, b3, bige_const);

          in += 4;
          out += 4;
          nblocks -= 4;
        }
    }

  for (; nblocks; nblocks--)
    {
      b = ctr;
      ctr = asm_add_uint128 (ctr, one);
      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);

      AES_ENCRYPT (b, rounds);

      VEC_STORE_BE (out, 0, b, bige_const);

      out++;
      in++;
    }

  VEC_STORE_BE (ctr_arg, 0, ctr, bige_const);
}

size_t PPC_OPT_ATTR
OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg, const void *inbuf_arg,
                size_t nblocks, int encrypt)
{
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = (void *)&c->context.c;
  const u128_t *in = (const u128_t *)inbuf_arg;
  u128_t *out = (u128_t *)outbuf_arg;
  int rounds = ctx->rounds;
  u64 data_nblocks = c->u_mode.ocb.data_nblocks;
  block l0, l1, l2, l;
  block b0, b1, b2, b3, b4, b5, b6, b7, b;
  block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
  block rkey, rkeylf;
  block ctr, iv;
  ROUND_KEY_VARIABLES;

  iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const);
  ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const);

  l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
  l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
  l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);

  if (encrypt)
    {
      const u128_t *rk = (u128_t *)&ctx->keyschenc;

      PRELOAD_ROUND_KEYS (rounds);

      for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
        {
          l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
          b = VEC_LOAD_BE (in, 0, bige_const);

          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
          iv ^= l;
          /* Checksum_i = Checksum_{i-1} xor P_i */
          ctr ^= b;
          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
          b ^= iv;
          AES_ENCRYPT (b, rounds);
          b ^= iv;

          VEC_STORE_BE (out, 0, b, bige_const);

          in += 1;
          out += 1;
        }

      for (; nblocks >= 8; nblocks -= 8)
        {
          b0 = VEC_LOAD_BE_NOSWAP (in, 0);
          b1 = VEC_LOAD_BE_NOSWAP (in, 1);
          b2 = VEC_LOAD_BE_NOSWAP (in, 2);
          b3 = VEC_LOAD_BE_NOSWAP (in, 3);
          b4 = VEC_LOAD_BE_NOSWAP (in, 4);
          b5 = VEC_LOAD_BE_NOSWAP (in, 5);
          b6 = VEC_LOAD_BE_NOSWAP (in, 6);
          b7 = VEC_LOAD_BE_NOSWAP (in, 7);
          in += 8;
          l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
          b0 = VEC_BE_SWAP(b0, bige_const);
          b1 = VEC_BE_SWAP(b1, bige_const);
          b2 = VEC_BE_SWAP(b2, bige_const);
          b3 = VEC_BE_SWAP(b3, bige_const);
          b4 = VEC_BE_SWAP(b4, bige_const);
          b5 = VEC_BE_SWAP(b5, bige_const);
          b6 = VEC_BE_SWAP(b6, bige_const);
          b7 = VEC_BE_SWAP(b7, bige_const);
          l = VEC_BE_SWAP(l, bige_const);

          ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;

          iv ^= rkey0;

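          /* OCB offsets for eight consecutive blocks: with the block index
           * running over i+1..i+8 (i divisible by 8), ntz() selects
           * L0, L1, L0, L2, L0, L1, L0 and finally the preloaded l value,
           * which telescopes into the cumulative iv0..iv7 XOR chain below
           * (each ivN already includes the rkey0 pre-whitening). */
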
          iv0 = iv ^ l0;
          iv1 = iv ^ l0 ^ l1;
          iv2 = iv ^ l1;
          iv3 = iv ^ l1 ^ l2;
          iv4 = iv ^ l1 ^ l2 ^ l0;
          iv5 = iv ^ l2 ^ l0;
          iv6 = iv ^ l2;
          iv7 = iv ^ l2 ^ l;

          b0 ^= iv0;
          b1 ^= iv1;
          b2 ^= iv2;
          b3 ^= iv3;
          b4 ^= iv4;
          b5 ^= iv5;
          b6 ^= iv6;
          b7 ^= iv7;
          iv = iv7 ^ rkey0;

#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_cipher_be (b0, rkey); \
          b1 = asm_cipher_be (b1, rkey); \
          b2 = asm_cipher_be (b2, rkey); \
          b3 = asm_cipher_be (b3, rkey); \
          b4 = asm_cipher_be (b4, rkey); \
          b5 = asm_cipher_be (b5, rkey); \
          b6 = asm_cipher_be (b6, rkey); \
          b7 = asm_cipher_be (b7, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);

          rkeylf = asm_xor (rkeylast, rkey0);

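          /* rkeylf = rkeylast ^ rkey0 is folded into each offset while the
           * middle rounds are still in flight: the vcipherlast operand then
           * applies the last round key, cancels the rkey0 mixed into the
           * offsets, and performs the final C_i = Offset_i XOR in a single
           * instruction. */
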
          DO_ROUND(8);

          iv0 = asm_xor (rkeylf, iv0);
          iv1 = asm_xor (rkeylf, iv1);
          iv2 = asm_xor (rkeylf, iv2);
          iv3 = asm_xor (rkeylf, iv3);
          iv4 = asm_xor (rkeylf, iv4);
          iv5 = asm_xor (rkeylf, iv5);
          iv6 = asm_xor (rkeylf, iv6);
          iv7 = asm_xor (rkeylf, iv7);

          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          b0 = asm_cipherlast_be (b0, iv0);
          b1 = asm_cipherlast_be (b1, iv1);
          b2 = asm_cipherlast_be (b2, iv2);
          b3 = asm_cipherlast_be (b3, iv3);
          b4 = asm_cipherlast_be (b4, iv4);
          b5 = asm_cipherlast_be (b5, iv5);
          b6 = asm_cipherlast_be (b6, iv6);
          b7 = asm_cipherlast_be (b7, iv7);

          b0 = VEC_BE_SWAP (b0, bige_const);
          b1 = VEC_BE_SWAP (b1, bige_const);
          b2 = VEC_BE_SWAP (b2, bige_const);
          b3 = VEC_BE_SWAP (b3, bige_const);
          b4 = VEC_BE_SWAP (b4, bige_const);
          b5 = VEC_BE_SWAP (b5, bige_const);
          b6 = VEC_BE_SWAP (b6, bige_const);
          b7 = VEC_BE_SWAP (b7, bige_const);
          VEC_STORE_BE_NOSWAP (out, 0, b0);
          VEC_STORE_BE_NOSWAP (out, 1, b1);
          VEC_STORE_BE_NOSWAP (out, 2, b2);
          VEC_STORE_BE_NOSWAP (out, 3, b3);
          VEC_STORE_BE_NOSWAP (out, 4, b4);
          VEC_STORE_BE_NOSWAP (out, 5, b5);
          VEC_STORE_BE_NOSWAP (out, 6, b6);
          VEC_STORE_BE_NOSWAP (out, 7, b7);
          out += 8;
        }

      if (nblocks >= 4 && (data_nblocks % 4) == 0)
        {
          b0 = VEC_LOAD_BE (in, 0, bige_const);
          b1 = VEC_LOAD_BE (in, 1, bige_const);
          b2 = VEC_LOAD_BE (in, 2, bige_const);
          b3 = VEC_LOAD_BE (in, 3, bige_const);

          l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);

          ctr ^= b0 ^ b1 ^ b2 ^ b3;

          iv ^= rkey0;

          iv0 = iv ^ l0;
          iv1 = iv ^ l0 ^ l1;
          iv2 = iv ^ l1;
          iv3 = iv ^ l1 ^ l;

          b0 ^= iv0;
          b1 ^= iv1;
          b2 ^= iv2;
          b3 ^= iv3;
          iv = iv3 ^ rkey0;

#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_cipher_be (b0, rkey); \
          b1 = asm_cipher_be (b1, rkey); \
          b2 = asm_cipher_be (b2, rkey); \
          b3 = asm_cipher_be (b3, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
          DO_ROUND(8);
          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          rkey = rkeylast ^ rkey0;
          b0 = asm_cipherlast_be (b0, rkey ^ iv0);
          b1 = asm_cipherlast_be (b1, rkey ^ iv1);
          b2 = asm_cipherlast_be (b2, rkey ^ iv2);
          b3 = asm_cipherlast_be (b3, rkey ^ iv3);

          VEC_STORE_BE (out, 0, b0, bige_const);
          VEC_STORE_BE (out, 1, b1, bige_const);
          VEC_STORE_BE (out, 2, b2, bige_const);
          VEC_STORE_BE (out, 3, b3, bige_const);

          in += 4;
          out += 4;
          nblocks -= 4;
        }

      for (; nblocks; nblocks--)
        {
          l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
          b = VEC_LOAD_BE (in, 0, bige_const);

          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
          iv ^= l;
          /* Checksum_i = Checksum_{i-1} xor P_i */
          ctr ^= b;
          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
          b ^= iv;
          AES_ENCRYPT (b, rounds);
          b ^= iv;

          VEC_STORE_BE (out, 0, b, bige_const);

          in += 1;
          out += 1;
        }
    }
  else
    {
      const u128_t *rk = (u128_t *)&ctx->keyschdec;

      if (!ctx->decryption_prepared)
        {
          internal_aes_ppc_prepare_decryption (ctx);
          ctx->decryption_prepared = 1;
        }

      PRELOAD_ROUND_KEYS (rounds);

      for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
        {
          l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
          b = VEC_LOAD_BE (in, 0, bige_const);

          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
          iv ^= l;
          /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
          b ^= iv;
          AES_DECRYPT (b, rounds);
          b ^= iv;
          /* Checksum_i = Checksum_{i-1} xor P_i */
          ctr ^= b;

          VEC_STORE_BE (out, 0, b, bige_const);

          in += 1;
          out += 1;
        }

      for (; nblocks >= 8; nblocks -= 8)
        {
          b0 = VEC_LOAD_BE_NOSWAP (in, 0);
          b1 = VEC_LOAD_BE_NOSWAP (in, 1);
          b2 = VEC_LOAD_BE_NOSWAP (in, 2);
          b3 = VEC_LOAD_BE_NOSWAP (in, 3);
          b4 = VEC_LOAD_BE_NOSWAP (in, 4);
          b5 = VEC_LOAD_BE_NOSWAP (in, 5);
          b6 = VEC_LOAD_BE_NOSWAP (in, 6);
          b7 = VEC_LOAD_BE_NOSWAP (in, 7);
          in += 8;
          l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
          b0 = VEC_BE_SWAP(b0, bige_const);
          b1 = VEC_BE_SWAP(b1, bige_const);
          b2 = VEC_BE_SWAP(b2, bige_const);
          b3 = VEC_BE_SWAP(b3, bige_const);
          b4 = VEC_BE_SWAP(b4, bige_const);
          b5 = VEC_BE_SWAP(b5, bige_const);
          b6 = VEC_BE_SWAP(b6, bige_const);
          b7 = VEC_BE_SWAP(b7, bige_const);
          l = VEC_BE_SWAP(l, bige_const);

          iv ^= rkey0;

          iv0 = iv ^ l0;
          iv1 = iv ^ l0 ^ l1;
          iv2 = iv ^ l1;
          iv3 = iv ^ l1 ^ l2;
          iv4 = iv ^ l1 ^ l2 ^ l0;
          iv5 = iv ^ l2 ^ l0;
          iv6 = iv ^ l2;
          iv7 = iv ^ l2 ^ l;

          b0 ^= iv0;
          b1 ^= iv1;
          b2 ^= iv2;
          b3 ^= iv3;
          b4 ^= iv4;
          b5 ^= iv5;
          b6 ^= iv6;
          b7 ^= iv7;
          iv = iv7 ^ rkey0;

#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_ncipher_be (b0, rkey); \
          b1 = asm_ncipher_be (b1, rkey); \
          b2 = asm_ncipher_be (b2, rkey); \
          b3 = asm_ncipher_be (b3, rkey); \
          b4 = asm_ncipher_be (b4, rkey); \
          b5 = asm_ncipher_be (b5, rkey); \
          b6 = asm_ncipher_be (b6, rkey); \
          b7 = asm_ncipher_be (b7, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);

          rkeylf = asm_xor (rkeylast, rkey0);

          DO_ROUND(8);

          iv0 = asm_xor (rkeylf, iv0);
          iv1 = asm_xor (rkeylf, iv1);
          iv2 = asm_xor (rkeylf, iv2);
          iv3 = asm_xor (rkeylf, iv3);
          iv4 = asm_xor (rkeylf, iv4);
          iv5 = asm_xor (rkeylf, iv5);
          iv6 = asm_xor (rkeylf, iv6);
          iv7 = asm_xor (rkeylf, iv7);

          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          b0 = asm_ncipherlast_be (b0, iv0);
          b1 = asm_ncipherlast_be (b1, iv1);
          b2 = asm_ncipherlast_be (b2, iv2);
          b3 = asm_ncipherlast_be (b3, iv3);
          b4 = asm_ncipherlast_be (b4, iv4);
          b5 = asm_ncipherlast_be (b5, iv5);
          b6 = asm_ncipherlast_be (b6, iv6);
          b7 = asm_ncipherlast_be (b7, iv7);

          ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;

          b0 = VEC_BE_SWAP (b0, bige_const);
          b1 = VEC_BE_SWAP (b1, bige_const);
          b2 = VEC_BE_SWAP (b2, bige_const);
          b3 = VEC_BE_SWAP (b3, bige_const);
          b4 = VEC_BE_SWAP (b4, bige_const);
          b5 = VEC_BE_SWAP (b5, bige_const);
          b6 = VEC_BE_SWAP (b6, bige_const);
          b7 = VEC_BE_SWAP (b7, bige_const);
          VEC_STORE_BE_NOSWAP (out, 0, b0);
          VEC_STORE_BE_NOSWAP (out, 1, b1);
          VEC_STORE_BE_NOSWAP (out, 2, b2);
          VEC_STORE_BE_NOSWAP (out, 3, b3);
          VEC_STORE_BE_NOSWAP (out, 4, b4);
          VEC_STORE_BE_NOSWAP (out, 5, b5);
          VEC_STORE_BE_NOSWAP (out, 6, b6);
          VEC_STORE_BE_NOSWAP (out, 7, b7);
          out += 8;
        }

      if (nblocks >= 4 && (data_nblocks % 4) == 0)
        {
          b0 = VEC_LOAD_BE (in, 0, bige_const);
          b1 = VEC_LOAD_BE (in, 1, bige_const);
          b2 = VEC_LOAD_BE (in, 2, bige_const);
          b3 = VEC_LOAD_BE (in, 3, bige_const);

          l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);

          iv ^= rkey0;

          iv0 = iv ^ l0;
          iv1 = iv ^ l0 ^ l1;
          iv2 = iv ^ l1;
          iv3 = iv ^ l1 ^ l;

          b0 ^= iv0;
          b1 ^= iv1;
          b2 ^= iv2;
          b3 ^= iv3;
          iv = iv3 ^ rkey0;

#define DO_ROUND(r) \
          rkey = ALIGNED_LOAD (rk, r); \
          b0 = asm_ncipher_be (b0, rkey); \
          b1 = asm_ncipher_be (b1, rkey); \
          b2 = asm_ncipher_be (b2, rkey); \
          b3 = asm_ncipher_be (b3, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
          DO_ROUND(8);
          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          rkey = rkeylast ^ rkey0;
          b0 = asm_ncipherlast_be (b0, rkey ^ iv0);
          b1 = asm_ncipherlast_be (b1, rkey ^ iv1);
          b2 = asm_ncipherlast_be (b2, rkey ^ iv2);
          b3 = asm_ncipherlast_be (b3, rkey ^ iv3);

          VEC_STORE_BE (out, 0, b0, bige_const);
          VEC_STORE_BE (out, 1, b1, bige_const);
          VEC_STORE_BE (out, 2, b2, bige_const);
          VEC_STORE_BE (out, 3, b3, bige_const);

          ctr ^= b0 ^ b1 ^ b2 ^ b3;

          in += 4;
          out += 4;
          nblocks -= 4;
        }

      for (; nblocks; nblocks--)
        {
          l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
          b = VEC_LOAD_BE (in, 0, bige_const);

          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
          iv ^= l;
          /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
          b ^= iv;
          AES_DECRYPT (b, rounds);
          b ^= iv;
          /* Checksum_i = Checksum_{i-1} xor P_i */
          ctr ^= b;

          VEC_STORE_BE (out, 0, b, bige_const);

          in += 1;
          out += 1;
        }
    }

  VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const);
  VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const);
  c->u_mode.ocb.data_nblocks = data_nblocks;

  return 0;
}

size_t PPC_OPT_ATTR
OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
{
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = (void *)&c->context.c;
  const u128_t *rk = (u128_t *)&ctx->keyschenc;
  const u128_t *abuf = (const u128_t *)abuf_arg;
  int rounds = ctx->rounds;
  u64 data_nblocks = c->u_mode.ocb.aad_nblocks;
  block l0, l1, l2, l;
  block b0, b1, b2, b3, b4, b5, b6, b7, b;
  block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
  block rkey, frkey;
  block ctr, iv;
  ROUND_KEY_VARIABLES;

  iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const);
  ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const);

  l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
  l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
  l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);

  PRELOAD_ROUND_KEYS (rounds);

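  /* Authentication only accumulates Sum_i = Sum_{i-1} xor
   * ENCIPHER(K, A_i xor Offset_i); nothing is written per block, so the
   * wide paths below just XOR the eight cipher outputs into ctr, the
   * running aad_sum. */
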
  for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
    {
      l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
      b = VEC_LOAD_BE (abuf, 0, bige_const);

      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
      iv ^= l;
      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
      b ^= iv;
      AES_ENCRYPT (b, rounds);
      ctr ^= b;

      abuf += 1;
    }

  for (; nblocks >= 8; nblocks -= 8)
    {
      b0 = VEC_LOAD_BE (abuf, 0, bige_const);
      b1 = VEC_LOAD_BE (abuf, 1, bige_const);
      b2 = VEC_LOAD_BE (abuf, 2, bige_const);
      b3 = VEC_LOAD_BE (abuf, 3, bige_const);
      b4 = VEC_LOAD_BE (abuf, 4, bige_const);
      b5 = VEC_LOAD_BE (abuf, 5, bige_const);
      b6 = VEC_LOAD_BE (abuf, 6, bige_const);
      b7 = VEC_LOAD_BE (abuf, 7, bige_const);

      l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const);

      frkey = rkey0;
      iv ^= frkey;

      iv0 = iv ^ l0;
      iv1 = iv ^ l0 ^ l1;
      iv2 = iv ^ l1;
      iv3 = iv ^ l1 ^ l2;
      iv4 = iv ^ l1 ^ l2 ^ l0;
      iv5 = iv ^ l2 ^ l0;
      iv6 = iv ^ l2;
      iv7 = iv ^ l2 ^ l;

      b0 ^= iv0;
      b1 ^= iv1;
      b2 ^= iv2;
      b3 ^= iv3;
      b4 ^= iv4;
      b5 ^= iv5;
      b6 ^= iv6;
      b7 ^= iv7;
      iv = iv7 ^ frkey;

#define DO_ROUND(r) \
      rkey = ALIGNED_LOAD (rk, r); \
      b0 = asm_cipher_be (b0, rkey); \
      b1 = asm_cipher_be (b1, rkey); \
      b2 = asm_cipher_be (b2, rkey); \
      b3 = asm_cipher_be (b3, rkey); \
      b4 = asm_cipher_be (b4, rkey); \
      b5 = asm_cipher_be (b5, rkey); \
      b6 = asm_cipher_be (b6, rkey); \
      b7 = asm_cipher_be (b7, rkey);

      DO_ROUND(1);
      DO_ROUND(2);
      DO_ROUND(3);
      DO_ROUND(4);
      DO_ROUND(5);
      DO_ROUND(6);
      DO_ROUND(7);
      DO_ROUND(8);
      DO_ROUND(9);
      if (rounds >= 12)
        {
          DO_ROUND(10);
          DO_ROUND(11);
          if (rounds > 12)
            {
              DO_ROUND(12);
              DO_ROUND(13);
            }
        }

#undef DO_ROUND

      rkey = rkeylast;
      b0 = asm_cipherlast_be (b0, rkey);
      b1 = asm_cipherlast_be (b1, rkey);
      b2 = asm_cipherlast_be (b2, rkey);
      b3 = asm_cipherlast_be (b3, rkey);
      b4 = asm_cipherlast_be (b4, rkey);
      b5 = asm_cipherlast_be (b5, rkey);
      b6 = asm_cipherlast_be (b6, rkey);
      b7 = asm_cipherlast_be (b7, rkey);

      ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;

      abuf += 8;
    }

  if (nblocks >= 4 && (data_nblocks % 4) == 0)
    {
      b0 = VEC_LOAD_BE (abuf, 0, bige_const);
      b1 = VEC_LOAD_BE (abuf, 1, bige_const);
      b2 = VEC_LOAD_BE (abuf, 2, bige_const);
      b3 = VEC_LOAD_BE (abuf, 3, bige_const);

      l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);

      frkey = rkey0;
      iv ^= frkey;

      iv0 = iv ^ l0;
      iv1 = iv ^ l0 ^ l1;
      iv2 = iv ^ l1;
      iv3 = iv ^ l1 ^ l;

      b0 ^= iv0;
      b1 ^= iv1;
      b2 ^= iv2;
      b3 ^= iv3;
      iv = iv3 ^ frkey;

#define DO_ROUND(r) \
      rkey = ALIGNED_LOAD (rk, r); \
      b0 = asm_cipher_be (b0, rkey); \
      b1 = asm_cipher_be (b1, rkey); \
      b2 = asm_cipher_be (b2, rkey); \
      b3 = asm_cipher_be (b3, rkey);

      DO_ROUND(1);
      DO_ROUND(2);
      DO_ROUND(3);
      DO_ROUND(4);
      DO_ROUND(5);
      DO_ROUND(6);
      DO_ROUND(7);
      DO_ROUND(8);
      DO_ROUND(9);
      if (rounds >= 12)
        {
          DO_ROUND(10);
          DO_ROUND(11);
          if (rounds > 12)
            {
              DO_ROUND(12);
              DO_ROUND(13);
            }
        }

#undef DO_ROUND

      rkey = rkeylast;
      b0 = asm_cipherlast_be (b0, rkey);
      b1 = asm_cipherlast_be (b1, rkey);
      b2 = asm_cipherlast_be (b2, rkey);
      b3 = asm_cipherlast_be (b3, rkey);

      ctr ^= b0 ^ b1 ^ b2 ^ b3;

      abuf += 4;
      nblocks -= 4;
    }

  for (; nblocks; nblocks--)
    {
      l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
      b = VEC_LOAD_BE (abuf, 0, bige_const);

      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
      iv ^= l;
      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
      b ^= iv;
      AES_ENCRYPT (b, rounds);
      ctr ^= b;

      abuf += 1;
    }

  VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const);
  VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const);
  c->u_mode.ocb.aad_nblocks = data_nblocks;

  return 0;
}

void PPC_OPT_ATTR
XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg, void *outbuf_arg,
                const void *inbuf_arg, size_t nblocks, int encrypt)
{
#ifdef WORDS_BIGENDIAN
  static const block vec_bswap128_const =
    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
#else
  static const block vec_bswap128_const =
    { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 };
#endif
  static const unsigned char vec_tweak_const[16] =
    { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 };
  static const vector unsigned long long vec_shift63_const =
    { 63, 63 };
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = context;
  const u128_t *in = (const u128_t *)inbuf_arg;
  u128_t *out = (u128_t *)outbuf_arg;
  int rounds = ctx->rounds;
  block tweak;
  block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf;
  block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7;
  block tweak_const, bswap128_const, shift63_const;
  ROUND_KEY_VARIABLES;

  tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const);
  bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0);
  shift63_const = ALIGNED_LOAD (&vec_shift63_const, 0);

  tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const);
  tweak = asm_vperm1 (tweak, bswap128_const);

#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \
  do { \
    block tmp1, tmp2; \
    tmp1 = asm_swap_uint64_halfs(tin); \
    tmp2 = asm_add_uint64(tin, tin); \
    tmp1 = asm_sra_int64(tmp1, shift63_const) & tweak_const; \
    tout = asm_xor(tmp1, tmp2); \
  } while (0)

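/* GEN_TWEAK multiplies the tweak by x in GF(2^128) (polynomial
 * x^128 + x^7 + x^2 + x + 1) without branches: the doubling is a per-64-bit
 * shift left by one (asm_add_uint64), the arithmetic shift right by 63 of
 * the half-swapped tweak broadcasts each half's lost top bit as a mask, and
 * ANDing with vec_tweak_const turns those masks into the cross-word carry
 * (+1) and the 0x87 reduction constant that are XORed back in.  The tweak is
 * kept in this representation between blocks and only permuted with
 * bswap128_const when it is actually XORed into data. */
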
if (encrypt)
|
|
{
|
|
const u128_t *rk = (u128_t *)&ctx->keyschenc;
|
|
|
|
PRELOAD_ROUND_KEYS (rounds);
|
|
|
|
for (; nblocks >= 8; nblocks -= 8)
|
|
{
|
|
b0 = VEC_LOAD_BE_NOSWAP (in, 0);
|
|
b1 = VEC_LOAD_BE_NOSWAP (in, 1);
|
|
b2 = VEC_LOAD_BE_NOSWAP (in, 2);
|
|
b3 = VEC_LOAD_BE_NOSWAP (in, 3);
|
|
tweak0 = tweak;
|
|
GEN_TWEAK (tweak1, tweak0);
|
|
tweak0 = asm_vperm1 (tweak0, bswap128_const);
|
|
b4 = VEC_LOAD_BE_NOSWAP (in, 4);
|
|
b5 = VEC_LOAD_BE_NOSWAP (in, 5);
|
|
GEN_TWEAK (tweak2, tweak1);
|
|
tweak1 = asm_vperm1 (tweak1, bswap128_const);
|
|
b6 = VEC_LOAD_BE_NOSWAP (in, 6);
|
|
b7 = VEC_LOAD_BE_NOSWAP (in, 7);
|
|
in += 8;
|
|
|
|
b0 = VEC_BE_SWAP(b0, bige_const);
|
|
b1 = VEC_BE_SWAP(b1, bige_const);
|
|
GEN_TWEAK (tweak3, tweak2);
|
|
tweak2 = asm_vperm1 (tweak2, bswap128_const);
|
|
GEN_TWEAK (tweak4, tweak3);
|
|
tweak3 = asm_vperm1 (tweak3, bswap128_const);
|
|
b2 = VEC_BE_SWAP(b2, bige_const);
|
|
b3 = VEC_BE_SWAP(b3, bige_const);
|
|
GEN_TWEAK (tweak5, tweak4);
|
|
tweak4 = asm_vperm1 (tweak4, bswap128_const);
|
|
GEN_TWEAK (tweak6, tweak5);
|
|
tweak5 = asm_vperm1 (tweak5, bswap128_const);
|
|
b4 = VEC_BE_SWAP(b4, bige_const);
|
|
b5 = VEC_BE_SWAP(b5, bige_const);
|
|
GEN_TWEAK (tweak7, tweak6);
|
|
tweak6 = asm_vperm1 (tweak6, bswap128_const);
|
|
GEN_TWEAK (tweak, tweak7);
|
|
tweak7 = asm_vperm1 (tweak7, bswap128_const);
|
|
b6 = VEC_BE_SWAP(b6, bige_const);
|
|
b7 = VEC_BE_SWAP(b7, bige_const);
|
|
|
|
tweak0 = asm_xor (tweak0, rkey0);
|
|
tweak1 = asm_xor (tweak1, rkey0);
|
|
tweak2 = asm_xor (tweak2, rkey0);
|
|
tweak3 = asm_xor (tweak3, rkey0);
|
|
tweak4 = asm_xor (tweak4, rkey0);
|
|
tweak5 = asm_xor (tweak5, rkey0);
|
|
tweak6 = asm_xor (tweak6, rkey0);
|
|
tweak7 = asm_xor (tweak7, rkey0);
|
|
|
|
b0 = asm_xor (b0, tweak0);
|
|
b1 = asm_xor (b1, tweak1);
|
|
b2 = asm_xor (b2, tweak2);
|
|
b3 = asm_xor (b3, tweak3);
|
|
b4 = asm_xor (b4, tweak4);
|
|
b5 = asm_xor (b5, tweak5);
|
|
b6 = asm_xor (b6, tweak6);
|
|
b7 = asm_xor (b7, tweak7);
|
|
|
|
#define DO_ROUND(r) \
|
|
rkey = ALIGNED_LOAD (rk, r); \
|
|
b0 = asm_cipher_be (b0, rkey); \
|
|
b1 = asm_cipher_be (b1, rkey); \
|
|
b2 = asm_cipher_be (b2, rkey); \
|
|
b3 = asm_cipher_be (b3, rkey); \
|
|
b4 = asm_cipher_be (b4, rkey); \
|
|
b5 = asm_cipher_be (b5, rkey); \
|
|
b6 = asm_cipher_be (b6, rkey); \
|
|
b7 = asm_cipher_be (b7, rkey);
|
|
|
|
DO_ROUND(1);
|
|
DO_ROUND(2);
|
|
DO_ROUND(3);
|
|
DO_ROUND(4);
|
|
DO_ROUND(5);
|
|
DO_ROUND(6);
|
|
DO_ROUND(7);
|
|
|
|
rkeylf = asm_xor (rkeylast, rkey0);
|
|
|
|
DO_ROUND(8);
|
|
|
|
tweak0 = asm_xor (tweak0, rkeylf);
|
|
tweak1 = asm_xor (tweak1, rkeylf);
|
|
tweak2 = asm_xor (tweak2, rkeylf);
|
|
tweak3 = asm_xor (tweak3, rkeylf);
|
|
tweak4 = asm_xor (tweak4, rkeylf);
|
|
tweak5 = asm_xor (tweak5, rkeylf);
|
|
tweak6 = asm_xor (tweak6, rkeylf);
|
|
tweak7 = asm_xor (tweak7, rkeylf);
|
|
|
|
DO_ROUND(9);
|
|
if (rounds >= 12)
|
|
{
|
|
DO_ROUND(10);
|
|
DO_ROUND(11);
|
|
if (rounds > 12)
|
|
{
|
|
DO_ROUND(12);
|
|
DO_ROUND(13);
|
|
}
|
|
}
|
|
|
|
#undef DO_ROUND
|
|
|
|
b0 = asm_cipherlast_be (b0, tweak0);
|
|
b1 = asm_cipherlast_be (b1, tweak1);
|
|
b2 = asm_cipherlast_be (b2, tweak2);
|
|
b3 = asm_cipherlast_be (b3, tweak3);
|
|
b0 = VEC_BE_SWAP (b0, bige_const);
|
|
b1 = VEC_BE_SWAP (b1, bige_const);
|
|
b4 = asm_cipherlast_be (b4, tweak4);
|
|
b5 = asm_cipherlast_be (b5, tweak5);
|
|
b2 = VEC_BE_SWAP (b2, bige_const);
|
|
b3 = VEC_BE_SWAP (b3, bige_const);
|
|
b6 = asm_cipherlast_be (b6, tweak6);
|
|
b7 = asm_cipherlast_be (b7, tweak7);
|
|
VEC_STORE_BE_NOSWAP (out, 0, b0);
|
|
VEC_STORE_BE_NOSWAP (out, 1, b1);
|
|
b4 = VEC_BE_SWAP (b4, bige_const);
|
|
b5 = VEC_BE_SWAP (b5, bige_const);
|
|
VEC_STORE_BE_NOSWAP (out, 2, b2);
|
|
VEC_STORE_BE_NOSWAP (out, 3, b3);
|
|
b6 = VEC_BE_SWAP (b6, bige_const);
|
|
b7 = VEC_BE_SWAP (b7, bige_const);
|
|
VEC_STORE_BE_NOSWAP (out, 4, b4);
|
|
VEC_STORE_BE_NOSWAP (out, 5, b5);
|
|
VEC_STORE_BE_NOSWAP (out, 6, b6);
|
|
VEC_STORE_BE_NOSWAP (out, 7, b7);
|
|
out += 8;
|
|
}
|
|
|
|
if (nblocks >= 4)
|
|
{
|
|
tweak0 = tweak;
|
|
GEN_TWEAK (tweak1, tweak0);
|
|
GEN_TWEAK (tweak2, tweak1);
|
|
GEN_TWEAK (tweak3, tweak2);
|
|
GEN_TWEAK (tweak, tweak3);
|
|
|
|
b0 = VEC_LOAD_BE (in, 0, bige_const);
|
|
b1 = VEC_LOAD_BE (in, 1, bige_const);
|
|
b2 = VEC_LOAD_BE (in, 2, bige_const);
|
|
b3 = VEC_LOAD_BE (in, 3, bige_const);
|
|
|
|
tweak0 = asm_vperm1 (tweak0, bswap128_const);
|
|
tweak1 = asm_vperm1 (tweak1, bswap128_const);
|
|
tweak2 = asm_vperm1 (tweak2, bswap128_const);
|
|
tweak3 = asm_vperm1 (tweak3, bswap128_const);
|
|
|
|
b0 ^= tweak0 ^ rkey0;
|
|
b1 ^= tweak1 ^ rkey0;
|
|
b2 ^= tweak2 ^ rkey0;
|
|
b3 ^= tweak3 ^ rkey0;
|
|
|
|
#define DO_ROUND(r) \
|
|
rkey = ALIGNED_LOAD (rk, r); \
|
|
b0 = asm_cipher_be (b0, rkey); \
|
|
b1 = asm_cipher_be (b1, rkey); \
|
|
b2 = asm_cipher_be (b2, rkey); \
|
|
b3 = asm_cipher_be (b3, rkey);
|
|
|
|
DO_ROUND(1);
|
|
DO_ROUND(2);
|
|
DO_ROUND(3);
|
|
DO_ROUND(4);
|
|
DO_ROUND(5);
|
|
DO_ROUND(6);
|
|
DO_ROUND(7);
|
|
DO_ROUND(8);
|
|
DO_ROUND(9);
|
|
if (rounds >= 12)
|
|
{
|
|
DO_ROUND(10);
|
|
DO_ROUND(11);
|
|
if (rounds > 12)
|
|
{
|
|
DO_ROUND(12);
|
|
DO_ROUND(13);
|
|
}
|
|
}
|
|
|
|
#undef DO_ROUND
|
|
|
|
rkey = rkeylast;
|
|
b0 = asm_cipherlast_be (b0, rkey ^ tweak0);
|
|
b1 = asm_cipherlast_be (b1, rkey ^ tweak1);
|
|
b2 = asm_cipherlast_be (b2, rkey ^ tweak2);
|
|
b3 = asm_cipherlast_be (b3, rkey ^ tweak3);
|
|
|
|
VEC_STORE_BE (out, 0, b0, bige_const);
|
|
VEC_STORE_BE (out, 1, b1, bige_const);
|
|
VEC_STORE_BE (out, 2, b2, bige_const);
|
|
VEC_STORE_BE (out, 3, b3, bige_const);
|
|
|
|
in += 4;
|
|
out += 4;
|
|
nblocks -= 4;
|
|
}
|
|
|
|
for (; nblocks; nblocks--)
|
|
{
|
|
tweak0 = asm_vperm1 (tweak, bswap128_const);
|
|
|
|
/* Xor-Encrypt/Decrypt-Xor block. */
|
|
b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
|
|
|
|
/* Generate next tweak. */
|
|
GEN_TWEAK (tweak, tweak);
|
|
|
|
AES_ENCRYPT (b, rounds);
|
|
|
|
b ^= tweak0;
|
|
VEC_STORE_BE (out, 0, b, bige_const);
|
|
|
|
in++;
|
|
out++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
const u128_t *rk = (u128_t *)&ctx->keyschdec;
|
|
|
|
if (!ctx->decryption_prepared)
|
|
{
|
|
internal_aes_ppc_prepare_decryption (ctx);
|
|
ctx->decryption_prepared = 1;
|
|
}
|
|
|
|
PRELOAD_ROUND_KEYS (rounds);
|
|
|
|
for (; nblocks >= 8; nblocks -= 8)
|
|
{
|
|
b0 = VEC_LOAD_BE_NOSWAP (in, 0);
|
|
b1 = VEC_LOAD_BE_NOSWAP (in, 1);
|
|
b2 = VEC_LOAD_BE_NOSWAP (in, 2);
|
|
b3 = VEC_LOAD_BE_NOSWAP (in, 3);
|
|
tweak0 = tweak;
|
|
GEN_TWEAK (tweak1, tweak0);
|
|
tweak0 = asm_vperm1 (tweak0, bswap128_const);
|
|
b4 = VEC_LOAD_BE_NOSWAP (in, 4);
|
|
b5 = VEC_LOAD_BE_NOSWAP (in, 5);
|
|
GEN_TWEAK (tweak2, tweak1);
|
|
tweak1 = asm_vperm1 (tweak1, bswap128_const);
|
|
b6 = VEC_LOAD_BE_NOSWAP (in, 6);
|
|
b7 = VEC_LOAD_BE_NOSWAP (in, 7);
|
|
in += 8;
|
|
|
|
b0 = VEC_BE_SWAP(b0, bige_const);
|
|
b1 = VEC_BE_SWAP(b1, bige_const);
|
|
GEN_TWEAK (tweak3, tweak2);
|
|
tweak2 = asm_vperm1 (tweak2, bswap128_const);
|
|
GEN_TWEAK (tweak4, tweak3);
|
|
tweak3 = asm_vperm1 (tweak3, bswap128_const);
|
|
b2 = VEC_BE_SWAP(b2, bige_const);
|
|
b3 = VEC_BE_SWAP(b3, bige_const);
|
|
GEN_TWEAK (tweak5, tweak4);
|
|
tweak4 = asm_vperm1 (tweak4, bswap128_const);
|
|
GEN_TWEAK (tweak6, tweak5);
|
|
tweak5 = asm_vperm1 (tweak5, bswap128_const);
|
|
b4 = VEC_BE_SWAP(b4, bige_const);
|
|
b5 = VEC_BE_SWAP(b5, bige_const);
|
|
GEN_TWEAK (tweak7, tweak6);
|
|
tweak6 = asm_vperm1 (tweak6, bswap128_const);
|
|
GEN_TWEAK (tweak, tweak7);
|
|
tweak7 = asm_vperm1 (tweak7, bswap128_const);
|
|
b6 = VEC_BE_SWAP(b6, bige_const);
|
|
b7 = VEC_BE_SWAP(b7, bige_const);
          tweak0 = asm_xor (tweak0, rkey0);
          tweak1 = asm_xor (tweak1, rkey0);
          tweak2 = asm_xor (tweak2, rkey0);
          tweak3 = asm_xor (tweak3, rkey0);
          tweak4 = asm_xor (tweak4, rkey0);
          tweak5 = asm_xor (tweak5, rkey0);
          tweak6 = asm_xor (tweak6, rkey0);
          tweak7 = asm_xor (tweak7, rkey0);

          b0 = asm_xor (b0, tweak0);
          b1 = asm_xor (b1, tweak1);
          b2 = asm_xor (b2, tweak2);
          b3 = asm_xor (b3, tweak3);
          b4 = asm_xor (b4, tweak4);
          b5 = asm_xor (b5, tweak5);
          b6 = asm_xor (b6, tweak6);
          b7 = asm_xor (b7, tweak7);

#define DO_ROUND(r) \
              rkey = ALIGNED_LOAD (rk, r); \
              b0 = asm_ncipher_be (b0, rkey); \
              b1 = asm_ncipher_be (b1, rkey); \
              b2 = asm_ncipher_be (b2, rkey); \
              b3 = asm_ncipher_be (b3, rkey); \
              b4 = asm_ncipher_be (b4, rkey); \
              b5 = asm_ncipher_be (b5, rkey); \
              b6 = asm_ncipher_be (b6, rkey); \
              b7 = asm_ncipher_be (b7, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
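
          /* rkeylf = rkeylast ^ rkey0.  The tweaks already carry rkey0
             (folded in above), so XORing them with rkeylf cancels rkey0
             and leaves rkeylast ^ tweak, ready for the final round.
             Doing this here overlaps the XORs with the remaining AES
             rounds. */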
          rkeylf = asm_xor (rkeylast, rkey0);

          DO_ROUND(8);

          tweak0 = asm_xor (tweak0, rkeylf);
          tweak1 = asm_xor (tweak1, rkeylf);
          tweak2 = asm_xor (tweak2, rkeylf);
          tweak3 = asm_xor (tweak3, rkeylf);
          tweak4 = asm_xor (tweak4, rkeylf);
          tweak5 = asm_xor (tweak5, rkeylf);
          tweak6 = asm_xor (tweak6, rkeylf);
          tweak7 = asm_xor (tweak7, rkeylf);

          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          b0 = asm_ncipherlast_be (b0, tweak0);
          b1 = asm_ncipherlast_be (b1, tweak1);
          b2 = asm_ncipherlast_be (b2, tweak2);
          b3 = asm_ncipherlast_be (b3, tweak3);
          b0 = VEC_BE_SWAP (b0, bige_const);
          b1 = VEC_BE_SWAP (b1, bige_const);
          b4 = asm_ncipherlast_be (b4, tweak4);
          b5 = asm_ncipherlast_be (b5, tweak5);
          b2 = VEC_BE_SWAP (b2, bige_const);
          b3 = VEC_BE_SWAP (b3, bige_const);
          b6 = asm_ncipherlast_be (b6, tweak6);
          b7 = asm_ncipherlast_be (b7, tweak7);
          VEC_STORE_BE_NOSWAP (out, 0, b0);
          VEC_STORE_BE_NOSWAP (out, 1, b1);
          b4 = VEC_BE_SWAP (b4, bige_const);
          b5 = VEC_BE_SWAP (b5, bige_const);
          VEC_STORE_BE_NOSWAP (out, 2, b2);
          VEC_STORE_BE_NOSWAP (out, 3, b3);
          b6 = VEC_BE_SWAP (b6, bige_const);
          b7 = VEC_BE_SWAP (b7, bige_const);
          VEC_STORE_BE_NOSWAP (out, 4, b4);
          VEC_STORE_BE_NOSWAP (out, 5, b5);
          VEC_STORE_BE_NOSWAP (out, 6, b6);
          VEC_STORE_BE_NOSWAP (out, 7, b7);
          out += 8;
        }
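
      /* Four-block tail, same structure as the eight-block loop but
         without the load/store software pipelining. */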
      if (nblocks >= 4)
        {
          tweak0 = tweak;
          GEN_TWEAK (tweak1, tweak0);
          GEN_TWEAK (tweak2, tweak1);
          GEN_TWEAK (tweak3, tweak2);
          GEN_TWEAK (tweak, tweak3);

          b0 = VEC_LOAD_BE (in, 0, bige_const);
          b1 = VEC_LOAD_BE (in, 1, bige_const);
          b2 = VEC_LOAD_BE (in, 2, bige_const);
          b3 = VEC_LOAD_BE (in, 3, bige_const);

          tweak0 = asm_vperm1 (tweak0, bswap128_const);
          tweak1 = asm_vperm1 (tweak1, bswap128_const);
          tweak2 = asm_vperm1 (tweak2, bswap128_const);
          tweak3 = asm_vperm1 (tweak3, bswap128_const);

          b0 ^= tweak0 ^ rkey0;
          b1 ^= tweak1 ^ rkey0;
          b2 ^= tweak2 ^ rkey0;
          b3 ^= tweak3 ^ rkey0;

#define DO_ROUND(r) \
              rkey = ALIGNED_LOAD (rk, r); \
              b0 = asm_ncipher_be (b0, rkey); \
              b1 = asm_ncipher_be (b1, rkey); \
              b2 = asm_ncipher_be (b2, rkey); \
              b3 = asm_ncipher_be (b3, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
          DO_ROUND(8);
          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          rkey = rkeylast;
          b0 = asm_ncipherlast_be (b0, rkey ^ tweak0);
          b1 = asm_ncipherlast_be (b1, rkey ^ tweak1);
          b2 = asm_ncipherlast_be (b2, rkey ^ tweak2);
          b3 = asm_ncipherlast_be (b3, rkey ^ tweak3);

          VEC_STORE_BE (out, 0, b0, bige_const);
          VEC_STORE_BE (out, 1, b1, bige_const);
          VEC_STORE_BE (out, 2, b2, bige_const);
          VEC_STORE_BE (out, 3, b3, bige_const);

          in += 4;
          out += 4;
          nblocks -= 4;
        }
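
      /* Final 0..3 blocks, one at a time. */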
      for (; nblocks; nblocks--)
        {
          tweak0 = asm_vperm1 (tweak, bswap128_const);

          /* Xor-Encrypt/Decrypt-Xor block. */
          b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;

          /* Generate next tweak. */
          GEN_TWEAK (tweak, tweak);

          AES_DECRYPT (b, rounds);

          b ^= tweak0;
          VEC_STORE_BE (out, 0, b, bige_const);

          in++;
          out++;
        }
    }
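
  /* Write the updated tweak back for the caller, converted back to
     memory byte order. */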
  tweak = asm_vperm1 (tweak, bswap128_const);
  VEC_STORE_BE (tweak_arg, 0, tweak, bige_const);

#undef GEN_TWEAK
}
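

/* CTR mode with a 32-bit little-endian counter in the low 32 bits of the
 * counter block (as used by, e.g., AES-GCM-SIV); the remaining 96 bits are
 * carried over unchanged.  vec_u32_one below holds a 32-bit 1 in the vector
 * lane of the low counter word, for either host endianness. */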
void PPC_OPT_ATTR
CTR32LE_ENC_FUNC(void *context, unsigned char *ctr_arg, void *outbuf_arg,
                 const void *inbuf_arg, size_t nblocks)
{
#ifndef WORDS_BIGENDIAN
  static const vec_u32 vec_u32_one = { 1, 0, 0, 0 };
#else
  static const vec_u32 vec_u32_one = { 0, 0, 0, 1 };
#endif
  const block bige_const = asm_load_be_const();
  RIJNDAEL_context *ctx = context;
  const u128_t *rk = (u128_t *)&ctx->keyschenc;
  const u128_t *in = (const u128_t *)inbuf_arg;
  u128_t *out = (u128_t *)outbuf_arg;
  int rounds = ctx->rounds;
  ROUND_KEY_VARIABLES;
  block rkeylast_orig;
  block b;
  vec_u32 ctr, one;

  ctr = (vec_u32)vec_reve (VEC_LOAD_BE (ctr_arg, 0, bige_const));
  one = vec_u32_one;

  PRELOAD_ROUND_KEYS (rounds);
  rkeylast_orig = rkeylast;
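
  /* Add a 32-bit per-block increment to the counter lanes, then
     byte-reverse the sum back into block byte order for the cipher. */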
#define VEC_ADD_CTRLE32(ctrv_u32, addv_u32) \
  vec_reve((block)((ctrv_u32) + (addv_u32)))

  if (nblocks >= 4)
    {
      block in0, in1, in2, in3, in4, in5, in6, in7;
      block b0, b1, b2, b3, b4, b5, b6, b7;
      vec_u32 two, three, four, five, six, seven, eight;
      block rkey;

      two = one + one;
      three = two + one;
      four = two + two;
      five = three + two;
      six = three + three;
      seven = four + three;
      eight = four + four;
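
      /* With these precomputed lane increments, every counter block in an
         eight-block batch derives directly from ctr, avoiding a serial
         add chain. */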
      for (; nblocks >= 8; nblocks -= 8)
        {
          b1 = VEC_ADD_CTRLE32 (ctr, one);
          b2 = VEC_ADD_CTRLE32 (ctr, two);
          b3 = VEC_ADD_CTRLE32 (ctr, three);
          b4 = VEC_ADD_CTRLE32 (ctr, four);
          b5 = VEC_ADD_CTRLE32 (ctr, five);
          b6 = VEC_ADD_CTRLE32 (ctr, six);
          b7 = VEC_ADD_CTRLE32 (ctr, seven);
          b0 = asm_xor (rkey0, vec_reve((block)ctr));
          rkey = ALIGNED_LOAD (rk, 1);
          ctr = ctr + eight;
          b1 = asm_xor (rkey0, b1);
          b2 = asm_xor (rkey0, b2);
          b3 = asm_xor (rkey0, b3);
          b0 = asm_cipher_be (b0, rkey);
          b1 = asm_cipher_be (b1, rkey);
          b2 = asm_cipher_be (b2, rkey);
          b3 = asm_cipher_be (b3, rkey);
          b4 = asm_xor (rkey0, b4);
          b5 = asm_xor (rkey0, b5);
          b6 = asm_xor (rkey0, b6);
          b7 = asm_xor (rkey0, b7);
          b4 = asm_cipher_be (b4, rkey);
          b5 = asm_cipher_be (b5, rkey);
          b6 = asm_cipher_be (b6, rkey);
          b7 = asm_cipher_be (b7, rkey);

#define DO_ROUND(r) \
              rkey = ALIGNED_LOAD (rk, r); \
              b0 = asm_cipher_be (b0, rkey); \
              b1 = asm_cipher_be (b1, rkey); \
              b2 = asm_cipher_be (b2, rkey); \
              b3 = asm_cipher_be (b3, rkey); \
              b4 = asm_cipher_be (b4, rkey); \
              b5 = asm_cipher_be (b5, rkey); \
              b6 = asm_cipher_be (b6, rkey); \
              b7 = asm_cipher_be (b7, rkey);
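
          /* Round 1 was applied inline above, so the unrolled sequence
             starts at round 2; plaintext loads are interleaved between
             rounds to overlap memory latency with the cipher. */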
          in0 = VEC_LOAD_BE_NOSWAP (in, 0);
          DO_ROUND(2);
          in1 = VEC_LOAD_BE_NOSWAP (in, 1);
          DO_ROUND(3);
          in2 = VEC_LOAD_BE_NOSWAP (in, 2);
          DO_ROUND(4);
          in3 = VEC_LOAD_BE_NOSWAP (in, 3);
          DO_ROUND(5);
          in4 = VEC_LOAD_BE_NOSWAP (in, 4);
          DO_ROUND(6);
          in5 = VEC_LOAD_BE_NOSWAP (in, 5);
          DO_ROUND(7);
          in6 = VEC_LOAD_BE_NOSWAP (in, 6);
          DO_ROUND(8);
          in7 = VEC_LOAD_BE_NOSWAP (in, 7);
          in += 8;
          DO_ROUND(9);

          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND
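
          /* Folding the last round key into each plaintext block lets
             asm_cipherlast_be perform the final AES round and the CTR
             keystream XOR in a single step. */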
          in0 = VEC_BE_SWAP (in0, bige_const);
          in1 = VEC_BE_SWAP (in1, bige_const);
          in2 = VEC_BE_SWAP (in2, bige_const);
          in3 = VEC_BE_SWAP (in3, bige_const);
          in4 = VEC_BE_SWAP (in4, bige_const);
          in5 = VEC_BE_SWAP (in5, bige_const);
          in6 = VEC_BE_SWAP (in6, bige_const);
          in7 = VEC_BE_SWAP (in7, bige_const);

          in0 = asm_xor (rkeylast, in0);
          in1 = asm_xor (rkeylast, in1);
          in2 = asm_xor (rkeylast, in2);
          in3 = asm_xor (rkeylast, in3);
          b0 = asm_cipherlast_be (b0, in0);
          b1 = asm_cipherlast_be (b1, in1);
          in4 = asm_xor (rkeylast, in4);
          in5 = asm_xor (rkeylast, in5);
          b2 = asm_cipherlast_be (b2, in2);
          b3 = asm_cipherlast_be (b3, in3);
          in6 = asm_xor (rkeylast, in6);
          in7 = asm_xor (rkeylast, in7);
          b4 = asm_cipherlast_be (b4, in4);
          b5 = asm_cipherlast_be (b5, in5);
          b6 = asm_cipherlast_be (b6, in6);
          b7 = asm_cipherlast_be (b7, in7);

          b0 = VEC_BE_SWAP (b0, bige_const);
          b1 = VEC_BE_SWAP (b1, bige_const);
          b2 = VEC_BE_SWAP (b2, bige_const);
          b3 = VEC_BE_SWAP (b3, bige_const);
          b4 = VEC_BE_SWAP (b4, bige_const);
          b5 = VEC_BE_SWAP (b5, bige_const);
          b6 = VEC_BE_SWAP (b6, bige_const);
          b7 = VEC_BE_SWAP (b7, bige_const);
          VEC_STORE_BE_NOSWAP (out, 0, b0);
          VEC_STORE_BE_NOSWAP (out, 1, b1);
          VEC_STORE_BE_NOSWAP (out, 2, b2);
          VEC_STORE_BE_NOSWAP (out, 3, b3);
          VEC_STORE_BE_NOSWAP (out, 4, b4);
          VEC_STORE_BE_NOSWAP (out, 5, b5);
          VEC_STORE_BE_NOSWAP (out, 6, b6);
          VEC_STORE_BE_NOSWAP (out, 7, b7);
          out += 8;
        }
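
      /* Four-block tail with the same structure as the eight-block loop. */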
      if (nblocks >= 4)
        {
          b1 = VEC_ADD_CTRLE32 (ctr, one);
          b2 = VEC_ADD_CTRLE32 (ctr, two);
          b3 = VEC_ADD_CTRLE32 (ctr, three);
          b0 = asm_xor (rkey0, vec_reve((block)ctr));
          ctr = ctr + four;
          b1 = asm_xor (rkey0, b1);
          b2 = asm_xor (rkey0, b2);
          b3 = asm_xor (rkey0, b3);

#define DO_ROUND(r) \
              rkey = ALIGNED_LOAD (rk, r); \
              b0 = asm_cipher_be (b0, rkey); \
              b1 = asm_cipher_be (b1, rkey); \
              b2 = asm_cipher_be (b2, rkey); \
              b3 = asm_cipher_be (b3, rkey);

          DO_ROUND(1);
          DO_ROUND(2);
          DO_ROUND(3);
          DO_ROUND(4);
          DO_ROUND(5);
          DO_ROUND(6);
          DO_ROUND(7);
          DO_ROUND(8);

          in0 = VEC_LOAD_BE (in, 0, bige_const);
          in1 = VEC_LOAD_BE (in, 1, bige_const);
          in2 = VEC_LOAD_BE (in, 2, bige_const);
          in3 = VEC_LOAD_BE (in, 3, bige_const);

          DO_ROUND(9);
          if (rounds >= 12)
            {
              DO_ROUND(10);
              DO_ROUND(11);
              if (rounds > 12)
                {
                  DO_ROUND(12);
                  DO_ROUND(13);
                }
            }

#undef DO_ROUND

          in0 = asm_xor (rkeylast, in0);
          in1 = asm_xor (rkeylast, in1);
          in2 = asm_xor (rkeylast, in2);
          in3 = asm_xor (rkeylast, in3);

          b0 = asm_cipherlast_be (b0, in0);
          b1 = asm_cipherlast_be (b1, in1);
          b2 = asm_cipherlast_be (b2, in2);
          b3 = asm_cipherlast_be (b3, in3);

          VEC_STORE_BE (out, 0, b0, bige_const);
          VEC_STORE_BE (out, 1, b1, bige_const);
          VEC_STORE_BE (out, 2, b2, bige_const);
          VEC_STORE_BE (out, 3, b3, bige_const);

          in += 4;
          out += 4;
          nblocks -= 4;
        }
    }
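
  /* Final 0..3 blocks: the next plaintext block is folded into rkeylast,
     so the last round inside AES_ENCRYPT also applies the CTR XOR. */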
  for (; nblocks; nblocks--)
    {
      b = vec_reve((block)ctr);
      ctr = ctr + one;
      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);

      AES_ENCRYPT (b, rounds);

      VEC_STORE_BE (out, 0, b, bige_const);

      out++;
      in++;
    }

#undef VEC_ADD_CTRLE32
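
  /* Write the incremented counter back for the caller. */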
  VEC_STORE_BE (ctr_arg, 0, vec_reve((block)ctr), bige_const);
}