modes/ocb128.c: split fixed-block XORs into aligned and misaligned variants.
The main goal is to improve performance on RISC platforms; e.g. a 10%
improvement was measured on MIPS, POWER8...
Reviewed-by: Matt Caswell <matt@openssl.org>
diff --git a/crypto/modes/ocb128.c b/crypto/modes/ocb128.c
index 5408d50..d49aa6e 100644
--- a/crypto/modes/ocb128.c
+++ b/crypto/modes/ocb128.c
@@ -53,11 +53,6 @@
#ifndef OPENSSL_NO_OCB
-union ublock {
- unsigned char *chrblk;
- OCB_BLOCK *ocbblk;
-};
-
/*
* Calculate the number of binary trailing zero's in any given number
*/
@@ -88,23 +83,18 @@
unsigned char shift_mask;
int i;
unsigned char mask[15];
- union ublock locin;
- union ublock locout;
-
- locin.ocbblk = in;
- locout.ocbblk = out;
shift_mask = 0xff;
shift_mask <<= (8 - shift);
for (i = 15; i >= 0; i--) {
if (i > 0) {
- mask[i - 1] = locin.chrblk[i] & shift_mask;
+ mask[i - 1] = in->c[i] & shift_mask;
mask[i - 1] >>= 8 - shift;
}
- locout.chrblk[i] = locin.chrblk[i] << shift;
+ out->c[i] = in->c[i] << shift;
if (i != 15) {
- locout.chrblk[i] ^= mask[i];
+ out->c[i] ^= mask[i];
}
}
}
@@ -115,23 +105,18 @@
static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out)
{
unsigned char mask;
- union ublock locin;
- union ublock locout;
-
- locin.ocbblk = in;
- locout.ocbblk = out;
/*
* Calculate the mask based on the most significant bit. There are more
* efficient ways to do this - but this way is constant time
*/
- mask = locin.chrblk[0] & 0x80;
+ mask = in->c[0] & 0x80;
mask >>= 7;
mask *= 135;
ocb_block_lshift(in, 1, out);
- locout.chrblk[15] ^= mask;
+ out->c[15] ^= mask;
}
/*
@@ -191,13 +176,7 @@
static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
void *keyenc)
{
- union ublock locin;
- union ublock locout;
-
- locin.ocbblk = in;
- locout.ocbblk = out;
-
- ctx->encrypt(locin.chrblk, locout.chrblk, keyenc);
+ ctx->encrypt(in->c, out->c, keyenc);
}
/*
@@ -206,13 +185,7 @@
static void ocb_decrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
void *keydec)
{
- union ublock locin;
- union ublock locout;
-
- locin.ocbblk = in;
- locout.ocbblk = out;
-
- ctx->decrypt(locin.chrblk, locout.chrblk, keydec);
+ ctx->decrypt(in->c, out->c, keydec);
}
/*
@@ -305,9 +278,6 @@
unsigned char ktop[16], tmp[16], mask;
unsigned char stretch[24], nonce[16];
size_t bottom, shift;
- union ublock offset;
-
- offset.ocbblk = &ctx->offset;
/*
* Spec says IV is 120 bits or fewer - it allows non byte aligned lengths.
@@ -341,7 +311,7 @@
&ctx->offset);
mask = 0xff;
mask <<= 8 - shift;
- offset.chrblk[15] |=
+ ctx->offset.c[15] |=
(*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift);
return 1;
@@ -444,13 +414,13 @@
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
- ocb_block16_xor(&ctx->offset, inblock, &tmp1);
+ ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
/* Checksum_i = Checksum_{i-1} xor P_i */
- ocb_block16_xor(&ctx->checksum, inblock, &ctx->checksum);
+ ocb_block16_xor_misaligned(&ctx->checksum, inblock, &ctx->checksum);
ocb_encrypt(ctx, &tmp1, &tmp2, ctx->keyenc);
outblock =
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
- ocb_block16_xor(&ctx->offset, &tmp2, outblock);
+ ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
}
@@ -517,14 +487,14 @@
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
- ocb_block16_xor(&ctx->offset, inblock, &tmp1);
+ ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
ocb_decrypt(ctx, &tmp1, &tmp2, ctx->keydec);
outblock =
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
- ocb_block16_xor(&ctx->offset, &tmp2, outblock);
+ ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
/* Checksum_i = Checksum_{i-1} xor P_i */
- ocb_block16_xor(&ctx->checksum, outblock, &ctx->checksum);
+ ocb_block16_xor_misaligned(&ctx->checksum, outblock, &ctx->checksum);
}
/*