Nikos Mavrogiannopoulos nmav@redhat.com writes:
Re-sending as it seems I forgot to remove cmac-internal from makefile.
I've had a first reading, and a few comments.
diff --git a/cmac.c b/cmac.c new file mode 100644 index 00000000..b4886808 --- /dev/null +++ b/cmac.c
+static const uint8_t const_Rb[] = {
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x87
+};
+static inline void block_lshift(const union nettle_block16 *in, + union nettle_block16 *out) +{
- uint64_t b1 = READ_UINT64(in->b);
- uint64_t b2 = READ_UINT64(in->b+8);
- unsigned overflow = b2 & 0x8000000000000000;
- b1 <<= 1;
- b2 <<= 1;
- if (overflow)
b1 |= 0x01;
I'd write the shift as
b1 = (b1 << 1) | (b2 >> 63); b2 <<= 1;
I think it could return the bit shifted out (since the next thing done is examining that bit). Or maybe even better, move the
if (one-bit shifted out) { memxor const_Rb }
into this function, which would then correspond to multiplication by in Z_2[x] / (Rb polynomial). Nettle conventions would also put the destination argument first. I'd suggest rewriting the function as
static void block_mulx(union nettle_block16 *dst, const union nettle_block16 *src) { uint64_t s1 = READ_UINT64(src->b); uint64_t s0 = READ_UINT64(src->b+8); uint64_t d1 = (s1 << 1) | (s0 >> 63); uint64_t d0 = s0 << 1; if (s1 >> 63) d0 ^= 0x87; /* If that's what the const_Rb memxor boils down to? */
WRITE_UINT64(dst->b, d1); WRITE_UINT64(dst->b+8, d0); }
Further cleverness is possible, but further optimization might not be needed, since, as you say, it's for key setup only.
+void cmac128_set_key(struct cmac128 *ctx, void *key,
nettle_cipher_func *encrypt)
+{
- memset(ctx, 0, sizeof(*ctx));
- /* step 1 - generate subkeys k1 and k2 */
- encrypt(key, 16, ctx->L.b, const_Zero);
- block_lshift(ctx->L.b, ctx->K1.b);
- if (ctx->L.b[0] & 0x80) {
memxor(ctx->K1.b, const_Rb, 16);
- }
Is ctx->L used for anything after key setup? If not, it should be moved to a local variable, out of the context struct.
- block_lshift(ctx->K1.b, ctx->K2.b);
- if (ctx->K1.b[0] & 0x80) {
memxor(ctx->K2.b, const_Rb, 16);
- }
+}
+#define MIN(x,y) ((x)<(y)?(x):(y))
+void cmac128_update(struct cmac128 *ctx, void *key,
nettle_cipher_func *encrypt,
size_t msg_len, const uint8_t *msg)
+{
- /*
* check if we expand the block
*/
- if (ctx->last_len < 16) {
size_t len = MIN(16 - ctx->last_len, msg_len);
memcpy(&ctx->last.b[ctx->last_len], msg, len);
msg += len;
msg_len -= len;
ctx->last_len += len;
- }
Other similar code uses different naming convenstions, in particular ctx->block rather than ctx->last, and ctx->index rather than ctx->last_len. Not terribly important, but it is nice to have some consistency in style.
- if (msg_len == 0) {
/* if it is still the last block, we are done */
return;
- }
- /*
* now checksum everything but the last block
*/
- memxor3(ctx->Y.b, ctx->X.b, ctx->last.b, 16);
- encrypt(key, 16, ctx->X.b, ctx->Y.b);
- while (msg_len > 16) {
memxor3(ctx->Y.b, ctx->X.b, msg, 16);
encrypt(key, 16, ctx->X.b, ctx->Y.b);
msg += 16;
msg_len -= 16;
- }
- /*
* copy the last block, it will be processed in
* cmac128_digest().
*/
- memset(ctx->last.b, 0, sizeof(ctx->last.b));
I'd prefer to postpone the memset/zero-padding until cmac128_digest.
- memcpy(ctx->last.b, msg, msg_len);
- ctx->last_len = msg_len;
+}
+void cmac128_digest(struct cmac128 *ctx, void *key,
nettle_cipher_func *encrypt,
unsigned length,
uint8_t *out)
+{
- uint8_t tmp[16];
Is both last_len == 0 and last_len == 16 possible here? If practical, it would be nice to arrange so that the update function above processes all full blocks.
- if (ctx->last_len < 16) {
ctx->last.b[ctx->last_len] = 0x80;
memxor3(tmp, ctx->last.b, ctx->K2.b, 16);
- } else {
memxor3(tmp, ctx->last.b, ctx->K1.b, 16);
- }
If that works out, this conditional would be replaced by
if (ctx->last_len > 0) { /* Pad last block, xor with K2 rather than K1 */ assert (ctx->last_len < 16); ctx->last.b[ctx->last_len] = 0x80; memset(last.b + ctx->last_len + 1, 0, 15 - ctx->last_len); memxor3(tmp, ctx->last.b, ctx->K2.b, 16) } /* Note: No else clause */
Other code reuses the block buffer for temporary values in the digest function. Maybe that would work here too, with memxor rather than memxor3. Since memxor is a byte-by-byte operation, one could also consider eliminating the memset, and instead do memxor + memcpy, but not sure if that's an improvement in perforamance or readability.
- memxor3(ctx->Y.b, tmp, ctx->X.b, 16);
- assert(length <= 16);
- if (length == 16) {
encrypt(key, length, out, ctx->Y.b);
- } else {
encrypt(key, length, tmp, ctx->Y.b);
memcpy(out, tmp, length);
- }
Maybe I'm missing something, but to me both clauses look equivalent?
diff --git a/cmac.h b/cmac.h new file mode 100644 index 00000000..5f0553b7 --- /dev/null +++ b/cmac.h @@ -0,0 +1,117 @@ +struct cmac128 {
- union nettle_block16 K1;
- union nettle_block16 K2;
- union nettle_block16 L;
- union nettle_block16 X;
- union nettle_block16 Y;
- union nettle_block16 last;
- size_t last_len;
+};
To me, it looks like L and Y don't belong in the context struct. As I understand it, K1 and K2 are the mmac key, X is the state, and last is the buffer needed to handle one block at a time.
Regards, /Niels