Hello colleagues,
I gave a little thought to Niels' idea:
5. Revamp hmac and underlying hash functions with a separate state struct. Probably low priority, but it is a bit silly that, e.g., hmac_sha512_ctx includes three 128-byte large block buffers.
I've implemented a new approach using the hmac2 prefix, but if you like this approach I can switch the hmac2 prefix to just hmac and drop the older API.
Provide alternative HMAC interface, with context struct having just derived key and single hash state instead of three hash states at once.
Signed-off-by: Dmitry Eremin-Solenikov dbaryshkov@gmail.com --- hmac.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hmac.h | 32 ++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+)
diff --git a/hmac.c b/hmac.c index 6ac5e11a0686..6d57f8c9197c 100644 --- a/hmac.c +++ b/hmac.c @@ -115,3 +115,69 @@ hmac_digest(const void *outer, const void *inner, void *state,
memcpy(state, inner, hash->context_size); } + +static void +hmac2_reinit_state(void *state, void *derived_key, + const struct nettle_hash *hash, + uint8_t padc) +{ + TMP_DECL(pad, uint8_t, NETTLE_MAX_HASH_BLOCK_SIZE); + TMP_ALLOC(pad, hash->block_size); + + memset(pad, padc, hash->block_size); + memxor(pad, derived_key, hash->block_size); + + hash->init(state); + hash->update(state, hash->block_size, pad); +} + +void +hmac2_set_key(void *state, void *derived_key, + const struct nettle_hash *hash, + size_t key_length, const uint8_t *key) +{ + memset(derived_key, 0, hash->block_size); + + if (key_length > hash->block_size) + { + assert(hash->digest_size <= hash->block_size); + + /* Reduce key to the algorithm's hash size. Use the area pointed + * to by state for the temporary state. */ + hash->init(state); + hash->update(state, key_length, key); + hash->digest(state, hash->digest_size, derived_key); + } + else + { + memcpy(derived_key, key, key_length); + } + + hmac2_reinit_state(state, derived_key, hash, IPAD); +} + +void +hmac2_update(void *state, + const struct nettle_hash *hash, + size_t length, const uint8_t *data) +{ + hash->update(state, length, data); +} + +void +hmac2_digest(void *state, void *derived_key, + const struct nettle_hash *hash, + size_t length, uint8_t *dst) +{ + TMP_DECL(digest, uint8_t, NETTLE_MAX_HASH_DIGEST_SIZE); + TMP_ALLOC(digest, hash->digest_size); + + hash->digest(state, hash->digest_size, digest); + + hmac2_reinit_state(state, derived_key, hash, OPAD); + + hash->update(state, hash->digest_size, digest); + hash->digest(state, length, dst); + + hmac2_reinit_state(state, derived_key, hash, IPAD); +} diff --git a/hmac.h b/hmac.h index 40a8e77aab6d..29a2798ffc21 100644 --- a/hmac.h +++ b/hmac.h @@ -49,6 +49,9 @@ extern "C" { #define hmac_set_key nettle_hmac_set_key #define hmac_update nettle_hmac_update #define hmac_digest nettle_hmac_digest +#define hmac2_set_key nettle_hmac2_set_key +#define hmac2_update nettle_hmac2_update +#define 
hmac2_digest nettle_hmac2_digest #define hmac_md5_set_key nettle_hmac_md5_set_key #define hmac_md5_update nettle_hmac_md5_update #define hmac_md5_digest nettle_hmac_md5_digest @@ -87,6 +90,24 @@ hmac_digest(const void *outer, const void *inner, void *state, size_t length, uint8_t *digest);
+void +hmac2_set_key(void *state, void *derived_key, + const struct nettle_hash *hash, + size_t length, const uint8_t *key); + +/* This function is not strictly needed, it's s just the same as the + * hash update or hmac2_update functions. */ +void +hmac2_update(void *state, + const struct nettle_hash *hash, + size_t length, const uint8_t *data); + +void +hmac2_digest(void *state, void *derived_key, + const struct nettle_hash *hash, + size_t length, uint8_t *digest); + + #define HMAC_CTX(type) \ { type outer; type inner; type state; }
@@ -98,6 +119,17 @@ hmac_digest(const void *outer, const void *inner, void *state, hmac_digest( &(ctx)->outer, &(ctx)->inner, &(ctx)->state, \ (hash), (length), (digest) )
+#define HMAC2_CTX(type, block_size) \ +{ type state; uint8_t key[block_size]; } + +#define HMAC2_SET_KEY(ctx, hash, length, key) \ + hmac2_set_key( &(ctx)->state, &(ctx)->key, \ + (hash), (length), (key) ) + +#define HMAC2_DIGEST(ctx, hash, length, digest) \ + hmac2_digest( &(ctx)->state, &(ctx)->key, \ + (hash), (length), (digest) ) + /* HMAC using specific hash functions */
/* hmac-md5 */
Note, this does change ABI of these functions, as context size changes.
Signed-off-by: Dmitry Eremin-Solenikov dbaryshkov@gmail.com --- hmac-md5.c | 4 ++-- hmac-ripemd160.c | 4 ++-- hmac-sha1.c | 4 ++-- hmac-sha224.c | 4 ++-- hmac-sha256.c | 4 ++-- hmac-sha384.c | 4 ++-- hmac-sha512.c | 4 ++-- hmac.h | 11 ++++++----- 8 files changed, 20 insertions(+), 19 deletions(-)
diff --git a/hmac-md5.c b/hmac-md5.c index a27e64f6f61b..01670af88f2d 100644 --- a/hmac-md5.c +++ b/hmac-md5.c @@ -41,7 +41,7 @@ void hmac_md5_set_key(struct hmac_md5_ctx *ctx, size_t key_length, const uint8_t *key) { - HMAC_SET_KEY(ctx, &nettle_md5, key_length, key); + HMAC2_SET_KEY(ctx, &nettle_md5, key_length, key); }
void @@ -55,5 +55,5 @@ void hmac_md5_digest(struct hmac_md5_ctx *ctx, size_t length, uint8_t *digest) { - HMAC_DIGEST(ctx, &nettle_md5, length, digest); + HMAC2_DIGEST(ctx, &nettle_md5, length, digest); } diff --git a/hmac-ripemd160.c b/hmac-ripemd160.c index 24e2cbe7a578..376d66822a6b 100644 --- a/hmac-ripemd160.c +++ b/hmac-ripemd160.c @@ -41,7 +41,7 @@ void hmac_ripemd160_set_key(struct hmac_ripemd160_ctx *ctx, size_t key_length, const uint8_t *key) { - HMAC_SET_KEY(ctx, &nettle_ripemd160, key_length, key); + HMAC2_SET_KEY(ctx, &nettle_ripemd160, key_length, key); }
void @@ -55,5 +55,5 @@ void hmac_ripemd160_digest(struct hmac_ripemd160_ctx *ctx, size_t length, uint8_t *digest) { - HMAC_DIGEST(ctx, &nettle_ripemd160, length, digest); + HMAC2_DIGEST(ctx, &nettle_ripemd160, length, digest); } diff --git a/hmac-sha1.c b/hmac-sha1.c index 5e7188f92cf2..e696f608b25f 100644 --- a/hmac-sha1.c +++ b/hmac-sha1.c @@ -41,7 +41,7 @@ void hmac_sha1_set_key(struct hmac_sha1_ctx *ctx, size_t key_length, const uint8_t *key) { - HMAC_SET_KEY(ctx, &nettle_sha1, key_length, key); + HMAC2_SET_KEY(ctx, &nettle_sha1, key_length, key); }
void @@ -55,5 +55,5 @@ void hmac_sha1_digest(struct hmac_sha1_ctx *ctx, size_t length, uint8_t *digest) { - HMAC_DIGEST(ctx, &nettle_sha1, length, digest); + HMAC2_DIGEST(ctx, &nettle_sha1, length, digest); } diff --git a/hmac-sha224.c b/hmac-sha224.c index c5bc8750a054..74ad061a6bd2 100644 --- a/hmac-sha224.c +++ b/hmac-sha224.c @@ -41,12 +41,12 @@ void hmac_sha224_set_key(struct hmac_sha224_ctx *ctx, size_t key_length, const uint8_t *key) { - HMAC_SET_KEY(ctx, &nettle_sha224, key_length, key); + HMAC2_SET_KEY(ctx, &nettle_sha224, key_length, key); }
void hmac_sha224_digest(struct hmac_sha224_ctx *ctx, size_t length, uint8_t *digest) { - HMAC_DIGEST(ctx, &nettle_sha224, length, digest); + HMAC2_DIGEST(ctx, &nettle_sha224, length, digest); } diff --git a/hmac-sha256.c b/hmac-sha256.c index af5cc0f1cbef..25e69deb44f8 100644 --- a/hmac-sha256.c +++ b/hmac-sha256.c @@ -41,7 +41,7 @@ void hmac_sha256_set_key(struct hmac_sha256_ctx *ctx, size_t key_length, const uint8_t *key) { - HMAC_SET_KEY(ctx, &nettle_sha256, key_length, key); + HMAC2_SET_KEY(ctx, &nettle_sha256, key_length, key); }
void @@ -55,5 +55,5 @@ void hmac_sha256_digest(struct hmac_sha256_ctx *ctx, size_t length, uint8_t *digest) { - HMAC_DIGEST(ctx, &nettle_sha256, length, digest); + HMAC2_DIGEST(ctx, &nettle_sha256, length, digest); } diff --git a/hmac-sha384.c b/hmac-sha384.c index 30008b5f85c1..2ab8430e11e7 100644 --- a/hmac-sha384.c +++ b/hmac-sha384.c @@ -41,12 +41,12 @@ void hmac_sha384_set_key(struct hmac_sha512_ctx *ctx, size_t key_length, const uint8_t *key) { - HMAC_SET_KEY(ctx, &nettle_sha384, key_length, key); + HMAC2_SET_KEY(ctx, &nettle_sha384, key_length, key); }
void hmac_sha384_digest(struct hmac_sha512_ctx *ctx, size_t length, uint8_t *digest) { - HMAC_DIGEST(ctx, &nettle_sha384, length, digest); + HMAC2_DIGEST(ctx, &nettle_sha384, length, digest); } diff --git a/hmac-sha512.c b/hmac-sha512.c index de64637a8216..325b2d8a8642 100644 --- a/hmac-sha512.c +++ b/hmac-sha512.c @@ -41,7 +41,7 @@ void hmac_sha512_set_key(struct hmac_sha512_ctx *ctx, size_t key_length, const uint8_t *key) { - HMAC_SET_KEY(ctx, &nettle_sha512, key_length, key); + HMAC2_SET_KEY(ctx, &nettle_sha512, key_length, key); }
void @@ -55,5 +55,5 @@ void hmac_sha512_digest(struct hmac_sha512_ctx *ctx, size_t length, uint8_t *digest) { - HMAC_DIGEST(ctx, &nettle_sha512, length, digest); + HMAC2_DIGEST(ctx, &nettle_sha512, length, digest); } diff --git a/hmac.h b/hmac.h index 29a2798ffc21..50daa401100b 100644 --- a/hmac.h +++ b/hmac.h @@ -77,6 +77,7 @@ hmac_set_key(void *outer, void *inner, void *state, const struct nettle_hash *hash, size_t length, const uint8_t *key);
+ /* This function is not strictly needed, it's s just the same as the * hash update function. */ void @@ -133,7 +134,7 @@ hmac2_digest(void *state, void *derived_key, /* HMAC using specific hash functions */
/* hmac-md5 */ -struct hmac_md5_ctx HMAC_CTX(struct md5_ctx); +struct hmac_md5_ctx HMAC2_CTX(struct md5_ctx, MD5_BLOCK_SIZE);
void hmac_md5_set_key(struct hmac_md5_ctx *ctx, @@ -149,7 +150,7 @@ hmac_md5_digest(struct hmac_md5_ctx *ctx,
/* hmac-ripemd160 */ -struct hmac_ripemd160_ctx HMAC_CTX(struct ripemd160_ctx); +struct hmac_ripemd160_ctx HMAC2_CTX(struct ripemd160_ctx, RIPEMD160_BLOCK_SIZE);
void hmac_ripemd160_set_key(struct hmac_ripemd160_ctx *ctx, @@ -165,7 +166,7 @@ hmac_ripemd160_digest(struct hmac_ripemd160_ctx *ctx,
/* hmac-sha1 */ -struct hmac_sha1_ctx HMAC_CTX(struct sha1_ctx); +struct hmac_sha1_ctx HMAC2_CTX(struct sha1_ctx, SHA1_BLOCK_SIZE);
void hmac_sha1_set_key(struct hmac_sha1_ctx *ctx, @@ -180,7 +181,7 @@ hmac_sha1_digest(struct hmac_sha1_ctx *ctx, size_t length, uint8_t *digest);
/* hmac-sha256 */ -struct hmac_sha256_ctx HMAC_CTX(struct sha256_ctx); +struct hmac_sha256_ctx HMAC2_CTX(struct sha256_ctx, SHA256_BLOCK_SIZE);
void hmac_sha256_set_key(struct hmac_sha256_ctx *ctx, @@ -208,7 +209,7 @@ hmac_sha224_digest(struct hmac_sha224_ctx *ctx, size_t length, uint8_t *digest);
/* hmac-sha512 */ -struct hmac_sha512_ctx HMAC_CTX(struct sha512_ctx); +struct hmac_sha512_ctx HMAC2_CTX(struct sha512_ctx, SHA512_BLOCK_SIZE);
void hmac_sha512_set_key(struct hmac_sha512_ctx *ctx,
Dmitry Eremin-Solenikov dbaryshkov@gmail.com writes:
Hello colleagues,
I gave a little thought to Niels' idea:
- Revamp hmac and underlying hash functions with a separate state struct. Probably low priority, but it is a bit silly that, e.g., hmac_sha512_ctx includes three 128-byte large block buffers.
I've implemented new approach using hmac2 prefix, but if you like this approach I can switch hmac2 prefix to just hmac and drop older API.
Nice!
diff --git a/hmac.c b/hmac.c index 6ac5e11a0686..6d57f8c9197c 100644 --- a/hmac.c +++ b/hmac.c @@ -115,3 +115,69 @@ hmac_digest(const void *outer, const void *inner, void *state,
memcpy(state, inner, hash->context_size); }
+static void +hmac2_reinit_state(void *state, void *derived_key,
const struct nettle_hash *hash,
uint8_t padc)
+{
- TMP_DECL(pad, uint8_t, NETTLE_MAX_HASH_BLOCK_SIZE);
- TMP_ALLOC(pad, hash->block_size);
- memset(pad, padc, hash->block_size);
- memxor(pad, derived_key, hash->block_size);
- hash->init(state);
- hash->update(state, hash->block_size, pad);
+}
This reinit function is used instead of a plain memcpy (of the complete ctx, including buffer). That's less efficient, since we'll get more calls to the heavy compression function for each message.
In principle, it should be possible to replace derived_key with the relevant part of hash context, except the buffer, and memcpy that. If it's possible to arrange it in that way without things getting too ugly, I think that might be worth the effort.
A typical context struct looks like
struct sha256_ctx { uint32_t state[_SHA256_DIGEST_LENGTH]; /* State variables */ uint64_t count; /* 64-bit block count */ uint8_t block[SHA256_BLOCK_SIZE]; /* SHA256 data buffer */ unsigned int index; /* index into buffer */ };
Here, we should first reorder fields so that the block buffer is last,
struct sha256_ctx { uint32_t state[_SHA256_DIGEST_LENGTH]; /* State variables */ uint64_t count; /* 64-bit block count */ unsigned int index; /* index into buffer */ uint8_t block[SHA256_BLOCK_SIZE]; /* SHA256 data buffer */ };
(and we can do that, since we're planning an abi break).
Then at the time reinit is called, we would memcpy the first three fields. state here depends on the key, while count will always be 1 and index always zero (but it's likely not a useful optimization to handle the constant part separately). To make this reasonably clean, we may have to take out the non-block fields to a separate struct, say
struct sha256_state { uint32_t state[_SHA256_DIGEST_LENGTH]; /* State variables */ uint64_t count; /* 64-bit block count */ unsigned int index; /* index into buffer */ };
struct sha256_ctx { struct sha256_state state; uint8_t block[SHA256_BLOCK_SIZE]; /* SHA256 data buffer */ };
and let
struct hmac_sha256 { struct sha256_state inner; struct sha256_state outer; struct sha256_ctx hash_ctx; /* Initialized from key, updated as the message is processed */ };
We'd need to add a state_size field to struct nettle_hash, and then reinit would be
memcpy(&hmac_ctx->hash_ctx, &hmac_ctx->inner /* or outer */, hash->state_size);
And the nice thing is that any hash function not matching this internal structure can let state_size == context_size, and things will keep working.
What do you think?
Regards, /Niels
Hello,
вт, 24 июл. 2018 г. в 15:40, Niels Möller nisse@lysator.liu.se:
This reinit function is used instead of a plain memcpy (of the complete ctx, including buffer). That's less efficient, since we'll get more calls to the heavy compression function for each message.
True. I'll look into adding HMAC functions to nettle-benchmark then. It would be interesting to compare performance.
struct sha256_state { uint32_t state[_SHA256_DIGEST_LENGTH]; /* State variables */ uint64_t count; /* 64-bit block count */ unsigned int index; /* index into buffer */ };
struct sha256_ctx { struct sha256_state state; uint8_t block[SHA256_BLOCK_SIZE]; /* SHA256 data buffer */ };
It might be worth moving both index and block out of the 'state' structure and then updating the compress/MD_* macros to accept separate 'compression state' and 'buffer state' structures. This might result in some code cleanups. I'll give this idea some thought.
and let
struct hmac_sha256 { struct sha256_state inner; struct sha256_state outer; struct sha256_ctx hash_ctx; /* Initialized from key, updated as the message is processed */ };
We'd need to add a state_size field to struct nettle_hash, and then reinit would be
memcpy(&hmac_ctx->hash_ctx, hmac_ctx->inner /* or outer */, hash->state_size);
And the nice thing is that any hash function not matching this internal structure can let state_size == context_size, and things will keep working.
What do you think?
What about having the following functions:
_FOO_init(state); FOO_init(ctx); _FOO_compress(state, block[]) FOO_update(ctx, length, data); _FOO_digest(state, buffer_state); FOO_digest(ctx);
Users will call typical FOO_* functions, while HMAC code can call internal _FOO_* functions.
Dmitry Eremin-Solenikov dbaryshkov@gmail.com writes:
True. I'll look into adding HMAC functions to nettle-benchmark then. It would be interesting to compare performance.
That would be great. It's better to measure performance than to speculate about it.
It might be worth moving both index and block out of 'state' function and then updating compress/MD_* macros to accept separate 'compression state' and 'buffer state' structures. This might result in some code cleanups. I'll give this idea a thought.
That would be conceptually very nice. I suspect there might be some complications from the count field (counter of compressed blocks), which most hash functions have, but, e.g., sha3 doesn't. On the other hand, hmac is designed to be used only with MD-style hash functions, so I'm not sure hmac-sha3 is of any use.
What about having following functions:
_FOO_init(state); FOO_init(ctx); _FOO_compress(state, block[]) FOO_update(ctx, length, data); _FOO_digest(state, buffer_state); FOO_digest(ctx);
Users will call typical FOO_* functions, while HMAC code can call internal _FOO_* functions.
What would _FOO_digest be used for? Also note that all functions needed by hmac would need to be exposed in struct nettle_hash.
We already have a couple of FOO_compress functions, mainly because those functions are candidates for assembly implementation.
Regards, /Niels
On Wed, 2018-07-25 at 07:31 +0200, Niels Möller wrote:
Dmitry Eremin-Solenikov dbaryshkov@gmail.com writes:
True. I'll look into adding HMAC functions to nettle-benchmark then. It would be interesting to compare performance.
That would be great. It's better to measure performance than to speculate about it.
It might be worth moving both index and block out of 'state' function and then updating compress/MD_* macros to accept separate 'compression state' and 'buffer state' structures. This might result in some code cleanups. I'll give this idea a thought.
That would be conceptually very nice. I suspect there might be some complications from the count field (counter of compressed blocks), which most hash function have, but, e.g., sha3 doesn't. On the other hand, hmac is designed to be used only with MD-style hash functions, so I'm not sure hmac-sha3 is of any use.
It would be overkill to have hmac-sha3, first because it is slow, and secondly because SHA3 was designed so that a MAC can be as simple as SHA3(K, msg).
regards, Nikos
nettle-bugs@lists.lysator.liu.se