[RFC TLS Offload Support 14/15] crypto: rfc5288 aesni optimized intel routines
From: Aviad Yehezkel <hidden>
Date: 2017-03-28 13:26:31
Also in:
linux-crypto
Subsystem:
the rest, x86 architecture (32-bit and 64-bit) · Maintainers:
Linus Torvalds, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen
From: Dave Watson <redacted>

The assembly routines require the AAD data to be padded out to the nearest
4 bytes. Copy the 13-byte tag to a spare assoc data area when necessary.

Signed-off-by: Dave Watson <redacted>
---
 arch/x86/crypto/aesni-intel_asm.S        |   6 ++
 arch/x86/crypto/aesni-intel_avx-x86_64.S |   4 ++
 arch/x86/crypto/aesni-intel_glue.c       | 105 ++++++++++++++++++++++++++-----
 3 files changed, 99 insertions(+), 16 deletions(-)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 383a6f8..4e80bb8 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S@@ -229,6 +229,9 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation MOVADQ SHUF_MASK(%rip), %xmm14 mov arg7, %r10 # %r10 = AAD mov arg8, %r12 # %r12 = aadLen + add $3, %r12 + and $~3, %r12 + mov %r12, %r11 pxor %xmm\i, %xmm\i
@@ -454,6 +457,9 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation MOVADQ SHUF_MASK(%rip), %xmm14 mov arg7, %r10 # %r10 = AAD mov arg8, %r12 # %r12 = aadLen + add $3, %r12 + and $~3, %r12 + mov %r12, %r11 pxor %xmm\i, %xmm\i _get_AAD_loop\num_initial_blocks\operation:
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 522ab68..0756e4a 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S@@ -360,6 +360,8 @@ VARIABLE_OFFSET = 16*8 mov arg6, %r10 # r10 = AAD mov arg7, %r12 # r12 = aadLen + add $3, %r12 + and $~3, %r12 mov %r12, %r11
@@ -1619,6 +1621,8 @@ ENDPROC(aesni_gcm_dec_avx_gen2) mov arg6, %r10 # r10 = AAD mov arg7, %r12 # r12 = aadLen + add $3, %r12 + and $~3, %r12 mov %r12, %r11
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 4ff90a7..dcada94 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c@@ -957,6 +957,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req) { u8 one_entry_in_sg = 0; u8 *src, *dst, *assoc; + u8 *assocmem = NULL; + __be32 counter = cpu_to_be32(1); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
@@ -966,12 +968,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req) struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; unsigned int i; - - /* Assuming we are supporting rfc4106 64-bit extended */ - /* sequence numbers We need to have the AAD length equal */ - /* to 16 or 20 bytes */ - if (unlikely(req->assoclen != 16 && req->assoclen != 20)) - return -EINVAL; + unsigned int padded_assoclen = (req->assoclen + 3) & ~3; + u8 assocbuf[24]; /* IV below built */ for (i = 0; i < 4; i++)
@@ -996,7 +994,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req) } else { /* Allocate memory for src, dst, assoc */ assoc = kmalloc(req->cryptlen + auth_tag_len + req->assoclen, - GFP_ATOMIC); + GFP_ATOMIC); + assocmem = assoc; if (unlikely(!assoc)) return -ENOMEM; scatterwalk_map_and_copy(assoc, req->src, 0,
@@ -1005,6 +1004,14 @@ static int helper_rfc4106_encrypt(struct aead_request *req) dst = src; } + if (req->assoclen != padded_assoclen) { + scatterwalk_map_and_copy(assocbuf, req->src, 0, + req->assoclen, 0); + memset(assocbuf + req->assoclen, 0, + padded_assoclen - req->assoclen); + assoc = assocbuf; + } + kernel_fpu_begin(); aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv, ctx->hash_subkey, assoc, req->assoclen - 8,
@@ -1025,7 +1032,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req) } else { scatterwalk_map_and_copy(dst, req->dst, req->assoclen, req->cryptlen + auth_tag_len, 1); - kfree(assoc); + kfree(assocmem); } return 0; }
@@ -1034,6 +1041,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) { u8 one_entry_in_sg = 0; u8 *src, *dst, *assoc; + u8 *assocmem = NULL; unsigned long tempCipherLen = 0; __be32 counter = cpu_to_be32(1); int retval = 0;
@@ -1043,16 +1051,11 @@ static int helper_rfc4106_decrypt(struct aead_request *req) unsigned long auth_tag_len = crypto_aead_authsize(tfm); u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); u8 authTag[16]; + u8 assocbuf[24]; struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; unsigned int i; - - if (unlikely(req->assoclen != 16 && req->assoclen != 20)) - return -EINVAL; - - /* Assuming we are supporting rfc4106 64-bit extended */ - /* sequence numbers We need to have the AAD length */ - /* equal to 16 or 20 bytes */ + unsigned int padded_assoclen = (req->assoclen + 3) & ~3; tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); /* IV below built */
@@ -1079,6 +1082,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) } else { /* Allocate memory for src, dst, assoc */ assoc = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); + assocmem = assoc; if (!assoc) return -ENOMEM; scatterwalk_map_and_copy(assoc, req->src, 0,
@@ -1087,6 +1091,14 @@ static int helper_rfc4106_decrypt(struct aead_request *req) dst = src; } + if (req->assoclen != padded_assoclen) { + scatterwalk_map_and_copy(assocbuf, req->src, + 0, req->assoclen, 0); + memset(assocbuf + req->assoclen, 0, + padded_assoclen - req->assoclen); + assoc = assocbuf; + } + kernel_fpu_begin(); aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv, ctx->hash_subkey, assoc, req->assoclen - 8,
@@ -1109,7 +1121,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) } else { scatterwalk_map_and_copy(dst, req->dst, req->assoclen, tempCipherLen, 1); - kfree(assoc); + kfree(assocmem); } return retval; }
@@ -1151,6 +1163,12 @@ static int rfc4106_encrypt(struct aead_request *req) aead_request_set_tfm(req, tfm); + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length */ + /* equal to 16 or 20 bytes */ + if (unlikely(req->assoclen != 16 && req->assoclen != 20)) + return -EINVAL; + return crypto_aead_encrypt(req); }
@@ -1167,6 +1185,43 @@ static int rfc4106_decrypt(struct aead_request *req) aead_request_set_tfm(req, tfm); + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length */ + /* equal to 16 or 20 bytes */ + if (unlikely(req->assoclen != 16 && req->assoclen != 20)) + return -EINVAL; + + return crypto_aead_decrypt(req); +} + +static int rfc5288_encrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(tfm); + struct cryptd_aead *cryptd_tfm = *ctx; + + if (unlikely(req->assoclen != 21)) + return -EINVAL; + + aead_request_set_tfm(req, irq_fpu_usable() ? + cryptd_aead_child(cryptd_tfm) : + &cryptd_tfm->base); + + return crypto_aead_encrypt(req); +} + +static int rfc5288_decrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(tfm); + struct cryptd_aead *cryptd_tfm = *ctx; + + if (unlikely(req->assoclen != 21)) + return -EINVAL; + + aead_request_set_tfm(req, irq_fpu_usable() ? + cryptd_aead_child(cryptd_tfm) : + &cryptd_tfm->base); return crypto_aead_decrypt(req); } #endif
@@ -1506,6 +1561,24 @@ static struct aead_alg aesni_aead_algs[] = { { .cra_blocksize = 1, .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_sync_ctx), .cra_module = THIS_MODULE, + }, +}, { + .init = rfc4106_init, + .exit = rfc4106_exit, + .setkey = rfc4106_set_key, + .setauthsize = rfc4106_set_authsize, + .encrypt = rfc5288_encrypt, + .decrypt = rfc5288_decrypt, + .ivsize = 8, + .maxauthsize = 16, + .base = { + .cra_name = "rfc5288(gcm(aes))", + .cra_driver_name = "rfc5288-gcm-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cryptd_aead *), + .cra_module = THIS_MODULE, }, } }; #else
--
2.7.4