[PATCH v3 1/4] LoongArch: bpf: Fix memory ordering for value-returning atomics
From: Chenguang Zhao <hidden>
Date: 2026-06-22 09:04:29
Also in:
linux-kselftest, loongarch
Subsystem:
bpf jit for loongarch, bpf [general] (safe dynamic programs and tools), loongarch, the rest · Maintainers:
Tiezhu Yang, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko, Eduard Zingerman, Kumar Kartikeya Dwivedi, Huacai Chen, Linus Torvalds
Per the Linux Kernel Memory Model, value-returning atomic RMW operations
must be fully ordered, i.e. behave as if a full memory barrier were placed
both before and after the operation. On LoongArch, plain AMO instructions
and bare ll/sc loops do not satisfy this requirement by themselves.
Update emit_atomic_rmw() to emit barrier-carrying instructions for all
value-returning BPF atomics:
- BPF_ADD | BPF_FETCH: use amadd_db.{b,h,w,d}
- BPF_{AND,OR,XOR} | BPF_FETCH: use am{and,or,xor}_db.{w,d}
- BPF_XCHG: use amswap_db.{b,h,w,d}
- BPF_CMPXCHG: emit dbar 0x700 after the ll/sc loop, matching
__WEAK_LLSC_MB in cmpxchg.h
Add the corresponding instruction encodings and emit helpers to inst.h.
Non-value-returning RMW ops (plain BPF_ADD, BPF_AND, etc.) are left
weakly ordered, which is consistent with the LKMM.
Signed-off-by: Chenguang Zhao <redacted>
---
arch/loongarch/include/asm/inst.h | 18 +++++++++++++++++
arch/loongarch/net/bpf_jit.c | 32 +++++++++++++++++--------------
2 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 76b723590023..bdbc17d07110 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h@@ -199,6 +199,10 @@ enum reg3_op { amswaph_op = 0x70b9, amaddb_op = 0x70ba, amaddh_op = 0x70bb, + amswapdbb_op = 0x70bc, + amswapdbh_op = 0x70bd, + amadddbb_op = 0x70be, + amadddbh_op = 0x70bf, amswapw_op = 0x70c0, amswapd_op = 0x70c1, amaddw_op = 0x70c2,
@@ -783,6 +787,20 @@ DEF_EMIT_REG3_FORMAT(amswapb, amswapb_op) DEF_EMIT_REG3_FORMAT(amswaph, amswaph_op) DEF_EMIT_REG3_FORMAT(amswapw, amswapw_op) DEF_EMIT_REG3_FORMAT(amswapd, amswapd_op) +DEF_EMIT_REG3_FORMAT(amswapdbb, amswapdbb_op) +DEF_EMIT_REG3_FORMAT(amswapdbh, amswapdbh_op) +DEF_EMIT_REG3_FORMAT(amadddbb, amadddbb_op) +DEF_EMIT_REG3_FORMAT(amadddbh, amadddbh_op) +DEF_EMIT_REG3_FORMAT(amadddbw, amadddbw_op) +DEF_EMIT_REG3_FORMAT(amadddbd, amadddbd_op) +DEF_EMIT_REG3_FORMAT(amanddbw, amanddbw_op) +DEF_EMIT_REG3_FORMAT(amanddbd, amanddbd_op) +DEF_EMIT_REG3_FORMAT(amordbw, amordbw_op) +DEF_EMIT_REG3_FORMAT(amordbd, amordbd_op) +DEF_EMIT_REG3_FORMAT(amxordbw, amxordbw_op) +DEF_EMIT_REG3_FORMAT(amxordbd, amxordbd_op) +DEF_EMIT_REG3_FORMAT(amswapdbw, amswapdbw_op) +DEF_EMIT_REG3_FORMAT(amswapdbd, amswapdbd_op) #define DEF_EMIT_REG3SA2_FORMAT(NAME, OP) \ static inline void emit_##NAME(union loongarch_instruction *insn, \
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index 24913dc7f4e8..47707579e61c 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c@@ -7,6 +7,9 @@ #include <linux/memory.h> #include "bpf_jit.h" +/* dbar hint for ll/sc completion ordering, see __WEAK_LLSC_MB */ +#define DBAR_LLSC_MB 0x700 + #define LOONGARCH_MAX_REG_ARGS 8 #define LOONGARCH_LONG_JUMP_NINSNS 5
@@ -418,7 +421,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) pr_err_once("bpf-jit: amadd.b instruction is not supported\n"); return -EINVAL; } - emit_insn(ctx, amaddb, src, t1, t3); + emit_insn(ctx, amadddbb, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_H:
@@ -426,39 +429,39 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) pr_err_once("bpf-jit: amadd.h instruction is not supported\n"); return -EINVAL; } - emit_insn(ctx, amaddh, src, t1, t3); + emit_insn(ctx, amadddbh, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_W: - emit_insn(ctx, amaddw, src, t1, t3); + emit_insn(ctx, amadddbw, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_DW: - emit_insn(ctx, amaddd, src, t1, t3); + emit_insn(ctx, amadddbd, src, t1, t3); break; } break; case BPF_AND | BPF_FETCH: if (isdw) { - emit_insn(ctx, amandd, src, t1, t3); + emit_insn(ctx, amanddbd, src, t1, t3); } else { - emit_insn(ctx, amandw, src, t1, t3); + emit_insn(ctx, amanddbw, src, t1, t3); emit_zext_32(ctx, src, true); } break; case BPF_OR | BPF_FETCH: if (isdw) { - emit_insn(ctx, amord, src, t1, t3); + emit_insn(ctx, amordbd, src, t1, t3); } else { - emit_insn(ctx, amorw, src, t1, t3); + emit_insn(ctx, amordbw, src, t1, t3); emit_zext_32(ctx, src, true); } break; case BPF_XOR | BPF_FETCH: if (isdw) { - emit_insn(ctx, amxord, src, t1, t3); + emit_insn(ctx, amxordbd, src, t1, t3); } else { - emit_insn(ctx, amxorw, src, t1, t3); + emit_insn(ctx, amxordbw, src, t1, t3); emit_zext_32(ctx, src, true); } break;
@@ -470,7 +473,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) pr_err_once("bpf-jit: amswap.b instruction is not supported\n"); return -EINVAL; } - emit_insn(ctx, amswapb, src, t1, t3); + emit_insn(ctx, amswapdbb, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_H:
@@ -478,15 +481,15 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) pr_err_once("bpf-jit: amswap.h instruction is not supported\n"); return -EINVAL; } - emit_insn(ctx, amswaph, src, t1, t3); + emit_insn(ctx, amswapdbh, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_W: - emit_insn(ctx, amswapw, src, t1, t3); + emit_insn(ctx, amswapdbw, src, t1, t3); emit_zext_32(ctx, src, true); break; case BPF_DW: - emit_insn(ctx, amswapd, src, t1, t3); + emit_insn(ctx, amswapdbd, src, t1, t3); break; } break;
@@ -509,6 +512,7 @@ static int emit_atomic_rmw(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6); emit_zext_32(ctx, r0, true); } + emit_insn(ctx, dbar, DBAR_LLSC_MB); break; default: pr_err_once("bpf-jit: invalid atomic read-modify-write opcode %02x\n", imm);
--
2.25.1