From 44234214fcd2e806cdc4531292a1fd52423f1406 Mon Sep 17 00:00:00 2001 From: Ash Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Mon, 10 Feb 2025 23:17:47 +0000 Subject: [PATCH] Fix: Invoke `f16f32` in WGMMA --- less_slow.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/less_slow.cu b/less_slow.cu index bfa845c..6073fb7 100644 --- a/less_slow.cu +++ b/less_slow.cu @@ -603,7 +603,7 @@ __global__ void tops_f16f32_sm90wgmma_64x256x16_loop128_cuda_kernel() { std::uint64_t b_descriptor = wgmma_descriptor((std::uint64_t)b_shared, 128 * 256 / 8, 128, 0, 0); wgmma_fence(); for (int i = 0; i != 128; ++i) { - wgmma_bf16f32_64x256x16(c_registers, a_descriptor, b_descriptor); + wgmma_f16f32_64x256x16(c_registers, a_descriptor, b_descriptor); wgmma_commit_group(); } wgmma_sync_group();