Files
llvm/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
Joseph Huber 8763812b4c [Offload] Remove check on kernel argument sizes (#162121)
Summary:
This check is unnecessarily restrictive and currently incorrectly fires
for any size less than eight bytes. Just remove it, we do sanity checks
elsewhere and at some point need to trust the ABI.
2025-10-06 12:49:44 -05:00

288 lines
8.6 KiB
C++

//===------- Offload API tests - olLaunchKernel --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../common/Fixtures.hpp"
#include <OffloadAPI.h>
#include <gtest/gtest.h>
struct LaunchKernelTestBase : OffloadQueueTest {
void SetUpProgram(const char *program) {
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
ASSERT_TRUE(TestEnvironment::loadDeviceBinary(program, Device, DeviceBin));
ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
DeviceBin->getBufferSize(), &Program));
LaunchArgs.Dimensions = 1;
LaunchArgs.GroupSize = {64, 1, 1};
LaunchArgs.NumGroups = {1, 1, 1};
LaunchArgs.DynSharedMemory = 0;
}
void TearDown() override {
if (Program) {
olDestroyProgram(Program);
}
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::TearDown());
}
std::unique_ptr<llvm::MemoryBuffer> DeviceBin;
ol_program_handle_t Program = nullptr;
ol_kernel_launch_size_args_t LaunchArgs{};
};
// Fixture for tests exercising a single kernel. The program and the kernel
// it contains share the same name, passed once to SetUpKernel.
struct LaunchSingleKernelTestBase : LaunchKernelTestBase {
// Loads the program named `kernel` and resolves the kernel symbol of the
// same name into Kernel.
void SetUpKernel(const char *kernel) {
RETURN_ON_FATAL_FAILURE(SetUpProgram(kernel));
ASSERT_SUCCESS(
olGetSymbol(Program, kernel, OL_SYMBOL_KIND_KERNEL, &Kernel));
}
ol_symbol_handle_t Kernel = nullptr;
};
// Declares a parameterized device fixture olLaunchKernel<NAME>Test that
// loads the program/kernel named KERNEL, and instantiates it for every
// available device.
#define KERNEL_TEST(NAME, KERNEL) \
struct olLaunchKernel##NAME##Test : LaunchSingleKernelTestBase { \
void SetUp() override { SetUpKernel(#KERNEL); } \
}; \
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test);
// One fixture per single-kernel test program in the device binary set.
KERNEL_TEST(Foo, foo)
KERNEL_TEST(NoArgs, noargs)
KERNEL_TEST(Byte, byte)
KERNEL_TEST(LocalMem, localmem)
KERNEL_TEST(LocalMemReduction, localmem_reduction)
KERNEL_TEST(LocalMemStatic, localmem_static)
KERNEL_TEST(GlobalCtor, global_ctor)
KERNEL_TEST(GlobalDtor, global_dtor)
// Fixture for tests that need several kernels from one program (e.g. a
// writer and a reader sharing device state).
struct LaunchMultipleKernelTestBase : LaunchKernelTestBase {
  /// Loads `program` and resolves each named kernel into Kernels, in the
  /// same order as `kernels`. The list is taken by const reference to
  /// avoid copying the vector (call sites are unchanged).
  void SetUpKernels(const char *program,
                    const std::vector<const char *> &kernels) {
    RETURN_ON_FATAL_FAILURE(SetUpProgram(program));
    Kernels.resize(kernels.size());
    for (size_t I = 0; I < kernels.size(); ++I)
      ASSERT_SUCCESS(olGetSymbol(Program, kernels[I], OL_SYMBOL_KIND_KERNEL,
                                 &Kernels[I]));
  }

  std::vector<ol_symbol_handle_t> Kernels;
};
// Declares a parameterized device fixture olLaunchKernel<NAME>Test that
// loads PROGRAM and resolves each listed kernel name, then instantiates
// the fixture for every available device.
#define KERNEL_MULTI_TEST(NAME, PROGRAM, ...) \
struct olLaunchKernel##NAME##Test : LaunchMultipleKernelTestBase { \
void SetUp() override { SetUpKernels(#PROGRAM, {__VA_ARGS__}); } \
}; \
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernel##NAME##Test);
// "global" provides "write" and "read" kernels; the tests below suggest
// they communicate through the program's "global" symbol.
KERNEL_MULTI_TEST(Global, global, "write", "read")
// Basic asynchronous launch: the test expects element i of the output
// buffer to equal i after the kernel runs.
TEST_P(olLaunchKernelFooTest, Success) {
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  struct {
    void *Mem;
  } Args{Mem};
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
  uint32_t *Data = (uint32_t *)Mem;
  // Use the configured group size rather than a hard-coded 64 so the
  // verification stays in sync with SetUpProgram's launch configuration.
  for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
    ASSERT_EQ(Data[i], i);
  }
  ASSERT_SUCCESS(olMemFree(Mem));
}
// Same as Success, but run concurrently from multiple host threads; each
// thread performs its own alloc, launch, sync, verify, and free.
TEST_P(olLaunchKernelFooTest, SuccessThreaded) {
  threadify([&](size_t) {
    void *Mem;
    ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                              LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
    struct {
      void *Mem;
    } Args{Mem};
    ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args),
                                  &LaunchArgs));
    ASSERT_SUCCESS(olSyncQueue(Queue));
    uint32_t *Data = (uint32_t *)Mem;
    // Check the whole launch; use the configured group size rather than a
    // hard-coded 64 so this stays consistent with SetUpProgram.
    for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
      ASSERT_EQ(Data[i], i);
    }
    ASSERT_SUCCESS(olMemFree(Mem));
  });
}
// A kernel taking no arguments must accept a null argument pointer with a
// zero size.
TEST_P(olLaunchKernelNoArgsTest, Success) {
ASSERT_SUCCESS(
olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs));
ASSERT_SUCCESS(olSyncQueue(Queue));
}
// Passing a null queue makes the launch synchronous: results must be
// visible immediately without an explicit olSyncQueue.
TEST_P(olLaunchKernelFooTest, SuccessSynchronous) {
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  struct {
    void *Mem;
  } Args{Mem};
  ASSERT_SUCCESS(olLaunchKernel(nullptr, Device, Kernel, &Args, sizeof(Args),
                                &LaunchArgs));
  uint32_t *Data = (uint32_t *)Mem;
  // Use the configured group size rather than a hard-coded 64 so the
  // verification stays in sync with SetUpProgram's launch configuration.
  for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
    ASSERT_EQ(Data[i], i);
  }
  ASSERT_SUCCESS(olMemFree(Mem));
}
// Launch four groups with a dynamic shared-memory allocation per group;
// the expected output for element I is (I % 64) * 2.
TEST_P(olLaunchKernelLocalMemTest, Success) {
  LaunchArgs.NumGroups.x = 4;
  LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t);

  // One output element per thread across the whole grid.
  const uint32_t TotalThreads = LaunchArgs.GroupSize.x * LaunchArgs.NumGroups.x;
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            TotalThreads * sizeof(uint32_t), &Mem));

  struct {
    void *Mem;
  } Args{Mem};
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));

  uint32_t *Out = (uint32_t *)Mem;
  for (uint32_t I = 0; I < TotalThreads; ++I)
    ASSERT_EQ(Out[I], (I % 64) * 2);

  ASSERT_SUCCESS(olMemFree(Mem));
}
// Launch four groups with a dynamic shared-memory scratch buffer; the
// test expects one value per group, equal to twice the group size.
TEST_P(olLaunchKernelLocalMemReductionTest, Success) {
  LaunchArgs.NumGroups.x = 4;
  LaunchArgs.DynSharedMemory = 64 * sizeof(uint32_t);

  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem));

  struct {
    void *Mem;
  } Args{Mem};
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));

  const uint32_t Expected = 2 * LaunchArgs.GroupSize.x;
  uint32_t *Out = (uint32_t *)Mem;
  for (uint32_t Group = 0; Group < LaunchArgs.NumGroups.x; ++Group)
    ASSERT_EQ(Out[Group], Expected);

  ASSERT_SUCCESS(olMemFree(Mem));
}
// Same shape as the reduction test but with no dynamic shared memory
// (the kernel presumably uses a statically-sized buffer — name suggests
// so); still expects one value of 2 * group-size per group.
TEST_P(olLaunchKernelLocalMemStaticTest, Success) {
  LaunchArgs.NumGroups.x = 4;
  LaunchArgs.DynSharedMemory = 0;

  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.NumGroups.x * sizeof(uint32_t), &Mem));

  struct {
    void *Mem;
  } Args{Mem};
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));

  const uint32_t Expected = 2 * LaunchArgs.GroupSize.x;
  uint32_t *Out = (uint32_t *)Mem;
  for (uint32_t Group = 0; Group < LaunchArgs.NumGroups.x; ++Group)
    ASSERT_EQ(Out[Group], Expected);

  ASSERT_SUCCESS(olMemFree(Mem));
}
// Run "write" (no args) then "read" (into Mem), syncing between them; the
// test expects Data[i] == i * 2 after both complete.
TEST_P(olLaunchKernelGlobalTest, Success) {
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  struct {
    void *Mem;
  } Args{Mem};
  // Kernels[0] == "write", Kernels[1] == "read" (see KERNEL_MULTI_TEST).
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernels[0], nullptr, 0, &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernels[1], &Args, sizeof(Args),
                                &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
  uint32_t *Data = (uint32_t *)Mem;
  // Use the configured group size rather than a hard-coded 64 so the
  // verification stays in sync with SetUpProgram's launch configuration.
  for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
    ASSERT_EQ(Data[i], i * 2);
  }
  ASSERT_SUCCESS(olMemFree(Mem));
}
// Launching a symbol of the wrong kind (a global variable instead of a
// kernel) must fail with OL_ERRC_SYMBOL_KIND.
TEST_P(olLaunchKernelGlobalTest, InvalidNotAKernel) {
ol_symbol_handle_t Global = nullptr;
ASSERT_SUCCESS(
olGetSymbol(Program, "global", OL_SYMBOL_KIND_GLOBAL_VARIABLE, &Global));
ASSERT_ERROR(OL_ERRC_SYMBOL_KIND,
olLaunchKernel(Queue, Device, Global, nullptr, 0, &LaunchArgs));
}
// The global_ctor program's constructor should have run before the kernel;
// the test expects Data[i] == i + 100.
TEST_P(olLaunchKernelGlobalCtorTest, Success) {
  void *Mem;
  ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
                            LaunchArgs.GroupSize.x * sizeof(uint32_t), &Mem));
  struct {
    void *Mem;
  } Args{Mem};
  ASSERT_SUCCESS(
      olLaunchKernel(Queue, Device, Kernel, &Args, sizeof(Args), &LaunchArgs));
  ASSERT_SUCCESS(olSyncQueue(Queue));
  uint32_t *Data = (uint32_t *)Mem;
  // Use the configured group size rather than a hard-coded 64 so the
  // verification stays in sync with SetUpProgram's launch configuration.
  for (uint32_t i = 0; i < LaunchArgs.GroupSize.x; i++) {
    ASSERT_EQ(Data[i], i + 100);
  }
  ASSERT_SUCCESS(olMemFree(Mem));
}
// Smoke test for a program with a global destructor: launch and sync must
// succeed without crashing.
TEST_P(olLaunchKernelGlobalDtorTest, Success) {
// TODO: We can't inspect the result of a destructor yet, once we
// find/implement a way, update this test. For now we just check that nothing
// crashes
ASSERT_SUCCESS(
olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs));
ASSERT_SUCCESS(olSyncQueue(Queue));
}