feature: use heapless builtins in OCL if supported

Related-To: NEO-7621
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2024-02-07 14:43:44 +00:00
committed by Compute-Runtime-Automation
parent 6f69fa997a
commit 59f661356c
26 changed files with 609 additions and 101 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -146,6 +146,13 @@ struct AuxBuiltinsMatcher {
}
};
// HWTEST2 matcher: a product matches when the TestTraits of its GFX core family
// report heaplessAllowed.
struct HeaplessSupportedMatcher {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        constexpr auto coreFamily = NEO::ToGfxCoreFamily<productFamily>::get();
        return TestTraits<coreFamily>::heaplessAllowed;
    }
};
HWTEST2_F(BuiltInTests, GivenBuiltinTypeBinaryWhenGettingAuxTranslationBuiltinThenResourceSizeIsNonZero, MatchAny) {
auto mockBuiltinsLib = std::unique_ptr<MockBuiltinsLib>(new MockBuiltinsLib());
@@ -2413,3 +2420,184 @@ HWTEST_F(BuiltInOwnershipWrapperTests, givenBuiltInOwnershipWrapperWhenAskedForT
EXPECT_FALSE(std::is_copy_constructible<BuiltInOwnershipWrapper>::value);
EXPECT_FALSE(std::is_copy_assignable<BuiltInOwnershipWrapper>::value);
}
// Builds dispatch infos with the copyBufferToBufferStatelessHeapless builder for a
// 4 * gigaByte copy that starts at a 4 * gigaByte source offset, then checks that a
// single dispatch is produced and that the stored builtin params match the input.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes and offsets below are 64-bit values cast to size_t, so 32-bit builds are skipped.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferToBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t srcStartOffset = 4ull * MemoryConstants::gigaByte;
    const uint64_t copyBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer source;
    source.size = static_cast<size_t>(bufferBytes);
    MockBuffer destination;
    destination.size = static_cast<size_t>(bufferBytes);

    BuiltinOpParams copyParams;
    copyParams.srcMemObj = &source;
    copyParams.srcOffset = {static_cast<size_t>(srcStartOffset), 0, 0};
    copyParams.dstMemObj = &destination;
    copyParams.dstOffset = {0, 0, 0};
    copyParams.size = {static_cast<size_t>(copyBytes), 0, 0};

    MultiDispatchInfo dispatches(copyParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), copyParams));
}
// Rect copy through the copyBufferRectStatelessHeapless builder where the destination
// allocation is of type bufferHostMemory: expects one dispatch, unchanged builtin params,
// and every dispatched kernel reporting its destination as system memory.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToSystemBufferRectStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes and offsets below are 64-bit values cast to size_t, so 32-bit builds are skipped.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t srcStartOffset = 4ull * MemoryConstants::gigaByte;
    const uint64_t rowBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer source;
    source.size = static_cast<size_t>(bufferBytes);
    MockBuffer destination;
    destination.size = static_cast<size_t>(bufferBytes);

    source.mockGfxAllocation.setAllocationType(AllocationType::buffer);
    destination.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);

    BuiltinOpParams rectParams;
    rectParams.srcMemObj = &source;
    rectParams.dstMemObj = &destination;
    rectParams.srcOffset = {static_cast<size_t>(srcStartOffset), 0, 0};
    rectParams.dstOffset = {0, 0, 0};
    rectParams.size = {static_cast<size_t>(rowBytes), 1, 1};
    // Single row, single slice: row pitch equals the copied width, slice pitch stays 0.
    rectParams.srcRowPitch = static_cast<size_t>(rowBytes);
    rectParams.srcSlicePitch = 0;
    rectParams.dstRowPitch = static_cast<size_t>(rowBytes);
    rectParams.dstSlicePitch = 0;

    MultiDispatchInfo dispatches(rectParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), rectParams));

    for (auto &info : dispatches) {
        EXPECT_TRUE(info.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
// Rect copy through the copyBufferRectStatelessHeapless builder where the destination
// allocation is of type buffer (source is bufferHostMemory): expects one dispatch,
// unchanged builtin params, and no dispatched kernel reporting a system-memory destination.
HWTEST2_F(BuiltInTests, whenBuilderCopyBufferToLocalBufferRectStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes and offsets below are 64-bit values cast to size_t, so 32-bit builds are skipped.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t srcStartOffset = 4ull * MemoryConstants::gigaByte;
    const uint64_t rowBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer source;
    source.size = static_cast<size_t>(bufferBytes);
    MockBuffer destination;
    destination.size = static_cast<size_t>(bufferBytes);

    source.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
    destination.mockGfxAllocation.setAllocationType(AllocationType::buffer);

    BuiltinOpParams rectParams;
    rectParams.srcMemObj = &source;
    rectParams.dstMemObj = &destination;
    rectParams.srcOffset = {static_cast<size_t>(srcStartOffset), 0, 0};
    rectParams.dstOffset = {0, 0, 0};
    rectParams.size = {static_cast<size_t>(rowBytes), 1, 1};
    // Single row, single slice: row pitch equals the copied width, slice pitch stays 0.
    rectParams.srcRowPitch = static_cast<size_t>(rowBytes);
    rectParams.srcSlicePitch = 0;
    rectParams.dstRowPitch = static_cast<size_t>(rowBytes);
    rectParams.dstSlicePitch = 0;

    MultiDispatchInfo dispatches(rectParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), rectParams));

    for (auto &info : dispatches) {
        EXPECT_FALSE(info.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
// Fill through the fillBufferStatelessHeapless builder where the destination allocation
// is of type bufferHostMemory: expects one dispatch, unchanged builtin params, and every
// dispatched kernel reporting its destination as system memory.
HWTEST2_F(BuiltInTests, whenBuilderFillSystemBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes and offsets below are 64-bit values cast to size_t, so 32-bit builds are skipped.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t dstStartOffset = 4ull * MemoryConstants::gigaByte;
    const uint64_t fillBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer pattern;
    pattern.size = static_cast<size_t>(bufferBytes);
    MockBuffer target;
    target.size = static_cast<size_t>(bufferBytes);

    pattern.mockGfxAllocation.setAllocationType(AllocationType::buffer);
    target.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);

    BuiltinOpParams fillParams;
    fillParams.srcMemObj = &pattern;
    fillParams.dstMemObj = &target;
    fillParams.dstOffset = {static_cast<size_t>(dstStartOffset), 0, 0};
    fillParams.size = {static_cast<size_t>(fillBytes), 0, 0};

    MultiDispatchInfo dispatches(fillParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), fillParams));

    for (auto &info : dispatches) {
        EXPECT_TRUE(info.getKernel()->getDestinationAllocationInSystemMemory());
    }
}
// Fill through the fillBufferStatelessHeapless builder where the destination allocation
// is of type buffer (source is bufferHostMemory): expects one dispatch, unchanged builtin
// params, and no dispatched kernel reporting a system-memory destination.
HWTEST2_F(BuiltInTests, whenBuilderFillLocalBufferStatelessHeaplessIsUsedThenParamsAreCorrect, HeaplessSupportedMatcher) {
    // Sizes and offsets below are 64-bit values cast to size_t, so 32-bit builds are skipped.
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &dispatchBuilder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless, *pClDevice);

    const uint64_t bufferBytes = 10ull * MemoryConstants::gigaByte;
    const uint64_t dstStartOffset = 4ull * MemoryConstants::gigaByte;
    const uint64_t fillBytes = 4ull * MemoryConstants::gigaByte;

    MockBuffer pattern;
    pattern.size = static_cast<size_t>(bufferBytes);
    MockBuffer target;
    target.size = static_cast<size_t>(bufferBytes);

    pattern.mockGfxAllocation.setAllocationType(AllocationType::bufferHostMemory);
    target.mockGfxAllocation.setAllocationType(AllocationType::buffer);

    BuiltinOpParams fillParams;
    fillParams.srcMemObj = &pattern;
    fillParams.dstMemObj = &target;
    fillParams.dstOffset = {static_cast<size_t>(dstStartOffset), 0, 0};
    fillParams.size = {static_cast<size_t>(fillBytes), 0, 0};

    MultiDispatchInfo dispatches(fillParams);
    ASSERT_TRUE(dispatchBuilder.buildDispatchInfos(dispatches));

    EXPECT_EQ(1u, dispatches.size());
    EXPECT_TRUE(compareBuiltinOpParams(dispatches.peekBuiltinOpParams(), fillParams));

    for (auto &info : dispatches) {
        EXPECT_FALSE(info.getKernel()->getDestinationAllocationInSystemMemory());
    }
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -19,6 +19,8 @@
#include "opencl/test/unit_test/indirect_heap/indirect_heap_fixture.h"
#include "opencl/test/unit_test/mocks/mock_cl_device.h"
#include "test_traits_common.h"
namespace NEO {
struct CommandDeviceFixture : public ClDeviceFixture,
@@ -147,4 +149,11 @@ struct CommandQueueStateful : public CommandQueueHw<FamilyType> {
bool expectedKernelSystemMemory = false;
};
// HWTEST2 matcher: a product matches when the TestTraits of its GFX core family
// report heaplessAllowed.
struct HeaplessSupportedMatcher {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        constexpr auto coreFamily = NEO::ToGfxCoreFamily<productFamily>::get();
        return TestTraits<coreFamily>::heaplessAllowed;
    }
};
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -252,6 +252,47 @@ HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessThenStatelessK
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
}
// Builds dispatch infos with the copyBufferRectStatelessHeapless builder and verifies the
// selected kernel: it supports buffers bigger than 4 GB, its first argument is not pure
// stateful, and its implicit args place the indirect data pointer at offset 0 and the
// scratch pointer at offset 8, each with an 8-byte pointer size.
HWTEST2_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRectStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {
    if (is32bit) {
        GTEST_SKIP();
    }

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStatelessHeapless,
                                                                            pCmdQ->getClDevice());

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer;
    dc.dstMemObj = dstBuffer;
    dc.srcOffset = {0, 0, 0};
    dc.dstOffset = {0, 0, 0};
    dc.size = {50, 50, 1};
    dc.srcRowPitch = rowPitch;
    dc.srcSlicePitch = slicePitch;
    dc.dstRowPitch = rowPitch;
    dc.dstSlicePitch = slicePitch;

    MultiDispatchInfo multiDispatchInfo(dc);
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    // Fatal on purpose: begin() is dereferenced below, which is invalid when nothing was dispatched.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);

    auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

    auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
    auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

    EXPECT_EQ(0u, indirectDataPointerAddress.offset);
    EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);
    EXPECT_EQ(8u, scratchPointerAddress.offset);
    EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
}
HWTEST_F(EnqueueCopyBufferRectTest, WhenCopyingBufferRect2DThenL3ProgrammingIsCorrect) {
enqueueCopyBufferRect2D<FamilyType>();
validateL3Programming<FamilyType>(cmdList, itorWalker);

View File

@@ -263,6 +263,45 @@ HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessThenStatelessKernelIsU
EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());
}
// Builds dispatch infos with the copyBufferToBufferStatelessHeapless builder and verifies
// the selected kernel: it supports buffers bigger than 4 GB, its first argument is not pure
// stateful, and its implicit args place the indirect data pointer at offset 0 and the
// scratch pointer at offset 8, each with an 8-byte pointer size.
HWTEST2_F(EnqueueCopyBufferTest, WhenCopyingBufferStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {
    if (is32bit) {
        GTEST_SKIP();
    }

    auto srcBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    auto dstBuffer = std::unique_ptr<Buffer>(BufferHelper<>::create());
    ASSERT_NE(nullptr, srcBuffer.get());
    ASSERT_NE(nullptr, dstBuffer.get());

    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferToBufferStatelessHeapless,
                                                                            pCmdQ->getClDevice());

    BuiltinOpParams dc;
    dc.srcMemObj = srcBuffer.get();
    dc.dstMemObj = dstBuffer.get();
    dc.srcOffset = {EnqueueCopyBufferTraits::srcOffset, 0, 0};
    dc.dstOffset = {EnqueueCopyBufferTraits::dstOffset, 0, 0};
    dc.size = {EnqueueCopyBufferTraits::size, 0, 0};

    MultiDispatchInfo multiDispatchInfo(dc);
    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
    // Fatal on purpose: begin() is dereferenced below, which is invalid when nothing was dispatched.
    ASSERT_NE(0u, multiDispatchInfo.size());

    auto kernel = multiDispatchInfo.begin()->getKernel();
    ASSERT_NE(nullptr, kernel);

    auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
    EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
    EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

    auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
    auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

    EXPECT_EQ(0u, indirectDataPointerAddress.offset);
    EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);
    EXPECT_EQ(8u, scratchPointerAddress.offset);
    EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
}
HWTEST_F(EnqueueCopyBufferTest, WhenCopyingBufferThenL3ProgrammingIsCorrect) {
enqueueCopyBufferAndParse<FamilyType>();
validateL3Programming<FamilyType>(cmdList, itorWalker);

View File

@@ -344,6 +344,49 @@ HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferThenArgumentTwoShouldMatchP
context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
}
// Builds dispatch infos with the fillBufferStatelessHeapless builder, using a pattern
// MemObj backed by a freshly allocated graphics allocation, and verifies the selected
// kernel: it supports buffers bigger than 4 GB, its first argument is not pure stateful,
// and its implicit args place the indirect data pointer at offset 0 and the scratch
// pointer at offset 8, each with an 8-byte pointer size.
HWTEST2_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessHeaplessThenCorrectKernelIsUsed, HeaplessSupportedMatcher) {
    if (is32bit) {
        GTEST_SKIP();
    }

    auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});
    // The allocation is dereferenced when the pattern MemObj is built; nothing to release yet if it is null.
    ASSERT_NE(nullptr, patternAllocation);

    // Extract the kernel used
    auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::fillBufferStatelessHeapless,
                                                                            pCmdQ->getClDevice());

    BuiltinOpParams dc;
    MemObj patternMemObj(&this->context, 0, {}, 0, 0, alignUp(EnqueueFillBufferTraits::patternSize, 4), patternAllocation->getUnderlyingBuffer(),
                         patternAllocation->getUnderlyingBuffer(), GraphicsAllocationHelper::toMultiGraphicsAllocation(patternAllocation), false, false, true);
    dc.srcMemObj = &patternMemObj;
    dc.dstMemObj = buffer;
    dc.dstOffset = {EnqueueFillBufferTraits::offset, 0, 0};
    dc.size = {EnqueueFillBufferTraits::size, 0, 0};

    MultiDispatchInfo multiDispatchInfo(dc);
    // Non-fatal here so that a failure cannot skip the freeGraphicsMemory call at the end.
    EXPECT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));

    // Fatal checks live in a lambda: an ASSERT failure only leaves the lambda, so the
    // pattern allocation is still released below instead of being leaked.
    auto verifyHeaplessKernel = [&multiDispatchInfo]() {
        // begin() is dereferenced next, which is invalid when nothing was dispatched.
        ASSERT_NE(0u, multiDispatchInfo.size());

        auto kernel = multiDispatchInfo.begin()->getKernel();
        ASSERT_NE(nullptr, kernel);

        auto &kernelDescriptor = kernel->getKernelInfo().kernelDescriptor;
        EXPECT_TRUE(kernelDescriptor.kernelAttributes.supportsBuffersBiggerThan4Gb());
        EXPECT_FALSE(kernel->getKernelInfo().getArgDescriptorAt(0).as<ArgDescPointer>().isPureStateful());

        auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
        auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;

        EXPECT_EQ(0u, indirectDataPointerAddress.offset);
        EXPECT_EQ(8u, indirectDataPointerAddress.pointerSize);
        EXPECT_EQ(8u, scratchPointerAddress.offset);
        EXPECT_EQ(8u, scratchPointerAddress.pointerSize);
    };
    verifyHeaplessKernel();

    context.getMemoryManager()->freeGraphicsMemory(patternAllocation);
}
HWTEST_F(EnqueueFillBufferCmdTests, WhenFillingBufferStatelessThenStatelessKernelIsUsed) {
auto patternAllocation = context.getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{context.getDevice(0)->getRootDeviceIndex(), EnqueueFillBufferTraits::patternSize});