feature: update processing of kernel residency
- save position in the kernel internal container when an allocation can change
- reuse the same position when a new allocation arrives
- add an index container for the additional allocation of an image argument
- save the position of the additional allocation of an image argument
- reuse the position for a new image argument

Related-To: NEO-11719
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
parent d103f9f0f6, commit 0628d97ee1
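The change follows a "remember the slot, overwrite in place" pattern: the first time a transient allocation is recorded in a residency vector, its index is saved; on later calls the saved slot is overwritten instead of appending a duplicate entry. Below is a minimal, self-contained sketch of that pattern. The ResidencyTracker and Allocation names are illustrative only, not the actual NEO types.

// Sketch of the index-reuse pattern this commit applies (illustrative names,
// not the actual NEO classes).
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

struct Allocation {
    uint64_t gpuAddress = 0;
};

class ResidencyTracker {
  public:
    static constexpr size_t unset = std::numeric_limits<size_t>::max();

    // Record an allocation whose identity may change between calls
    // (e.g. a buffer that is reallocated): reuse the saved slot.
    void record(Allocation *allocation) {
        if (savedIndex == unset) {
            savedIndex = container.size();
            container.push_back(allocation);
        } else {
            container[savedIndex] = allocation;
        }
    }

    std::vector<Allocation *> container;
    size_t savedIndex = unset;
};

int main() {
    ResidencyTracker tracker;
    Allocation first{0x1000}, second{0x2000};

    tracker.record(&first);
    tracker.record(&second); // overwrites the same slot, no duplicate entry

    assert(tracker.container.size() == 1);
    assert(tracker.container[0] == &second);
    return 0;
}

The same sentinel value (std::numeric_limits<size_t>::max()) is what the diff below uses to mean "no position saved yet".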
@@ -820,7 +820,16 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
     argumentsResidencyContainer[argIndex] = image->getAllocation();
     if (image->getImplicitArgsAllocation()) {
-        this->argumentsResidencyContainer.push_back(image->getImplicitArgsAllocation());
+        if (implicitArgsResidencyContainerIndices[argIndex] == std::numeric_limits<size_t>::max()) {
+            implicitArgsResidencyContainerIndices[argIndex] = argumentsResidencyContainer.size();
+            argumentsResidencyContainer.push_back(image->getImplicitArgsAllocation());
+        } else {
+            argumentsResidencyContainer[implicitArgsResidencyContainerIndices[argIndex]] = image->getImplicitArgsAllocation();
+        }
+    } else {
+        if (implicitArgsResidencyContainerIndices[argIndex] != std::numeric_limits<size_t>::max()) {
+            argumentsResidencyContainer[implicitArgsResidencyContainerIndices[argIndex]] = nullptr;
+        }
     }

     auto imageInfo = image->getImageInfo();
@@ -1085,6 +1094,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
     }

     argumentsResidencyContainer.resize(this->kernelArgHandlers.size(), nullptr);
+    implicitArgsResidencyContainerIndices.resize(this->kernelArgHandlers.size(), std::numeric_limits<size_t>::max());

     auto &kernelAttributes = kernelDescriptor.kernelAttributes;
     if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U) && (false == module->shouldAllocatePrivateMemoryPerDispatch())) {
@@ -1179,14 +1189,24 @@ bool KernelImp::usesRegionGroupBarrier() const {
 }

 void KernelImp::patchSyncBuffer(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) {
-    this->internalResidencyContainer.push_back(gfxAllocation);
+    if (syncBufferIndex == std::numeric_limits<size_t>::max()) {
+        syncBufferIndex = this->internalResidencyContainer.size();
+        this->internalResidencyContainer.push_back(gfxAllocation);
+    } else {
+        this->internalResidencyContainer[syncBufferIndex] = gfxAllocation;
+    }
     NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
                       this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.syncBufferAddress,
                       static_cast<uintptr_t>(ptrOffset(gfxAllocation->getGpuAddressToPatch(), bufferOffset)));
 }

 void KernelImp::patchRegionGroupBarrier(NEO::GraphicsAllocation *gfxAllocation, size_t bufferOffset) {
-    this->internalResidencyContainer.push_back(gfxAllocation);
+    if (regionGroupBarrierIndex == std::numeric_limits<size_t>::max()) {
+        regionGroupBarrierIndex = this->internalResidencyContainer.size();
+        this->internalResidencyContainer.push_back(gfxAllocation);
+    } else {
+        this->internalResidencyContainer[regionGroupBarrierIndex] = gfxAllocation;
+    }

     NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
                       this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.regionGroupBarrierBuffer,
@@ -215,10 +215,13 @@ struct KernelImp : Kernel {
     std::vector<KernelArgInfo> kernelArgInfos;
     std::vector<KernelImp::KernelArgHandler> kernelArgHandlers;
     std::vector<NEO::GraphicsAllocation *> argumentsResidencyContainer;
+    std::vector<size_t> implicitArgsResidencyContainerIndices;
     std::vector<NEO::GraphicsAllocation *> internalResidencyContainer;

     std::mutex *devicePrintfKernelMutex = nullptr;
     NEO::GraphicsAllocation *printfBuffer = nullptr;
+    size_t syncBufferIndex = std::numeric_limits<size_t>::max();
+    size_t regionGroupBarrierIndex = std::numeric_limits<size_t>::max();

     uint32_t groupSize[3] = {0u, 0u, 0u};
     uint32_t numThreadsPerThreadGroup = 1u;
@@ -45,6 +45,7 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
     using ::L0::KernelImp::dynamicStateHeapData;
     using ::L0::KernelImp::dynamicStateHeapDataSize;
     using ::L0::KernelImp::groupSize;
+    using ::L0::KernelImp::implicitArgsResidencyContainerIndices;
     using ::L0::KernelImp::internalResidencyContainer;
     using ::L0::KernelImp::isBindlessOffsetSet;
     using ::L0::KernelImp::kernelHasIndirectAccess;
@@ -61,12 +62,14 @@ struct WhiteBox<::L0::KernelImp> : public ::L0::KernelImp {
     using ::L0::KernelImp::perThreadDataSizeForWholeThreadGroup;
     using ::L0::KernelImp::pImplicitArgs;
     using ::L0::KernelImp::printfBuffer;
+    using ::L0::KernelImp::regionGroupBarrierIndex;
     using ::L0::KernelImp::requiredWorkgroupOrder;
     using ::L0::KernelImp::setAssertBuffer;
     using ::L0::KernelImp::slmArgsTotalSize;
     using ::L0::KernelImp::suggestGroupSizeCache;
     using ::L0::KernelImp::surfaceStateHeapData;
     using ::L0::KernelImp::surfaceStateHeapDataSize;
+    using ::L0::KernelImp::syncBufferIndex;
     using ::L0::KernelImp::unifiedMemoryControls;
     using ::L0::KernelImp::usingSurfaceStateHeap;
@@ -17,6 +17,7 @@
 #include "shared/test/common/helpers/unit_test_helper.h"
 #include "shared/test/common/libult/ult_command_stream_receiver.h"
 #include "shared/test/common/mocks/mock_device.h"
+#include "shared/test/common/mocks/mock_sync_buffer_handler.h"
 #include "shared/test/common/test_macros/hw_test.h"

 #include "level_zero/core/source/event/event.h"
@@ -435,6 +436,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
     Mock<::L0::KernelImp> kernel;
     auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
     kernel.module = pMockModule.get();
+    EXPECT_EQ(std::numeric_limits<size_t>::max(), kernel.syncBufferIndex);

     kernel.setGroupSize(4, 1, 1);
     ze_group_count_t groupCount{8, 1, 1};
@@ -458,6 +460,15 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
     auto result = pCommandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, cooperativeParams, false);
     EXPECT_EQ(ZE_RESULT_SUCCESS, result);

+    auto mockSyncBufferHandler = reinterpret_cast<MockSyncBufferHandler *>(device->getNEODevice()->syncBufferHandler.get());
+    auto syncBufferAllocation = mockSyncBufferHandler->graphicsAllocation;
+
+    EXPECT_NE(std::numeric_limits<size_t>::max(), kernel.syncBufferIndex);
+    auto syncBufferAllocationIt = std::find(kernel.internalResidencyContainer.begin(), kernel.internalResidencyContainer.end(), syncBufferAllocation);
+    ASSERT_NE(kernel.internalResidencyContainer.end(), syncBufferAllocationIt);
+    auto expectedIndex = static_cast<size_t>(std::distance(kernel.internalResidencyContainer.begin(), syncBufferAllocationIt));
+    EXPECT_EQ(expectedIndex, kernel.syncBufferIndex);
+
     pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
     pCommandList->initialize(device, engineGroupType, 0u);
     CmdListKernelLaunchParams launchParams = {};
@@ -465,6 +476,14 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
     result = pCommandList->appendLaunchKernelWithParams(&kernel, groupCount, nullptr, launchParams);
     EXPECT_EQ(ZE_RESULT_SUCCESS, result);

+    // sync buffer index once set should not change
+    EXPECT_EQ(expectedIndex, kernel.syncBufferIndex);
+    syncBufferAllocationIt = std::find(kernel.internalResidencyContainer.begin(), kernel.internalResidencyContainer.end(), syncBufferAllocation);
+    ASSERT_NE(kernel.internalResidencyContainer.end(), syncBufferAllocationIt);
+    // verify syncBufferAllocation is added only once
+    auto notFoundIt = std::find(syncBufferAllocationIt + 1, kernel.internalResidencyContainer.end(), syncBufferAllocation);
+    EXPECT_EQ(kernel.internalResidencyContainer.end(), notFoundIt);
+
     {
         VariableBackup<std::array<bool, 4>> usesSyncBuffer{&kernelAttributes.flags.packed};
         usesSyncBuffer = {};
@@ -498,6 +517,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
     Mock<::L0::KernelImp> kernel;
     auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();
+    EXPECT_EQ(std::numeric_limits<size_t>::max(), kernel.regionGroupBarrierIndex);

     kernel.crossThreadData = std::make_unique<uint8_t[]>(64);
     kernel.crossThreadDataSize = 64;
@@ -528,9 +548,23 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
         return element.first->getGpuAddressToPatch() == patchPtr;
     });
     ASSERT_NE(ultCsr->makeResidentAllocations.end(), allocIter);
+    auto regionGroupBarrierAllocation = allocIter->first;
+
+    auto regionGroupBarrierAllocIt = std::find(kernel.internalResidencyContainer.begin(), kernel.internalResidencyContainer.end(), regionGroupBarrierAllocation);
+    ASSERT_NE(kernel.internalResidencyContainer.end(), regionGroupBarrierAllocIt);
+    auto expectedIndex = static_cast<size_t>(std::distance(kernel.internalResidencyContainer.begin(), regionGroupBarrierAllocIt));
+    EXPECT_EQ(expectedIndex, kernel.regionGroupBarrierIndex);

     EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false));

+    // region group barrier index once set should not change
+    EXPECT_EQ(expectedIndex, kernel.regionGroupBarrierIndex);
+    regionGroupBarrierAllocIt = std::find(kernel.internalResidencyContainer.begin(), kernel.internalResidencyContainer.end(), regionGroupBarrierAllocation);
+    ASSERT_NE(kernel.internalResidencyContainer.end(), regionGroupBarrierAllocIt);
+    // verify regionGroupBarrierAllocation is added only once
+    auto notFoundIt = std::find(regionGroupBarrierAllocIt + 1, kernel.internalResidencyContainer.end(), regionGroupBarrierAllocation);
+    EXPECT_EQ(kernel.internalResidencyContainer.end(), notFoundIt);
+
     auto patchPtr2 = *reinterpret_cast<uint64_t *>(ptrOffset(kernel.crossThreadData.get(), regionGroupBarrier.stateless));

     size_t requestedNumberOfWorkgroups = groupCount.groupCountX * groupCount.groupCountY * groupCount.groupCountZ;
@@ -675,6 +675,115 @@ TEST_F(SetKernelArg, givenDisableSystemPointerKernelArgumentIsEnabledWhenBufferA
     EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, res);
 }

+HWTEST2_F(SetKernelArg, givenBindlessImageAndKernelFromNativeWhenSetArgImageCalledThenResidencyContainerHasSingleImplicitArgAllocation, ImageSupport) {
+    auto neoDevice = device->getNEODevice();
+    if (!neoDevice->getRootDeviceEnvironment().getReleaseHelper() ||
+        !neoDevice->getDeviceInfo().imageSupport) {
+        GTEST_SKIP();
+    }
+
+    constexpr uint32_t imageArgIndex = 3;
+    createKernel();
+
+    auto &imageArg = const_cast<NEO::ArgDescImage &>(kernel->kernelImmData->getDescriptor().payloadMappings.explicitArgs[imageArgIndex].as<NEO::ArgDescImage>());
+    imageArg.metadataPayload.imgWidth = 0x1c;
+    imageArg.metadataPayload.imgHeight = 0x18;
+    imageArg.metadataPayload.imgDepth = 0x14;
+
+    imageArg.metadataPayload.arraySize = 0x10;
+    imageArg.metadataPayload.numSamples = 0xc;
+    imageArg.metadataPayload.channelDataType = 0x8;
+    imageArg.metadataPayload.channelOrder = 0x4;
+    imageArg.metadataPayload.numMipLevels = 0x0;
+
+    imageArg.metadataPayload.flatWidth = 0x30;
+    imageArg.metadataPayload.flatHeight = 0x2c;
+    imageArg.metadataPayload.flatPitch = 0x28;
+    imageArg.metadataPayload.flatBaseOffset = 0x20;
+
+    ze_image_desc_t desc = {};
+
+    desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
+    desc.type = ZE_IMAGE_TYPE_3D;
+    desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2;
+    desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT;
+    desc.width = 11;
+    desc.height = 13;
+    desc.depth = 17;
+
+    desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A;
+    desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0;
+    desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1;
+    desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X;
+
+    auto imageBasic = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
+    auto ret = imageBasic->initialize(device, &desc);
+    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
+    auto imageBasicHandle = imageBasic->toHandle();
+
+    auto bindlessHelper = new MockBindlesHeapsHelper(neoDevice,
+                                                     neoDevice->getNumGenericSubDevices() > 1);
+    neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(bindlessHelper);
+
+    ze_image_bindless_exp_desc_t bindlessExtDesc = {};
+    bindlessExtDesc.stype = ZE_STRUCTURE_TYPE_BINDLESS_IMAGE_EXP_DESC;
+    bindlessExtDesc.pNext = nullptr;
+    bindlessExtDesc.flags = ZE_IMAGE_BINDLESS_EXP_FLAG_BINDLESS;
+
+    desc = {};
+    desc.pNext = &bindlessExtDesc;
+
+    desc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
+    desc.type = ZE_IMAGE_TYPE_3D;
+    desc.format.layout = ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8;
+    desc.format.type = ZE_IMAGE_FORMAT_TYPE_UINT;
+    desc.width = 11;
+    desc.height = 13;
+    desc.depth = 17;
+
+    desc.format.x = ZE_IMAGE_FORMAT_SWIZZLE_A;
+    desc.format.y = ZE_IMAGE_FORMAT_SWIZZLE_0;
+    desc.format.z = ZE_IMAGE_FORMAT_SWIZZLE_1;
+    desc.format.w = ZE_IMAGE_FORMAT_SWIZZLE_X;
+
+    auto imageBindless1 = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
+    ret = imageBindless1->initialize(device, &desc);
+    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
+
+    auto imgImplicitArgsAlloc1 = imageBindless1->getImplicitArgsAllocation();
+    auto imageBindlessHandle1 = imageBindless1->toHandle();
+
+    auto imageBindless2 = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
+    ret = imageBindless2->initialize(device, &desc);
+    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);
+
+    auto imgImplicitArgsAlloc2 = imageBindless2->getImplicitArgsAllocation();
+    auto imageBindlessHandle2 = imageBindless2->toHandle();
+
+    EXPECT_EQ(std::numeric_limits<size_t>::max(), kernel->implicitArgsResidencyContainerIndices[imageArgIndex]);
+
+    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setArgImage(imageArgIndex, sizeof(imageBindless1.get()), &imageBindlessHandle1));
+
+    auto implicitArgIt = std::find(kernel->argumentsResidencyContainer.begin(), kernel->argumentsResidencyContainer.end(), imgImplicitArgsAlloc1);
+    ASSERT_NE(kernel->argumentsResidencyContainer.end(), implicitArgIt);
+    auto expectedDistance = static_cast<size_t>(std::distance(kernel->argumentsResidencyContainer.begin(), implicitArgIt));
+    EXPECT_EQ(expectedDistance, kernel->implicitArgsResidencyContainerIndices[imageArgIndex]);
+    EXPECT_EQ(imgImplicitArgsAlloc1, kernel->argumentsResidencyContainer[kernel->implicitArgsResidencyContainerIndices[imageArgIndex]]);
+
+    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setArgImage(imageArgIndex, sizeof(imageBindless2.get()), &imageBindlessHandle2));
+
+    implicitArgIt = std::find(kernel->argumentsResidencyContainer.begin(), kernel->argumentsResidencyContainer.end(), imgImplicitArgsAlloc2);
+    ASSERT_NE(kernel->argumentsResidencyContainer.end(), implicitArgIt);
+    auto expectedDistance2 = static_cast<size_t>(std::distance(kernel->argumentsResidencyContainer.begin(), implicitArgIt));
+    EXPECT_EQ(expectedDistance2, kernel->implicitArgsResidencyContainerIndices[imageArgIndex]);
+    EXPECT_EQ(expectedDistance, expectedDistance2);
+    EXPECT_EQ(imgImplicitArgsAlloc2, kernel->argumentsResidencyContainer[kernel->implicitArgsResidencyContainerIndices[imageArgIndex]]);
+
+    EXPECT_EQ(ZE_RESULT_SUCCESS, kernel->setArgImage(imageArgIndex, sizeof(imageBasic.get()), &imageBasicHandle));
+
+    EXPECT_EQ(nullptr, kernel->argumentsResidencyContainer[kernel->implicitArgsResidencyContainerIndices[imageArgIndex]]);
+}
+
 using KernelImmutableDataTests = Test<ModuleImmutableDataFixture>;

 TEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) {
@@ -17,6 +17,7 @@
 #include "shared/test/common/mocks/mock_csr.h"
 #include "shared/test/common/mocks/mock_internal_allocation_storage.h"
 #include "shared/test/common/mocks/mock_os_context.h"
+#include "shared/test/common/mocks/mock_sync_buffer_handler.h"
 #include "shared/test/common/mocks/mock_timestamp_container.h"
 #include "shared/test/common/test_macros/hw_test.h"
 #include "shared/test/common/utilities/base_object_utils.h"
@@ -687,10 +688,6 @@ HWTEST_F(EnqueueHandlerTest, givenKernelUsingSyncBufferWhenEnqueuingKernelThenSs
         GTEST_SKIP();
     }

-    struct MockSyncBufferHandler : SyncBufferHandler {
-        using SyncBufferHandler::graphicsAllocation;
-    };
-
     pDevice->allocateSyncBufferHandler();

     size_t offset = 0;
@@ -6,8 +6,8 @@
  */

 #include "shared/source/helpers/gfx_core_helper.h"
-#include "shared/source/program/sync_buffer_handler.h"
 #include "shared/source/release_helper/release_helper.h"
+#include "shared/test/common/mocks/mock_sync_buffer_handler.h"
 #include "shared/test/common/mocks/ult_device_factory.h"
 #include "shared/test/common/test_macros/hw_test.h"

@@ -22,13 +22,6 @@

 using namespace NEO;

-class MockSyncBufferHandler : public SyncBufferHandler {
-  public:
-    using SyncBufferHandler::bufferSize;
-    using SyncBufferHandler::graphicsAllocation;
-    using SyncBufferHandler::usedBufferSize;
-};
-
 class SyncBufferEnqueueHandlerTest : public EnqueueHandlerTest {
   public:
     void SetUp() override {
@@ -90,6 +90,7 @@ set(NEO_CORE_tests_mocks
     ${CMAKE_CURRENT_SOURCE_DIR}/mock_sip.h
     ${CMAKE_CURRENT_SOURCE_DIR}/mock_submissions_aggregator.h
     ${CMAKE_CURRENT_SOURCE_DIR}/mock_svm_manager.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/mock_sync_buffer_handler.h
     ${CMAKE_CURRENT_SOURCE_DIR}/mock_tbx_csr.h
     ${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_container.h
     ${CMAKE_CURRENT_SOURCE_DIR}/mock_timestamp_packet.h
@@ -0,0 +1,17 @@
+/*
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ */
+
+#pragma once
+
+#include "shared/source/program/sync_buffer_handler.h"
+
+class MockSyncBufferHandler : public NEO::SyncBufferHandler {
+  public:
+    using SyncBufferHandler::bufferSize;
+    using SyncBufferHandler::graphicsAllocation;
+    using SyncBufferHandler::usedBufferSize;
+};