mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-25 13:33:02 +08:00
Patch bindless offset
Related-To: NEO-4724 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
223de6dd8c
commit
c80353587f
@@ -8,6 +8,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/helpers/bindless_heaps_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
|
||||
#include "level_zero/core/source/kernel/kernel_imp.h"
|
||||
@@ -42,8 +44,12 @@ struct KernelHw : public KernelImp {
|
||||
DEBUG_BREAK_IF(baseAddress != (baseAddress & sshAlignmentMask));
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
auto surfaceStateAddress = ptrOffset(surfaceStateHeapData.get(), argInfo.bindful);
|
||||
void *surfaceStateAddress = nullptr;
|
||||
if (NEO::isValidOffset(argInfo.bindless)) {
|
||||
surfaceStateAddress = patchBindlessSurfaceState(alloc, argInfo.bindless);
|
||||
} else {
|
||||
surfaceStateAddress = ptrOffset(surfaceStateHeapData.get(), argInfo.bindful);
|
||||
}
|
||||
uint64_t bufferAddressForSsh = baseAddress;
|
||||
auto alignment = NEO::EncodeSurfaceState<GfxFamily>::getSurfaceBaseAddressAlignment();
|
||||
size_t bufferSizeForSsh = ptrDiff(alloc->getGpuAddress(), bufferAddressForSsh);
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/blit_commands_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/kernel_helpers.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
@@ -464,7 +465,7 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal
|
||||
const auto val = argVal;
|
||||
|
||||
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize), arg, val);
|
||||
if (NEO::isValidOffset(arg.bindful)) {
|
||||
if (NEO::isValidOffset(arg.bindful) || NEO::isValidOffset(arg.bindless)) {
|
||||
setBufferSurfaceState(argIndex, reinterpret_cast<void *>(val), allocation);
|
||||
}
|
||||
residencyContainer[argIndex] = allocation;
|
||||
@@ -525,7 +526,11 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
|
||||
}
|
||||
|
||||
const auto image = Image::fromHandle(*static_cast<const ze_image_handle_t *>(argVal));
|
||||
image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful);
|
||||
if (kernelImmData->getDescriptor().kernelAttributes.imageAddressingMode == NEO::KernelDescriptor::Bindless) {
|
||||
image->copySurfaceStateToSSH(patchBindlessSurfaceState(image->getAllocation(), arg.bindless), 0u);
|
||||
} else {
|
||||
image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful);
|
||||
}
|
||||
residencyContainer[argIndex] = image->getAllocation();
|
||||
|
||||
auto imageInfo = image->getImageInfo();
|
||||
@@ -707,7 +712,17 @@ void KernelImp::setDebugSurface() {
|
||||
*device->getNEODevice());
|
||||
}
|
||||
}
|
||||
|
||||
// Allocates a surface-state slot for `alloc` in the device's global bindless heap,
// adds the backing heap allocation to this kernel's residency container, and writes
// the slot's extended-message-descriptor value into cross-thread data at byte
// offset `bindless`.
// Returns the `ssPtr` of the allocated slot so the caller can fill in the surface state.
void *KernelImp::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless) {
    auto l0Device = this->module->getDevice();
    auto &hwHelper = NEO::HwHelper::get(l0Device->getHwInfo().platform.eRenderCoreFamily);

    auto *heapsHelper = l0Device->getNEODevice()->getBindlessHeapsHelper();
    auto heapSlot = heapsHelper->allocateSSInHeap(hwHelper.getRenderSurfaceStateSize(), alloc, NEO::BindlessHeapsHelper::GLOBAL_SSH);
    this->residencyContainer.push_back(heapSlot.heapAllocation);

    // getCrossThreadData() hands back a const pointer, so constness is cast away before patching.
    auto descriptorValue = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(heapSlot.surfaceStateOffset));
    auto *crossThreadSlot = const_cast<uint8_t *>(ptrOffset(getCrossThreadData(), bindless));
    patchWithRequiredSize(crossThreadSlot, sizeof(descriptorValue), descriptorValue);

    return heapSlot.ssPtr;
}
|
||||
void KernelImp::patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z) {
|
||||
const NEO::KernelDescriptor &desc = kernelImmData->getDescriptor();
|
||||
auto dst = ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize);
|
||||
|
||||
@@ -112,6 +112,7 @@ struct KernelImp : Kernel {
|
||||
void createPrintfBuffer();
|
||||
void setDebugSurface();
|
||||
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
|
||||
void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless);
|
||||
|
||||
const KernelImmutableData *kernelImmData = nullptr;
|
||||
Module *module = nullptr;
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h"
|
||||
|
||||
#include "level_zero/core/source/kernel/kernel_hw.h"
|
||||
#include "level_zero/core/source/kernel/kernel_imp.h"
|
||||
#include "level_zero/core/test/unit_tests/mock.h"
|
||||
#include "level_zero/core/test/unit_tests/white_box.h"
|
||||
@@ -44,6 +45,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
|
||||
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
using ::L0::KernelImp::module;
|
||||
using ::L0::KernelImp::numThreadsPerThreadGroup;
|
||||
using ::L0::KernelImp::patchBindlessSurfaceState;
|
||||
using ::L0::KernelImp::perThreadDataForWholeThreadGroup;
|
||||
using ::L0::KernelImp::perThreadDataSize;
|
||||
using ::L0::KernelImp::perThreadDataSizeForWholeThreadGroup;
|
||||
@@ -61,6 +63,34 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
|
||||
|
||||
WhiteBox() : ::L0::KernelImp(nullptr) {}
|
||||
};
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct WhiteBoxKernelHw : public KernelHw<gfxCoreFamily> {
|
||||
using BaseClass = KernelHw<gfxCoreFamily>;
|
||||
using BaseClass::BaseClass;
|
||||
using ::L0::KernelImp::createPrintfBuffer;
|
||||
using ::L0::KernelImp::crossThreadData;
|
||||
using ::L0::KernelImp::crossThreadDataSize;
|
||||
using ::L0::KernelImp::groupSize;
|
||||
using ::L0::KernelImp::kernelImmData;
|
||||
using ::L0::KernelImp::kernelRequiresGenerationOfLocalIdsByRuntime;
|
||||
using ::L0::KernelImp::module;
|
||||
using ::L0::KernelImp::numThreadsPerThreadGroup;
|
||||
using ::L0::KernelImp::patchBindlessSurfaceState;
|
||||
using ::L0::KernelImp::perThreadDataForWholeThreadGroup;
|
||||
using ::L0::KernelImp::perThreadDataSize;
|
||||
using ::L0::KernelImp::perThreadDataSizeForWholeThreadGroup;
|
||||
using ::L0::KernelImp::printfBuffer;
|
||||
using ::L0::KernelImp::requiredWorkgroupOrder;
|
||||
using ::L0::KernelImp::residencyContainer;
|
||||
using ::L0::KernelImp::surfaceStateHeapData;
|
||||
using ::L0::KernelImp::unifiedMemoryControls;
|
||||
|
||||
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {}
|
||||
|
||||
std::unique_ptr<Kernel> clone() const override { return nullptr; }
|
||||
|
||||
WhiteBoxKernelHw() : ::L0::KernelHw<gfxCoreFamily>(nullptr) {}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct Mock<::L0::Kernel> : public WhiteBox<::L0::Kernel> {
|
||||
@@ -84,6 +114,7 @@ struct Mock<::L0::Kernel> : public WhiteBox<::L0::Kernel> {
|
||||
|
||||
NEO::populateKernelDescriptor(descriptor, kernelTokens, 8);
|
||||
immutableData.kernelDescriptor = &descriptor;
|
||||
crossThreadData.reset(new uint8_t[100]);
|
||||
}
|
||||
~Mock() override {
|
||||
delete immutableData.isaGraphicsAllocation.release();
|
||||
|
||||
@@ -5,14 +5,19 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/device_binary_format/patchtokens_decoder.h"
|
||||
#include "shared/test/unit_test/device_binary_format/patchtokens_tests.h"
|
||||
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/unit_test/mocks/mock_device.h"
|
||||
#include "shared/test/unit_test/mocks/mock_graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/program/kernel_info.h"
|
||||
#include "opencl/source/program/kernel_info_from_patchtokens.h"
|
||||
#include "test.h"
|
||||
|
||||
#include "level_zero/core/source/image/image_format_desc_helper.h"
|
||||
#include "level_zero/core/source/image/image_hw.h"
|
||||
#include "level_zero/core/source/kernel/kernel_hw.h"
|
||||
#include "level_zero/core/source/module/module_imp.h"
|
||||
#include "level_zero/core/source/sampler/sampler_hw.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
@@ -21,6 +26,8 @@
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
|
||||
void NEO::populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes);
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
@@ -573,5 +580,215 @@ TEST_F(KernelIsaTests, givenGlobalBuffersWhenCreatingKernelImmutableDataThenBuff
|
||||
EXPECT_EQ(1, std::count(resCont.begin(), resCont.end(), &globalConstBuffer));
|
||||
}
|
||||
|
||||
using KernelImpPatchBindlessTest = Test<ModuleFixture>;
|
||||
|
||||
// Verifies that patchBindlessSurfaceState returns the heap slot reserved for the
// allocation, writes the matching descriptor value into cross-thread data, and
// makes the heap allocation resident.
TEST_F(KernelImpPatchBindlessTest, GivenKernelImpWhenPatchBindlessOffsetCalledThenOffsetPatchedCorrectly) {
    Mock<Kernel> kernel;
    WhiteBox<::L0::DeviceImp> mockDevice;
    mockDevice.neoDevice = neoDevice;
    // NOTE(review): presumably balances a reference released when mockDevice is destroyed - confirm.
    neoDevice->incRefInternal();

    const bool isMultiDevice = neoDevice->getNumAvailableDevices() > 1;
    neoDevice->bindlessHeapHelper.reset(new NEO::BindlessHeapsHelper(neoDevice->getMemoryManager(), isMultiDevice, neoDevice->getRootDeviceIndex()));

    Mock<Module> mockModule(&mockDevice, nullptr);
    kernel.module = &mockModule;

    NEO::MockGraphicsAllocation alloc;
    uint32_t bindless = 0x40;

    // Reserve the slot up front; the kernel call below is expected to land on the same slot.
    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t surfaceStateSize = hwHelper.getRenderSurfaceStateSize();
    auto *heapsHelper = device->getNEODevice()->getBindlessHeapsHelper();
    auto expectedSsInHeap = heapsHelper->allocateSSInHeap(surfaceStateSize, &alloc, NEO::BindlessHeapsHelper::GLOBAL_SSH);

    auto patchLocation = ptrOffset(kernel.getCrossThreadData(), bindless);
    auto patchValue = hwHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(expectedSsInHeap.surfaceStateOffset));

    auto ssPtr = kernel.patchBindlessSurfaceState(&alloc, bindless);

    EXPECT_EQ(ssPtr, expectedSsInHeap.ssPtr);
    EXPECT_EQ(0, memcmp(patchLocation, &patchValue, sizeof(patchValue)));

    const auto &residency = kernel.getResidencyContainer();
    EXPECT_NE(residency.end(), std::find(residency.begin(), residency.end(), expectedSsInHeap.heapAllocation));
}
|
||||
|
||||
// With a valid bindless offset on argument 0, setBufferSurfaceState must program the
// surface state slot that lives in the bindless heap.
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindlessThenSurfaceStateUpdated, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    // Force argument 0 into bindless-only addressing.
    const auto &firstArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &arg = const_cast<NEO::ArgDescPointer &>(firstArg.template as<NEO::ArgDescPointer>());
    arg.bindless = 0x40;
    arg.bindful = undefined<SurfaceStateHeapOffset>;

    const bool isMultiDevice = neoDevice->getNumAvailableDevices() > 1;
    neoDevice->bindlessHeapHelper.reset(new NEO::BindlessHeapsHelper(neoDevice->getMemoryManager(), isMultiDevice, neoDevice->getRootDeviceIndex()));

    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t size = hwHelper.getRenderSurfaceStateSize();
    uint64_t gpuAddress = 0x2000;
    void *buffer = reinterpret_cast<void *>(gpuAddress);

    NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
    // Reserve the slot first; the check below relies on the kernel writing to this same slot.
    auto *heapsHelper = device->getNEODevice()->getBindlessHeapsHelper();
    auto expectedSsInHeap = heapsHelper->allocateSSInHeap(size, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
    auto *slot = reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);

    // Zero the slot and snapshot it so any later programming is detectable.
    memset(expectedSsInHeap.ssPtr, 0, size);
    auto surfaceStateBefore = *slot;

    mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);

    auto surfaceStateAfter = *slot;

    EXPECT_NE(0, memcmp(&surfaceStateAfter, &surfaceStateBefore, size));
}
|
||||
|
||||
// With only a bindful offset on argument 0, setBufferSurfaceState must leave the
// bindless heap slot untouched.
HWTEST2_F(KernelImpPatchBindlessTest, GivenKernelImpWhenSetSurfaceStateBindfulThenSurfaceStateNotUpdated, MatchAny) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    WhiteBoxKernelHw<gfxCoreFamily> mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    // Force argument 0 into bindful-only addressing.
    const auto &firstArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &arg = const_cast<NEO::ArgDescPointer &>(firstArg.template as<NEO::ArgDescPointer>());
    arg.bindless = undefined<CrossThreadDataOffset>;
    arg.bindful = 0x40;

    const bool isMultiDevice = neoDevice->getNumAvailableDevices() > 1;
    neoDevice->bindlessHeapHelper.reset(new NEO::BindlessHeapsHelper(neoDevice->getMemoryManager(), isMultiDevice, neoDevice->getRootDeviceIndex()));

    auto &hwHelper = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily);
    size_t size = hwHelper.getRenderSurfaceStateSize();
    uint64_t gpuAddress = 0x2000;
    void *buffer = reinterpret_cast<void *>(gpuAddress);

    NEO::MockGraphicsAllocation mockAllocation(buffer, gpuAddress, size);
    auto *heapsHelper = device->getNEODevice()->getBindlessHeapsHelper();
    auto expectedSsInHeap = heapsHelper->allocateSSInHeap(size, &mockAllocation, NEO::BindlessHeapsHelper::GLOBAL_SSH);
    auto *slot = reinterpret_cast<RENDER_SURFACE_STATE *>(expectedSsInHeap.ssPtr);

    // Zero the slot and snapshot it; the snapshot must still match after the call.
    memset(expectedSsInHeap.ssPtr, 0, size);
    auto surfaceStateBefore = *slot;

    mockKernel.setBufferSurfaceState(0, buffer, &mockAllocation);

    auto surfaceStateAfter = *slot;

    EXPECT_EQ(0, memcmp(&surfaceStateAfter, &surfaceStateBefore, size));
}
|
||||
|
||||
// Mock kernel whose setBufferSurfaceState only records that it was invoked.
struct MyMockKernel : public Mock<Kernel> {
    // Set to true by the first call to setBufferSurfaceState.
    bool setSurfaceStateCalled = false;

    // Arguments are intentionally ignored; only the call itself is tracked.
    void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
        setSurfaceStateCalled = true;
    }
};
|
||||
|
||||
// A valid bindless offset alone is enough for setArgBufferWithAlloc to program a surface state.
TEST_F(KernelImpPatchBindlessTest, GivenValidBindlessOffsetWhenSetArgBufferWithAllocThensetBufferSurfaceStateCalled) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    // Argument 0: bindless offset valid, bindful offset undefined.
    const auto &firstArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &ptrArg = const_cast<NEO::ArgDescPointer &>(firstArg.as<NEO::ArgDescPointer>());
    ptrArg.bindless = 0x40;
    ptrArg.bindful = undefined<SurfaceStateHeapOffset>;

    NEO::MockGraphicsAllocation allocation;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &allocation);

    EXPECT_TRUE(mockKernel.setSurfaceStateCalled);
}
|
||||
|
||||
// A valid bindful offset alone is enough for setArgBufferWithAlloc to program a surface state.
TEST_F(KernelImpPatchBindlessTest, GivenValidBindfulOffsetWhenSetArgBufferWithAllocThensetBufferSurfaceStateCalled) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    // Argument 0: bindful offset valid, bindless offset undefined.
    const auto &firstArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &ptrArg = const_cast<NEO::ArgDescPointer &>(firstArg.as<NEO::ArgDescPointer>());
    ptrArg.bindless = undefined<CrossThreadDataOffset>;
    ptrArg.bindful = 0x40;

    NEO::MockGraphicsAllocation allocation;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &allocation);

    EXPECT_TRUE(mockKernel.setSurfaceStateCalled);
}
|
||||
|
||||
// When neither a bindful nor a bindless offset is defined, setArgBufferWithAlloc must not
// program a surface state.
TEST_F(KernelImpPatchBindlessTest, GivenUndefiedBidfulAndBindlesstOffsetWhenSetArgBufferWithAllocThenSetBufferSurfaceStateIsNotCalled) {
    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    MyMockKernel mockKernel;
    mockKernel.module = module.get();
    mockKernel.initialize(&kernelDesc);

    // Argument 0: both offsets undefined.
    const auto &firstArg = mockKernel.kernelImmData->getDescriptor().payloadMappings.explicitArgs[0];
    auto &ptrArg = const_cast<NEO::ArgDescPointer &>(firstArg.as<NEO::ArgDescPointer>());
    ptrArg.bindless = undefined<CrossThreadDataOffset>;
    ptrArg.bindful = undefined<SurfaceStateHeapOffset>;

    NEO::MockGraphicsAllocation allocation;
    mockKernel.setArgBufferWithAlloc(0, 0x1234, &allocation);

    EXPECT_FALSE(mockKernel.setSurfaceStateCalled);
}
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct MyMockImage : public WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>> {
|
||||
//MyMockImage() : WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>();
|
||||
void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override {
|
||||
passedSurfaceStateHeap = surfaceStateHeap;
|
||||
passedSurfaceStateOffset = surfaceStateOffset;
|
||||
}
|
||||
void *passedSurfaceStateHeap = nullptr;
|
||||
uint32_t passedSurfaceStateOffset = 0;
|
||||
};
|
||||
|
||||
// In bindless image addressing mode, setArgImage must hand the image the bindless heap
// slot reserved for its allocation, with a zero offset.
HWTEST2_F(SetKernelArg, givenImageAndBindlessKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
    createKernel();

    const bool isMultiDevice = neoDevice->getNumAvailableDevices() > 1;
    neoDevice->bindlessHeapHelper.reset(new NEO::BindlessHeapsHelper(neoDevice->getMemoryManager(), isMultiDevice, neoDevice->getRootDeviceIndex()));

    // Switch the kernel descriptor to bindless images and give argument 3 a bindless offset.
    const auto &descriptor = kernel->kernelImmData->getDescriptor();
    auto &imageArg = const_cast<NEO::ArgDescImage &>(descriptor.payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());
    const_cast<NEO::KernelDescriptor::AddressingMode &>(descriptor.kernelAttributes.imageAddressingMode) = NEO::KernelDescriptor::Bindless;
    imageArg.bindless = 0x0;
    imageArg.bindful = undefined<SurfaceStateHeapOffset>;

    ze_image_desc_t imageDesc = {};
    auto &hwHelper = NEO::HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily);
    auto surfaceStateSize = hwHelper.getRenderSurfaceStateSize();

    auto imageHW = std::make_unique<MyMockImage<gfxCoreFamily>>();
    auto ret = imageHW->initialize(device, &imageDesc);
    auto handle = imageHW->toHandle();
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);

    // Reserve the slot first; setArgImage is expected to pass this same slot to the image.
    auto expectedSsInHeap = neoDevice->bindlessHeapHelper->allocateSSInHeap(surfaceStateSize, imageHW->getAllocation(), BindlessHeapsHelper::BindlesHeapType::GLOBAL_SSH);

    kernel->setArgImage(3, sizeof(imageHW.get()), &handle);

    EXPECT_EQ(imageHW->passedSurfaceStateHeap, expectedSsInHeap.ssPtr);
    EXPECT_EQ(imageHW->passedSurfaceStateOffset, 0u);
}
|
||||
|
||||
// In bindful image addressing mode, setArgImage must hand the image the kernel's own
// surface state heap together with the argument's bindful offset.
HWTEST2_F(SetKernelArg, givenImageAndBindfulKernelWhenSetArgImageThenCopySurfaceStateToSSHCalledWithCorrectArgs, ImageSupport) {
    createKernel();

    const auto &descriptor = kernel->kernelImmData->getDescriptor();
    auto &imageArg = const_cast<NEO::ArgDescImage &>(descriptor.payloadMappings.explicitArgs[3].template as<NEO::ArgDescImage>());

    // Bind by reference: a plain `auto` would deduce a value type, so the assignment would
    // only modify a local copy and the test would silently depend on the descriptor's
    // default addressing mode.
    auto &addressingMode = const_cast<NEO::KernelDescriptor::AddressingMode &>(descriptor.kernelAttributes.imageAddressingMode);
    addressingMode = NEO::KernelDescriptor::Bindful;
    imageArg.bindless = undefined<CrossThreadDataOffset>;
    imageArg.bindful = 0x40;

    ze_image_desc_t desc = {};

    auto imageHW = std::make_unique<MyMockImage<gfxCoreFamily>>();
    auto ret = imageHW->initialize(device, &desc);
    auto handle = imageHW->toHandle();
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);

    // The argument value is the image handle, so its size is what gets reported.
    kernel->setArgImage(3, sizeof(handle), &handle);

    EXPECT_EQ(imageHW->passedSurfaceStateHeap, kernel->getSurfaceStateHeapData());
    EXPECT_EQ(imageHW->passedSurfaceStateOffset, imageArg.bindful);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -274,3 +274,13 @@ TEST(SubDevicesTest, whenInitializeRootCsrThenDirectSubmissionIsNotInitialized)
|
||||
auto csr = device->getEngine(1u).commandStreamReceiver;
|
||||
EXPECT_FALSE(csr->isDirectSubmissionEnabled());
|
||||
}
|
||||
|
||||
// With two sub-devices requested, a sub-device must report the same bindless heaps
// helper as its root device.
TEST(SubDevicesTest, givenCreateMultipleSubDevicesFlagSetWhenBindlessHeapHelperCreatedThenSubDeviceReturnRootDeviceMember) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.CreateMultipleSubDevices.set(2);
    VariableBackup<bool> mockDeviceFlagBackup(&MockDevice::createSingleDevice, false);
    std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));

    const bool isMultiDevice = device->getNumAvailableDevices() > 1;
    device->bindlessHeapHelper.reset(new NEO::BindlessHeapsHelper(device->getMemoryManager(), isMultiDevice, device->getRootDeviceIndex()));

    auto *rootHelper = device->getBindlessHeapsHelper();
    auto *subDeviceHelper = device->subdevices.at(0)->getBindlessHeapsHelper();
    EXPECT_EQ(rootHelper, subDeviceHelper);
}
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/device/device_info.h"
|
||||
#include "shared/source/execution_environment/execution_environment.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/bindless_heaps_helper.h"
|
||||
#include "shared/source/helpers/common_types.h"
|
||||
#include "shared/source/helpers/engine_control.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
@@ -93,6 +94,7 @@ class Device : public ReferenceTrackedObject<Device> {
|
||||
virtual Device *getDeviceById(uint32_t deviceId) const = 0;
|
||||
virtual Device *getParentDevice() const = 0;
|
||||
virtual DeviceBitfield getDeviceBitfield() const = 0;
|
||||
virtual BindlessHeapsHelper *getBindlessHeapsHelper() const = 0;
|
||||
|
||||
static decltype(&PerformanceCounters::create) createPerformanceCountersFunc;
|
||||
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
namespace NEO {
|
||||
|
||||
class SubDevice;
|
||||
class BindlessHeapsHelper;
|
||||
|
||||
class RootDevice : public Device {
|
||||
public:
|
||||
@@ -23,7 +22,7 @@ class RootDevice : public Device {
|
||||
Device *getDeviceById(uint32_t deviceId) const override;
|
||||
Device *getParentDevice() const override;
|
||||
uint32_t getNumSubDevices() const;
|
||||
BindlessHeapsHelper *getBindlessHeapsHelper() const;
|
||||
BindlessHeapsHelper *getBindlessHeapsHelper() const override;
|
||||
|
||||
protected:
|
||||
DeviceBitfield getDeviceBitfield() const override;
|
||||
|
||||
@@ -46,6 +46,9 @@ Device *SubDevice::getDeviceById(uint32_t deviceId) const {
|
||||
// Returns the root device this sub-device was created under.
Device *SubDevice::getParentDevice() const {
    Device *parent = &rootDevice;
    return parent;
}
|
||||
// Forwards the query to the root device, so a sub-device reports the root device's helper.
BindlessHeapsHelper *SubDevice::getBindlessHeapsHelper() const {
    auto *rootHelper = rootDevice.getBindlessHeapsHelper();
    return rootHelper;
}
|
||||
|
||||
uint64_t SubDevice::getGlobalMemorySize(uint32_t deviceBitfield) const {
|
||||
auto globalMemorySize = Device::getGlobalMemorySize(static_cast<uint32_t>(maxNBitValue(rootDevice.getNumSubDevices())));
|
||||
|
||||
@@ -20,6 +20,7 @@ class SubDevice : public Device {
|
||||
uint32_t getRootDeviceIndex() const override;
|
||||
Device *getDeviceById(uint32_t deviceId) const override;
|
||||
Device *getParentDevice() const override;
|
||||
BindlessHeapsHelper *getBindlessHeapsHelper() const override;
|
||||
|
||||
uint32_t getSubDeviceIndex() const;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user