fix: opencl support for bindless kernels
Related-To: NEO-11156
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
parent
5e57bb2a32
commit
ee71157f7f
|
@ -266,6 +266,18 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
|||
if (EncodeSurfaceState<GfxFamily>::doBindingTablePrefetch()) {
|
||||
bindingTablePrefetchSize = std::min(31u, static_cast<uint32_t>(kernel.getNumberOfBindingTableStates()));
|
||||
}
|
||||
|
||||
const bool isBindlessKernel = NEO::KernelDescriptor::isBindlessAddressingKernel(kernel.getKernelInfo().kernelDescriptor);
|
||||
if (isBindlessKernel) {
|
||||
uint64_t bindlessSurfaceStateBaseOffset = ptrDiff(ssh.getSpace(0), ssh.getCpuBase());
|
||||
|
||||
auto sshHeapSize = kernel.getSurfaceStateHeapSize();
|
||||
// Allocate space for new ssh data
|
||||
auto dstSurfaceState = ssh.getSpace(sshHeapSize);
|
||||
memcpy_s(dstSurfaceState, sshHeapSize, kernel.getSurfaceStateHeap(), sshHeapSize);
|
||||
|
||||
kernel.patchBindlessOffsetsInCrossThreadData(bindlessSurfaceStateBaseOffset);
|
||||
}
|
||||
}
|
||||
|
||||
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
||||
|
|
|
@ -140,12 +140,41 @@ void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, Grap
|
|||
}
|
||||
|
||||
void *ssh = getSurfaceStateHeap();
|
||||
if ((nullptr != ssh) && isValidOffset(arg.bindful)) {
|
||||
auto surfaceState = ptrOffset(ssh, arg.bindful);
|
||||
if (nullptr != ssh) {
|
||||
void *addressToPatch = reinterpret_cast<void *>(allocation.getGpuAddressToPatch());
|
||||
size_t sizeToPatch = allocation.getUnderlyingBufferSize();
|
||||
Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0,
|
||||
areMultipleSubDevicesInContext());
|
||||
|
||||
if (isValidOffset(arg.bindful)) {
|
||||
auto surfaceState = ptrOffset(ssh, arg.bindful);
|
||||
Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0,
|
||||
areMultipleSubDevicesInContext());
|
||||
} else if (isValidOffset(arg.bindless)) {
|
||||
auto &gfxCoreHelper = clDevice.getDevice().getGfxCoreHelper();
|
||||
void *surfaceState = nullptr;
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
|
||||
if (clDevice.getDevice().getBindlessHeapsHelper()) {
|
||||
auto ssInHeap = allocation.getBindlessInfo();
|
||||
surfaceState = ssInHeap.ssPtr;
|
||||
auto patchLocation = ptrOffset(crossThreadData, arg.bindless);
|
||||
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(ssInHeap.surfaceStateOffset));
|
||||
patchWithRequiredSize(reinterpret_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
|
||||
} else {
|
||||
auto index = std::numeric_limits<uint32_t>::max();
|
||||
const auto &iter = kernelInfo.kernelDescriptor.getBindlessOffsetToSurfaceState().find(arg.bindless);
|
||||
if (iter != kernelInfo.kernelDescriptor.getBindlessOffsetToSurfaceState().end()) {
|
||||
index = iter->second;
|
||||
}
|
||||
if (index < std::numeric_limits<uint32_t>::max()) {
|
||||
surfaceState = ptrOffset(ssh, index * surfaceStateSize);
|
||||
}
|
||||
}
|
||||
|
||||
if (surfaceState) {
|
||||
Buffer::setSurfaceState(&clDevice.getDevice(), surfaceState, false, false, sizeToPatch, addressToPatch, 0, &allocation, 0, 0,
|
||||
areMultipleSubDevicesInContext());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -223,7 +252,13 @@ cl_int Kernel::initialize() {
|
|||
// copy the ssh into our local copy
|
||||
memcpy_s(pSshLocal.get(), sshLocalSize,
|
||||
heapInfo.pSsh, heapInfo.surfaceStateHeapSize);
|
||||
} else if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelDescriptor)) {
|
||||
auto surfaceStateSize = static_cast<uint32_t>(gfxCoreHelper.getRenderSurfaceStateSize());
|
||||
sshLocalSize = kernelDescriptor.kernelAttributes.numArgsStateful * surfaceStateSize;
|
||||
DEBUG_BREAK_IF(kernelDescriptor.kernelAttributes.numArgsStateful != kernelDescriptor.getBindlessOffsetToSurfaceState().size());
|
||||
pSshLocal = std::make_unique<char[]>(sshLocalSize);
|
||||
}
|
||||
|
||||
numberOfBindingTableStates = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
localBindingTableOffset = kernelDescriptor.payloadMappings.bindingTable.tableOffset;
|
||||
|
||||
|
@ -233,7 +268,8 @@ cl_int Kernel::initialize() {
|
|||
return status;
|
||||
}
|
||||
|
||||
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
|
||||
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless) ||
|
||||
isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless)) {
|
||||
DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr);
|
||||
uint64_t constMemory = isBuiltIn ? castToUint64(program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch();
|
||||
|
||||
|
@ -241,7 +277,8 @@ cl_int Kernel::initialize() {
|
|||
patchWithImplicitSurface(constMemory, *program->getConstantSurface(rootDeviceIndex), arg);
|
||||
}
|
||||
|
||||
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) {
|
||||
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless) ||
|
||||
isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless)) {
|
||||
DEBUG_BREAK_IF(program->getGlobalSurface(rootDeviceIndex) == nullptr);
|
||||
uint64_t globalMemory = isBuiltIn ? castToUint64(program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch();
|
||||
|
||||
|
@ -932,6 +969,16 @@ cl_int Kernel::setArgSvm(uint32_t argIndex, size_t svmAllocSize, void *svmPtr, G
|
|||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), argAsPtr.bindful);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0,
|
||||
areMultipleSubDevicesInContext());
|
||||
} else if (isValidOffset(argAsPtr.bindless)) {
|
||||
auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
|
||||
auto ssIndex = getSurfaceStateIndexForBindlessOffset(argAsPtr.bindless);
|
||||
if (ssIndex < std::numeric_limits<uint32_t>::max()) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), ssIndex * surfaceStateSize);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, false, false, svmAllocSize + ptrDiff(svmPtr, ptrToPatch), ptrToPatch, 0, svmAlloc, svmFlags, 0,
|
||||
areMultipleSubDevicesInContext());
|
||||
}
|
||||
}
|
||||
|
||||
storeKernelArg(argIndex, SVM_OBJ, nullptr, svmPtr, sizeof(void *), svmAlloc, svmFlags);
|
||||
|
@ -987,6 +1034,24 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
|
|||
}
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0,
|
||||
areMultipleSubDevicesInContext());
|
||||
} else if (isValidOffset(argAsPtr.bindless)) {
|
||||
size_t allocSize = 0;
|
||||
size_t offset = 0;
|
||||
if (svmAlloc != nullptr) {
|
||||
allocSize = svmAlloc->getUnderlyingBufferSize();
|
||||
offset = ptrDiff(ptrToPatch, svmAlloc->getGpuAddressToPatch());
|
||||
allocSize -= offset;
|
||||
}
|
||||
|
||||
auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
|
||||
auto ssIndex = getSurfaceStateIndexForBindlessOffset(argAsPtr.bindless);
|
||||
if (ssIndex < std::numeric_limits<uint32_t>::max()) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), ssIndex * surfaceStateSize);
|
||||
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0,
|
||||
areMultipleSubDevicesInContext());
|
||||
}
|
||||
}
|
||||
|
||||
storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t));
|
||||
|
@ -1297,10 +1362,20 @@ void Kernel::makeResident(CommandStreamReceiver &commandStreamReceiver) {
|
|||
|
||||
if (program->getConstantSurface(rootDeviceIndex)) {
|
||||
commandStreamReceiver.makeResident(*(program->getConstantSurface(rootDeviceIndex)));
|
||||
|
||||
auto bindlessHeapAllocation = program->getConstantSurface(rootDeviceIndex)->getBindlessInfo().heapAllocation;
|
||||
if (bindlessHeapAllocation) {
|
||||
commandStreamReceiver.makeResident(*bindlessHeapAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
if (program->getGlobalSurface(rootDeviceIndex)) {
|
||||
commandStreamReceiver.makeResident(*(program->getGlobalSurface(rootDeviceIndex)));
|
||||
|
||||
auto bindlessHeapAllocation = program->getGlobalSurface(rootDeviceIndex)->getBindlessInfo().heapAllocation;
|
||||
if (bindlessHeapAllocation) {
|
||||
commandStreamReceiver.makeResident(*bindlessHeapAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
if (program->getExportedFunctionsSurface(rootDeviceIndex)) {
|
||||
|
@ -1510,11 +1585,14 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
|||
} else if (isValidOffset(argAsPtr.bindless)) {
|
||||
auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), surfaceStateSize * argIndex);
|
||||
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode,
|
||||
disableL3, isAuxTranslationKernel, arg.isReadOnly(), pClDevice->getDevice(),
|
||||
areMultipleSubDevicesInContext());
|
||||
auto ssIndex = getSurfaceStateIndexForBindlessOffset(argAsPtr.bindless);
|
||||
if (ssIndex < std::numeric_limits<uint32_t>::max()) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), ssIndex * surfaceStateSize);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode,
|
||||
disableL3, isAuxTranslationKernel, arg.isReadOnly(), pClDevice->getDevice(),
|
||||
areMultipleSubDevicesInContext());
|
||||
}
|
||||
}
|
||||
|
||||
kernelArguments[argIndex].isStatelessUncacheable = argAsPtr.isPureStateful() ? false : buffer->isMemObjUncacheable();
|
||||
|
@ -2080,6 +2158,68 @@ void *Kernel::patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t
|
|||
return ssInHeap.ssPtr;
|
||||
}
|
||||
|
||||
// Translates a bindless cross-thread-data offset into the index of the surface
// state assigned to it in the kernel descriptor's bindless-offset map.
//
// Returns the mapped index, or std::numeric_limits<uint32_t>::max() when the
// offset has no entry (a debug break is raised first in that case).
uint32_t Kernel::getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const {
    const auto &offsetToSurfaceState = kernelInfo.kernelDescriptor.getBindlessOffsetToSurfaceState();
    const auto entry = offsetToSurfaceState.find(bindlessOffset);

    if (entry == offsetToSurfaceState.end()) {
        // Callers treat the max value as "no surface state for this offset".
        DEBUG_BREAK_IF(true);
        return std::numeric_limits<uint32_t>::max();
    }
    return entry->second;
}
|
||||
|
||||
void Kernel::patchBindlessOffsetsForImplicitArgs(uint64_t bindlessSurfaceStateBaseOffset) const {
|
||||
auto implicitArgsVec = kernelInfo.kernelDescriptor.getImplicitArgBindlessCandidatesVec();
|
||||
|
||||
auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
|
||||
for (size_t i = 0; i < implicitArgsVec.size(); i++) {
|
||||
if (NEO::isValidOffset(implicitArgsVec[i]->bindless)) {
|
||||
auto patchLocation = ptrOffset(getCrossThreadData(), implicitArgsVec[i]->bindless);
|
||||
auto index = getSurfaceStateIndexForBindlessOffset(implicitArgsVec[i]->bindless);
|
||||
|
||||
if (index < std::numeric_limits<uint32_t>::max()) {
|
||||
auto surfaceStateOffset = static_cast<uint32_t>(bindlessSurfaceStateBaseOffset + index * surfaceStateSize);
|
||||
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(surfaceStateOffset));
|
||||
|
||||
patchWithRequiredSize(reinterpret_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Kernel::patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const {
|
||||
auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
|
||||
|
||||
for (size_t argIndex = 0; argIndex < kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size(); argIndex++) {
|
||||
const auto &arg = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs[argIndex];
|
||||
|
||||
auto crossThreadOffset = NEO::undefined<NEO::CrossThreadDataOffset>;
|
||||
if (arg.type == NEO::ArgDescriptor::argTPointer) {
|
||||
crossThreadOffset = arg.as<NEO::ArgDescPointer>().bindless;
|
||||
} else if (arg.type == NEO::ArgDescriptor::argTImage) {
|
||||
crossThreadOffset = arg.as<NEO::ArgDescImage>().bindless;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (NEO::isValidOffset(crossThreadOffset)) {
|
||||
auto patchLocation = ptrOffset(getCrossThreadData(), crossThreadOffset);
|
||||
auto index = getSurfaceStateIndexForBindlessOffset(crossThreadOffset);
|
||||
|
||||
if (index < std::numeric_limits<uint32_t>::max()) {
|
||||
auto surfaceStateOffset = static_cast<uint32_t>(bindlessSurfaceStateBaseOffset + index * surfaceStateSize);
|
||||
auto patchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(surfaceStateOffset));
|
||||
|
||||
patchWithRequiredSize(reinterpret_cast<uint8_t *>(patchLocation), sizeof(patchValue), patchValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
patchBindlessOffsetsForImplicitArgs(bindlessSurfaceStateBaseOffset);
|
||||
}
|
||||
|
||||
// Stores the given value in the kernel's additionalKernelExecInfo member.
void Kernel::setAdditionalKernelExecInfo(uint32_t additionalKernelExecInfo) {
    // The parameter shadows the member, hence the explicit this->.
    this->additionalKernelExecInfo = additionalKernelExecInfo;
}
|
||||
|
|
|
@ -218,6 +218,9 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
|||
bool usesSyncBuffer() const;
|
||||
void patchSyncBuffer(GraphicsAllocation *gfxAllocation, size_t bufferOffset);
|
||||
void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless);
|
||||
uint32_t getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const;
|
||||
void patchBindlessOffsetsForImplicitArgs(uint64_t bindlessSurfaceStateBaseOffset) const;
|
||||
void patchBindlessOffsetsInCrossThreadData(uint64_t bindlessSurfaceStateBaseOffset) const;
|
||||
|
||||
// Helpers
|
||||
cl_int setArg(uint32_t argIndex, uint32_t argValue);
|
||||
|
|
|
@ -265,16 +265,35 @@ cl_int Program::processProgramInfo(ProgramInfo &src, const ClDevice &clDevice) {
|
|||
}
|
||||
|
||||
kernelInfoArray = std::move(src.kernelInfos);
|
||||
|
||||
bool isBindlessKernelPresent = false;
|
||||
for (auto &kernelInfo : kernelInfoArray) {
|
||||
if (NEO::KernelDescriptor::isBindlessAddressingKernel(kernelInfo->kernelDescriptor)) {
|
||||
isBindlessKernelPresent = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
auto svmAllocsManager = context ? context->getSVMAllocsManager() : nullptr;
|
||||
auto globalConstDataSize = src.globalConstants.size + src.globalConstants.zeroInitSize;
|
||||
if (globalConstDataSize != 0) {
|
||||
buildInfos[rootDeviceIndex].constantSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), globalConstDataSize, src.globalConstants.zeroInitSize, true, linkerInput, src.globalConstants.initData);
|
||||
if (isBindlessKernelPresent) {
|
||||
if (!clDevice.getMemoryManager()->allocateBindlessSlot(buildInfos[rootDeviceIndex].constantSurface)) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
auto globalVariablesDataSize = src.globalVariables.size + src.globalVariables.zeroInitSize;
|
||||
buildInfos[rootDeviceIndex].globalVarTotalSize = globalVariablesDataSize;
|
||||
if (globalVariablesDataSize != 0) {
|
||||
buildInfos[rootDeviceIndex].globalSurface = allocateGlobalsSurface(svmAllocsManager, clDevice.getDevice(), globalVariablesDataSize, src.globalVariables.zeroInitSize, false, linkerInput, src.globalVariables.initData);
|
||||
if (isBindlessKernelPresent) {
|
||||
if (!clDevice.getMemoryManager()->allocateBindlessSlot(buildInfos[rootDeviceIndex].globalSurface)) {
|
||||
return CL_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
}
|
||||
if (clDevice.areOcl21FeaturesEnabled() == false) {
|
||||
buildInfos[rootDeviceIndex].globalVarTotalSize = 0u;
|
||||
}
|
||||
|
|
|
@ -1080,6 +1080,82 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
|
|||
delete[] mockDsh;
|
||||
}
|
||||
|
||||
// Checks that sendIndirectState, given a bindless kernel with a single buffer
// argument, copies the kernel's local surface state heap into the SSH and
// writes the matching extended message descriptor value into the argument's
// bindless slot in cross-thread data.
HWTEST2_F(HardwareCommandsTest, givenBindlessKernelWithBufferArgWhenSendIndirectStateThenSurfaceStateIsCopiedToHeapAndCrossThreadDataIsCorrectlyPatched, IsAtLeastXeHpCore) {
    using DefaultWalkerType = typename FamilyType::DefaultWalkerType;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;

    CommandQueueHw<FamilyType> cmdQ(pContext, pClDevice, 0, false);
    auto &commandStream = cmdQ.getCS(1024);
    auto pWalkerCmd = static_cast<DefaultWalkerType *>(commandStream.getSpace(sizeof(DefaultWalkerType)));

    // Describe a kernel with one buffer argument addressed in bindless mode.
    auto pKernelInfo = std::make_unique<MockKernelInfo>();
    auto &descriptor = pKernelInfo->kernelDescriptor;
    descriptor.kernelAttributes.simdSize = 1;
    pKernelInfo->addArgBuffer(0, 0x30, sizeof(void *), 0x0);
    descriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::AddressingMode::BindlessAndStateless;

    const auto bindlessOffset = 0x10;
    pKernelInfo->argAsPtr(0).bindless = bindlessOffset;
    descriptor.initBindlessOffsetToSurfaceState();
    descriptor.kernelAttributes.crossThreadDataSize = 1024;

    MockKernel mockKernel(mockKernelWithInternal->mockKernel->getProgram(), *pKernelInfo, *pClDevice);
    EXPECT_EQ(0, mockKernel.initialize());

    // Recognizable pattern in the local SSH, zeroed cross-thread data.
    memset(mockKernel.getSurfaceStateHeap(), 0x22, mockKernel.getSurfaceStateHeapSize());
    memset(mockKernel.getCrossThreadData(), 0x00, mockKernel.getCrossThreadDataSize());

    auto &dsh = cmdQ.getIndirectHeap(IndirectHeap::Type::dynamicState, 8192);
    auto &ioh = cmdQ.getIndirectHeap(IndirectHeap::Type::indirectObject, 8192);
    auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::surfaceState, 8192);

    // Capture where the next SSH allocation will land before the call under test.
    const auto expectedDestinationInHeap = ssh.getSpace(0);
    const uint64_t bindlessSurfaceStateBaseOffset = ptrDiff(expectedDestinationInHeap, ssh.getCpuBase());

    const size_t localWorkSize = 256;
    const size_t localWorkSizes[3]{localWorkSize, 1, 1};
    const uint32_t threadGroupCount = 1u;
    uint32_t interfaceDescriptorIndex = 0;
    const auto isCcsUsed = EngineHelpers::isCcs(cmdQ.getGpgpuEngine().osContext->getEngineType());
    const auto kernelUsesLocalIds = HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(mockKernel);

    INTERFACE_DESCRIPTOR_DATA interfaceDescriptorData;
    HardwareCommandsHelper<FamilyType>::template sendIndirectState<DefaultWalkerType, INTERFACE_DESCRIPTOR_DATA>(
        commandStream,
        dsh,
        ioh,
        ssh,
        mockKernel,
        mockKernel.getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        pKernelInfo->getMaxSimdSize(),
        localWorkSizes,
        threadGroupCount,
        0,
        interfaceDescriptorIndex,
        pDevice->getPreemptionMode(),
        pWalkerCmd,
        &interfaceDescriptorData,
        true,
        0,
        *pDevice);

    // The local surface state heap must have been copied verbatim into the SSH.
    EXPECT_EQ(0, std::memcmp(expectedDestinationInHeap, mockKernel.getSurfaceStateHeap(), mockKernel.getSurfaceStateHeapSize()));

    // The bindless slot must hold the descriptor value for the argument's surface state.
    const auto &gfxCoreHelper = mockKernel.getGfxCoreHelper();
    const auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
    const auto ssIndex = descriptor.bindlessArgsMap.find(bindlessOffset)->second;
    const auto surfaceStateOffset = static_cast<uint32_t>(bindlessSurfaceStateBaseOffset + ssIndex * surfaceStateSize);
    const auto expectedPatchValue = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(surfaceStateOffset);
    const auto expectedPatchLocation = reinterpret_cast<uint32_t *>(ptrOffset(mockKernel.getCrossThreadData(), bindlessOffset));

    EXPECT_EQ(expectedPatchValue, *expectedPatchLocation);
}
|
||||
|
||||
HWTEST_F(HardwareCommandsTest, whenNumLocalIdsIsBiggerThanZeroThenExpectLocalIdsInUseIsTrue) {
|
||||
mockKernelWithInternal->kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels = 1;
|
||||
EXPECT_TRUE(HardwareCommandsHelper<FamilyType>::kernelUsesLocalIds(*mockKernelWithInternal->mockKernel));
|
||||
|
|
|
@ -651,6 +651,8 @@ class KernelArgBufferFixtureBindless : public KernelArgBufferFixture {
|
|||
pKernelInfo->argAsPtr(0).bindless = bindlessOffset;
|
||||
pKernelInfo->argAsPtr(0).stateless = undefined<CrossThreadDataOffset>;
|
||||
pKernelInfo->argAsPtr(0).bindful = undefined<SurfaceStateHeapOffset>;
|
||||
|
||||
pKernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();
|
||||
}
|
||||
void tearDown() {
|
||||
delete pBuffer;
|
||||
|
@ -674,6 +676,46 @@ HWTEST_F(KernelArgBufferTestBindless, givenUsedBindlessBuffersWhenSettingKernelA
|
|||
EXPECT_EQ(0xdeadu, *patchLocation);
|
||||
}
|
||||
|
||||
// Checks that setting a buffer on a bindless argument encodes the buffer's GPU
// address into the surface state at (mapped index * surface state size) inside
// the kernel's local surface state heap.
HWTEST_F(KernelArgBufferTestBindless, givenBindlessArgBufferWhenSettingKernelArgThenSurfaceStateIsEncodedAtProperOffset) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    const auto &gfxCoreHelper = pKernel->getGfxCoreHelper();
    const auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();
    const auto surfaceStateHeapSize = pKernel->getSurfaceStateHeapSize();

    // The local heap holds exactly one surface state per stateful argument.
    EXPECT_EQ(pKernelInfo->kernelDescriptor.kernelAttributes.numArgsStateful * surfaceStateSize, surfaceStateHeapSize);

    cl_mem memObj = pBuffer;
    retVal = pKernel->setArg(0, sizeof(memObj), &memObj);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Stop here if the offset is unmapped instead of dereferencing end().
    const auto &bindlessArgsMap = pKernelInfo->kernelDescriptor.bindlessArgsMap;
    const auto ssEntry = bindlessArgsMap.find(bindlessOffset);
    ASSERT_TRUE(ssEntry != bindlessArgsMap.end());

    const auto ssIndex = ssEntry->second;
    const auto ssOffset = ssIndex * surfaceStateSize;

    const auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), ssOffset));
    const auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    const auto bufferAddress = pBuffer->getGraphicsAllocation(pDevice->getRootDeviceIndex())->getGpuAddress();
    EXPECT_EQ(bufferAddress, surfaceAddress);
}
|
||||
|
||||
// Checks that when the bindless-offset map has no entry for the argument,
// setting the buffer leaves the kernel's local surface state heap untouched.
HWTEST_F(KernelArgBufferTestBindless, givenBindlessArgBufferAndNotInitializedBindlessOffsetToSurfaceStateWhenSettingKernelArgThenSurfaceStateIsNotEncoded) {
    const auto surfaceStateHeap = pKernel->getSurfaceStateHeap();
    const auto surfaceStateHeapSize = pKernel->getSurfaceStateHeapSize();

    // Snapshot the heap so it can be compared after the call.
    auto ssHeapDataInitial = std::make_unique<char[]>(surfaceStateHeapSize);
    std::memcpy(ssHeapDataInitial.get(), surfaceStateHeap, surfaceStateHeapSize);

    // Drop the mapping created by the fixture.
    pKernelInfo->kernelDescriptor.bindlessArgsMap.clear();

    cl_mem memObj = pBuffer;
    retVal = pKernel->setArg(0, sizeof(memObj), &memObj);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(0, std::memcmp(ssHeapDataInitial.get(), surfaceStateHeap, surfaceStateHeapSize));
}
|
||||
|
||||
HWTEST_F(KernelArgBufferTestBindless, givenBindlessBuffersWhenPatchBindlessOffsetCalledThenBindlessOffsetToSurfaceStateWrittenInCrossThreadData) {
|
||||
|
||||
pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[pClDevice->getRootDeviceIndex()]->createBindlessHeapsHelper(pDevice,
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
*/
|
||||
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
|
@ -116,6 +117,61 @@ HWTEST_F(KernelArgSvmTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsA
|
|||
delete[] svmPtr;
|
||||
}
|
||||
|
||||
// Checks that setArgSvm on a bindless argument programs the SVM pointer as the
// base address of the surface state selected through the bindless-offset map.
HWTEST_F(KernelArgSvmTest, GivenSvmPtrBindlessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    if (pClDevice->getDeviceInfo().svmCapabilities == 0) {
        GTEST_SKIP();
    }

    auto svmPtr = std::make_unique<char[]>(256);
    const auto surfaceStateSize = pKernel->getGfxCoreHelper().getRenderSurfaceStateSize();

    // Turn argument 0 into a bindless argument and rebuild the offset map.
    const auto bindlessOffset = 0x10;
    pKernelInfo->argAsPtr(0).bindless = bindlessOffset;
    pKernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();

    auto retVal = pKernel->setArgSvm(0, 256, svmPtr.get(), nullptr, 0u);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    const auto ssIndex = pKernelInfo->kernelDescriptor.bindlessArgsMap.find(bindlessOffset)->second;
    const auto ssOffset = ssIndex * surfaceStateSize;
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), ssOffset));

    void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
    EXPECT_EQ(svmPtr.get(), surfaceAddress);
}
|
||||
|
||||
// Checks that setArgSvm on a bindless argument with no entry in the
// bindless-offset map succeeds but leaves the local surface state heap unchanged.
HWTEST_F(KernelArgSvmTest, GivenSvmPtrBindlessAndNotInitializedBindlessOffsetToSurfaceStateWhenSettingKernelArgThenSurfaceStateIsNotEncoded) {
    if (pClDevice->getDeviceInfo().svmCapabilities == 0) {
        GTEST_SKIP();
    }

    auto svmPtr = std::make_unique<char[]>(256);

    const auto surfaceStateHeap = pKernel->getSurfaceStateHeap();
    const auto surfaceStateHeapSize = pKernel->getSurfaceStateHeapSize();

    // Argument 0 gets a bindless offset, but the offset map is never rebuilt.
    const auto bindlessOffset = 0x10;
    pKernelInfo->argAsPtr(0).bindless = bindlessOffset;

    // Snapshot the heap so it can be compared after the call.
    auto ssHeapDataInitial = std::make_unique<char[]>(surfaceStateHeapSize);
    std::memcpy(ssHeapDataInitial.get(), surfaceStateHeap, surfaceStateHeapSize);

    pKernelInfo->kernelDescriptor.bindlessArgsMap.clear();

    auto retVal = pKernel->setArgSvm(0, 256, svmPtr.get(), nullptr, 0u);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(0, std::memcmp(ssHeapDataInitial.get(), surfaceStateHeap, surfaceStateHeapSize));
}
|
||||
|
||||
TEST_F(KernelArgSvmTest, GivenValidSvmAllocWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
|
||||
const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo();
|
||||
if (devInfo.svmCapabilities == 0) {
|
||||
|
@ -221,6 +277,100 @@ HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerWhenSetArgSvmAllocIsCalledThen
|
|||
EXPECT_EQ(offsetedPtr, surfaceAddress);
|
||||
}
|
||||
|
||||
// Checks that setArgSvmAlloc on a bindless argument programs the allocation's
// pointer as the base address of the surface state selected through the
// bindless-offset map.
HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocBindlessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    if (pClDevice->getDeviceInfo().svmCapabilities == 0) {
        GTEST_SKIP();
    }

    const auto surfaceStateSize = pKernel->getGfxCoreHelper().getRenderSurfaceStateSize();

    auto svmPtr = std::make_unique<char[]>(256);
    MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);

    // Turn argument 0 into a bindless argument and rebuild the offset map.
    const auto bindlessOffset = 0x10;
    pKernelInfo->argAsPtr(0).bindless = bindlessOffset;
    pKernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();

    auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());

    const auto ssIndex = pKernelInfo->kernelDescriptor.bindlessArgsMap.find(bindlessOffset)->second;
    const auto ssOffset = ssIndex * surfaceStateSize;
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), ssOffset));

    void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
    EXPECT_EQ(svmPtr.get(), surfaceAddress);
}
|
||||
|
||||
// Checks that setArgSvmAlloc with a pointer offset into the allocation programs
// that offset pointer (not the allocation start) as the surface base address.
HWTEST_F(KernelArgSvmTest, givenOffsetedSvmPointerBindlessWhenSetArgSvmAllocIsCalledThenProperSvmAddressIsPatched) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    if (pClDevice->getDeviceInfo().svmCapabilities == 0) {
        GTEST_SKIP();
    }

    const auto surfaceStateSize = pKernel->getGfxCoreHelper().getRenderSurfaceStateSize();

    std::unique_ptr<char[]> svmPtr(new char[256]);
    auto offsetedPtr = svmPtr.get() + 4;
    MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);

    // Turn argument 0 into a bindless argument and rebuild the offset map.
    const auto bindlessOffset = 0x10;
    pKernelInfo->argAsPtr(0).bindless = bindlessOffset;
    pKernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();

    pKernel->setArgSvmAlloc(0, offsetedPtr, &svmAlloc, 0u);

    const auto ssIndex = pKernelInfo->kernelDescriptor.bindlessArgsMap.find(bindlessOffset)->second;
    const auto ssOffset = ssIndex * surfaceStateSize;
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(pKernel->getSurfaceStateHeap(), ssOffset));

    void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());
    EXPECT_EQ(offsetedPtr, surfaceAddress);
}
|
||||
|
||||
// Checks that setArgSvmAlloc on a bindless argument with no entry in the
// bindless-offset map succeeds but leaves the local surface state heap unchanged.
HWTEST_F(KernelArgSvmTest, GivenValidSvmAllocBindlessAndNotInitializedBindlessOffsetToSurfaceStateWhenSettingKernelArgThenSurfaceStateIsNotEncoded) {
    if (pClDevice->getDeviceInfo().svmCapabilities == 0) {
        GTEST_SKIP();
    }

    const auto surfaceStateHeap = pKernel->getSurfaceStateHeap();
    const auto surfaceStateHeapSize = pKernel->getSurfaceStateHeapSize();

    auto svmPtr = std::make_unique<char[]>(256);
    MockGraphicsAllocation svmAlloc(svmPtr.get(), 256);

    // Argument 0 gets a bindless offset, but the offset map is never rebuilt.
    const auto bindlessOffset = 0x10;
    pKernelInfo->argAsPtr(0).bindless = bindlessOffset;

    // Snapshot the heap so it can be compared after the call.
    auto ssHeapDataInitial = std::make_unique<char[]>(surfaceStateHeapSize);
    std::memcpy(ssHeapDataInitial.get(), surfaceStateHeap, surfaceStateHeapSize);

    pKernelInfo->kernelDescriptor.bindlessArgsMap.clear();

    auto retVal = pKernel->setArgSvmAlloc(0, svmPtr.get(), &svmAlloc, 0u);
    EXPECT_EQ(CL_SUCCESS, retVal);

    EXPECT_EQ(0, std::memcmp(ssHeapDataInitial.get(), surfaceStateHeap, surfaceStateHeapSize));
}
|
||||
|
||||
HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetArgSvmIsCalledWithSurfaceStateThenSizeIsMaxAndAddressIsProgrammed) {
|
||||
const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo();
|
||||
if (devInfo.svmCapabilities == 0) {
|
||||
|
@ -246,6 +396,42 @@ HWTEST_F(KernelArgSvmTest, givenDeviceSupportingSharedSystemAllocationsWhenSetAr
|
|||
EXPECT_EQ(16384u, surfaceState->getHeight());
|
||||
}
|
||||
|
||||
// Checks that, with shared system memory capabilities enabled on the device,
// setArgSvmAlloc with a raw system pointer and no allocation programs that
// pointer as the surface base address and sets the surface dimensions to
// 128 x 16384 x 2048 (width x height x depth).
HWTEST_F(KernelArgSvmTest, givenBindlessArgAndDeviceSupportingSharedSystemAllocationsWhenSetArgSvmIsCalledWithSurfaceStateThenSizeIsMaxAndAddressIsProgrammed) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo();
    if (devInfo.svmCapabilities == 0) {
        GTEST_SKIP();
    }

    const auto &gfxCoreHelper = pKernel->getGfxCoreHelper();
    const auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();

    this->pClDevice->deviceInfo.sharedSystemMemCapabilities = CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL;

    auto systemPointer = reinterpret_cast<void *>(0xfeedbac);

    // Turn argument 0 into a bindless argument and rebuild the offset map.
    const auto bindlessOffset = 0x10;
    pKernelInfo->argAsPtr(0).bindless = bindlessOffset;
    pKernelInfo->kernelDescriptor.initBindlessOffsetToSurfaceState();

    pKernel->setArgSvmAlloc(0, systemPointer, nullptr, 0u);

    // Stop here if the offset is unmapped instead of dereferencing end().
    const auto &bindlessArgsMap = pKernelInfo->kernelDescriptor.bindlessArgsMap;
    const auto ssEntry = bindlessArgsMap.find(bindlessOffset);
    ASSERT_TRUE(ssEntry != bindlessArgsMap.end());

    const auto ssIndex = ssEntry->second;
    const auto ssOffset = ssIndex * surfaceStateSize;

    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(),
                  ssOffset));

    void *surfaceAddress = reinterpret_cast<void *>(surfaceState->getSurfaceBaseAddress());

    EXPECT_EQ(systemPointer, surfaceAddress);
    EXPECT_EQ(128u, surfaceState->getWidth());
    EXPECT_EQ(2048u, surfaceState->getDepth());
    EXPECT_EQ(16384u, surfaceState->getHeight());
}
|
||||
|
||||
TEST_F(KernelArgSvmTest, WhenSettingKernelArgImmediateThenInvalidArgValueErrorIsReturned) {
|
||||
const ClDeviceInfo &devInfo = pClDevice->getDeviceInfo();
|
||||
if (devInfo.svmCapabilities == 0) {
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "shared/test/common/helpers/gtest_helpers.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
|
||||
#include "shared/test/common/mocks/mock_cpu_page_fault_manager.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
#include "shared/test/common/mocks/mock_memory_manager.h"
|
||||
|
@ -395,6 +396,137 @@ TEST_F(KernelTests, WhenIsSingleSubdevicePreferredIsCalledThenCorrectValuesAreRe
|
|||
}
|
||||
}
|
||||
|
||||
// Test-suite alias: the bindless kernel tests below run on the unchanged KernelTests fixture.
using BindlessKernelTests = KernelTests;
|
||||
|
||||
// Initializes a kernel that uses bindless buffer addressing with three stateful args and
// expects a non-null surface state heap sized numArgsStateful * render-surface-state size.
TEST_F(BindlessKernelTests, GivenBindlessAddressingKernelWhenInitializeThenSurfaceStateIsCreatedWithCorrectSize) {
    KernelInfo kernelInfo = {};
    auto &attributes = kernelInfo.kernelDescriptor.kernelAttributes;
    attributes.simdSize = 32;
    attributes.bufferAddressingMode = KernelDescriptor::Bindless;
    attributes.numArgsStateful = 3;

    MockKernel kernel(pProgram, kernelInfo, *pClDevice);
    EXPECT_EQ(CL_SUCCESS, kernel.initialize());

    const auto renderSurfaceStateSize = static_cast<uint32_t>(pClDevice->getGfxCoreHelper().getRenderSurfaceStateSize());
    const auto expectedHeapSize = attributes.numArgsStateful * renderSurfaceStateSize;

    EXPECT_EQ(expectedHeapSize, kernel.getSurfaceStateHeapSize());
    EXPECT_NE(nullptr, kernel.getSurfaceStateHeap());
}
|
||||
|
||||
// Builds a kernel descriptor with two bindless explicit args (pointer, image), one
// stateless-only pointer arg and two bindless implicit surfaces (global variables,
// global constants), then checks that patchBindlessOffsetsInCrossThreadData() writes the
// extended-message-descriptor value for consecutive surface states into each bindless
// cross-thread slot and leaves the stateless slot at zero.
TEST_F(BindlessKernelTests, givenBindlessKernelWhenPatchingCrossThreadDataThenCorrectBindlessOffsetsAreWritten) {
    // Explicit arg 0: pointer with a bindless slot at cross-thread offset 0.
    auto argDescriptor = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    argDescriptor.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
    argDescriptor.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor.as<NEO::ArgDescPointer>().bindless = 0x0;

    // Explicit arg 1: image with a bindless slot in the second 8-byte cell.
    auto argDescriptorImg = NEO::ArgDescriptor(NEO::ArgDescriptor::argTImage);
    argDescriptorImg.as<NEO::ArgDescImage>() = NEO::ArgDescImage();
    argDescriptorImg.as<NEO::ArgDescImage>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptorImg.as<NEO::ArgDescImage>().bindless = sizeof(uint64_t);

    // Explicit arg 2: pointer with only a stateless offset (third cell); the test expects
    // this cell to stay zero.
    auto argDescriptor2 = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    argDescriptor2.as<NEO::ArgDescPointer>() = NEO::ArgDescPointer();
    argDescriptor2.as<NEO::ArgDescPointer>().bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    argDescriptor2.as<NEO::ArgDescPointer>().stateless = 2 * sizeof(uint64_t);

    auto &kernelDescriptor = pProgram->mockKernelInfo.kernelDescriptor;
    kernelDescriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    kernelDescriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor);
    kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptorImg);
    kernelDescriptor.payloadMappings.explicitArgs.push_back(argDescriptor2);

    // Implicit surfaces occupy the fourth and fifth cells.
    kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = 3 * sizeof(uint64_t);
    kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.bindless = 4 * sizeof(uint64_t);

    MockKernel mockKernel(pProgram, pProgram->mockKernelInfo, *pClDevice);

    kernelDescriptor.initBindlessOffsetToSurfaceState();

    // Zeroed cross-thread data with one 8-byte cell per slot described above.
    mockKernel.crossThreadData = new char[5 * sizeof(uint64_t)];
    mockKernel.crossThreadDataSize = 5 * sizeof(uint64_t);
    memset(mockKernel.crossThreadData, 0x00, mockKernel.crossThreadDataSize);

    const uint64_t baseAddress = 0x1000;
    auto &gfxCoreHelper = pClDevice->getGfxCoreHelper();
    auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();

    // Expected values: one surface state per bindless slot, laid out back to back from baseAddress.
    auto patchValue1 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress));
    auto patchValue2 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + 1 * surfaceStateSize));
    auto patchValue3 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + 2 * surfaceStateSize));
    auto patchValue4 = gfxCoreHelper.getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(baseAddress + 3 * surfaceStateSize));

    mockKernel.patchBindlessOffsetsInCrossThreadData(baseAddress);

    // Copy into a uint64_t array so the cells can be read without casting the char buffer.
    auto crossThreadData = std::make_unique<uint64_t[]>(mockKernel.crossThreadDataSize / sizeof(uint64_t));
    memcpy(crossThreadData.get(), mockKernel.crossThreadData, mockKernel.crossThreadDataSize);

    EXPECT_EQ(patchValue1, crossThreadData[0]);
    EXPECT_EQ(patchValue2, crossThreadData[1]);
    EXPECT_EQ(0u, crossThreadData[2]);
    EXPECT_EQ(patchValue3, crossThreadData[3]);
    EXPECT_EQ(patchValue4, crossThreadData[4]);
}
|
||||
|
||||
// Declares a bindless pointer arg and a bindless global-variables surface but never calls
// initBindlessOffsetToSurfaceState(); after patching, the first cross-thread cell must
// still be zero.
TEST_F(BindlessKernelTests, givenNoEntryInBindlessOffsetsMapWhenPatchingCrossThreadDataThenMemoryIsNotPatched) {
    auto &descriptor = pProgram->mockKernelInfo.kernelDescriptor;
    descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    auto pointerArg = NEO::ArgDescriptor(NEO::ArgDescriptor::argTPointer);
    auto &pointerDesc = pointerArg.as<NEO::ArgDescPointer>();
    pointerDesc = NEO::ArgDescPointer();
    pointerDesc.bindful = NEO::undefined<NEO::SurfaceStateHeapOffset>;
    pointerDesc.bindless = 0x0;
    descriptor.payloadMappings.explicitArgs.push_back(pointerArg);

    descriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.bindless = sizeof(uint64_t);

    MockKernel mockKernel(pProgram, pProgram->mockKernelInfo, *pClDevice);

    // Value-initialized (all zero) cross-thread buffer of four 8-byte cells.
    mockKernel.crossThreadData = new char[4 * sizeof(uint64_t)]();
    mockKernel.crossThreadDataSize = 4 * sizeof(uint64_t);

    const uint64_t surfaceStateBase = 0x1000;
    mockKernel.patchBindlessOffsetsInCrossThreadData(surfaceStateBase);

    uint64_t firstCell = 0;
    memcpy(&firstCell, mockKernel.crossThreadData, sizeof(firstCell));

    EXPECT_EQ(0u, firstCell);
}
|
||||
|
||||
// With a by-value argument as the only explicit arg, patching bindless offsets must leave
// the (single-cell) cross-thread data at zero.
TEST_F(BindlessKernelTests, givenNoStatefulArgsWhenPatchingBindlessOffsetsInCrossThreadDataThenMemoryIsNotPatched) {
    auto &descriptor = pProgram->mockKernelInfo.kernelDescriptor;
    descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    auto valueArg = NEO::ArgDescriptor(NEO::ArgDescriptor::argTValue);
    auto &valueDesc = valueArg.as<NEO::ArgDescValue>();
    valueDesc = NEO::ArgDescValue();
    valueDesc.elements.push_back(NEO::ArgDescValue::Element{0, 8, 0, false});
    descriptor.payloadMappings.explicitArgs.push_back(valueArg);

    MockKernel mockKernel(pProgram, pProgram->mockKernelInfo, *pClDevice);

    // Value-initialized (all zero) cross-thread buffer holding one 8-byte cell.
    mockKernel.crossThreadData = new char[sizeof(uint64_t)]();
    mockKernel.crossThreadDataSize = sizeof(uint64_t);

    const uint64_t surfaceStateBase = 0x1000;
    mockKernel.patchBindlessOffsetsInCrossThreadData(surfaceStateBase);

    uint64_t firstCell = 0;
    memcpy(&firstCell, mockKernel.crossThreadData, sizeof(firstCell));

    EXPECT_EQ(0u, firstCell);
}
|
||||
|
||||
class KernelFromBinaryTest : public ProgramSimpleFixture {
|
||||
public:
|
||||
void setUp() {
|
||||
|
@ -1218,6 +1350,42 @@ HWTEST_F(KernelResidencyTest, givenKernelWhenMakeResidentIsCalledThenGlobalBuffe
|
|||
memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
|
||||
}
|
||||
|
||||
// Installs a mock bindless heaps helper, gives the program global and constant surfaces
// with bindless slots, and checks that Kernel::makeResident() makes resident both surfaces
// as well as the heap allocations recorded in their bindless info.
HWTEST_F(KernelResidencyTest, givenBindlessHeapsHelperAndGlobalAndConstantBuffersWhenMakeResidentIsCalledThenGlobalAndConstantBufferHeapAllocationsAreMadeResident) {
    const auto deviceRootIndex = pDevice->getRootDeviceIndex();

    auto mockHeapsHelper = new MockBindlesHeapsHelper(pDevice, false);
    pDevice->getExecutionEnvironment()->rootDeviceEnvironments[deviceRootIndex]->bindlessHeapsHelper.reset(mockHeapsHelper);

    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.storeMakeResidentAllocations = true;
    auto memoryManager = csr.getMemoryManager();

    auto pKernelInfo = std::make_unique<KernelInfo>();
    pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1;
    pKernelInfo->kernelAllocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{deviceRootIndex, MemoryConstants::pageSize});

    MockProgram program(toClDeviceVector(*pClDevice));
    MockContext ctx;
    program.setContext(&ctx);

    // Attach the program-scope surfaces and reserve a bindless slot for each of them.
    auto &buildInfo = program.buildInfos[deviceRootIndex];
    buildInfo.globalSurface = new MockGraphicsAllocation();
    buildInfo.constantSurface = new MockGraphicsAllocation();
    EXPECT_TRUE(memoryManager->allocateBindlessSlot(buildInfo.globalSurface));
    EXPECT_TRUE(memoryManager->allocateBindlessSlot(buildInfo.constantSurface));

    auto kernel = std::make_unique<MockKernel>(&program, *pKernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernel->initialize());

    EXPECT_EQ(0u, csr.makeResidentAllocations.size());
    kernel->makeResident(pDevice->getGpgpuCommandStreamReceiver());

    EXPECT_TRUE(csr.isMadeResident(buildInfo.globalSurface));
    EXPECT_TRUE(csr.isMadeResident(program.getGlobalSurface(rootDeviceIndex)->getBindlessInfo().heapAllocation));

    EXPECT_TRUE(csr.isMadeResident(buildInfo.constantSurface));
    EXPECT_TRUE(csr.isMadeResident(program.getConstantSurface(rootDeviceIndex)->getBindlessInfo().heapAllocation));

    memoryManager->freeGraphicsMemory(pKernelInfo->kernelAllocation);
}
|
||||
|
||||
HWTEST_F(KernelResidencyTest, givenKernelWhenItUsesIndirectUnifiedMemoryDeviceAllocationThenTheyAreMadeResident) {
|
||||
MockKernelWithInternals mockKernel(*this->pClDevice);
|
||||
auto &commandStreamReceiver = this->pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
@ -2962,6 +3130,108 @@ TEST(KernelTest, givenKernelWithPatchInfoCollectionDisabledWhenPatchWithImplicit
|
|||
EXPECT_EQ(0u, kernel.mockKernel->getPatchInfoDataList().size());
|
||||
}
|
||||
|
||||
// Patches an implicit surface for a bindless buffer argument and checks that the surface
// state at the heap offset derived from the bindless args map carries the allocation's
// GPU address as its base address.
HWTEST_F(KernelTest, givenBindlessArgBufferWhenPatchWithImplicitSurfaceThenSurfaceStateIsEncodedAtProperOffset) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);

    uint64_t gpuAddress = 0x1200;
    const void *cpuPtr = reinterpret_cast<const void *>(gpuAddress);
    size_t allocSize = 0x1000;
    MockGraphicsAllocation mockAllocation(const_cast<void *>(cpuPtr), gpuAddress, allocSize);

    kernel.kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindlessAndStateless;

    const CrossThreadDataOffset bindlessOffset = 0x10;
    kernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *), undefined<CrossThreadDataOffset>, bindlessOffset);

    kernel.kernelInfo.kernelDescriptor.initBindlessOffsetToSurfaceState();

    uint64_t crossThreadData = 0;
    kernel.mockKernel->patchWithImplicitSurface(castToUint64(&crossThreadData), mockAllocation, kernel.kernelInfo.argAsPtr(0));

    const auto &gfxCoreHelper = device->getGfxCoreHelper();
    const auto surfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize();

    // Fail the test cleanly instead of dereferencing end() when the mapping is missing.
    const auto &bindlessArgsMap = kernel.kernelInfo.kernelDescriptor.bindlessArgsMap;
    const auto ssIndexIt = bindlessArgsMap.find(bindlessOffset);
    ASSERT_NE(bindlessArgsMap.end(), ssIndexIt);

    const auto ssIndex = ssIndexIt->second;
    const auto ssOffset = ssIndex * surfaceStateSize;

    const auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(ptrOffset(kernel.mockKernel->getSurfaceStateHeap(), ssOffset));
    const auto surfaceAddress = surfaceState->getSurfaceBaseAddress();

    const auto bufferAddress = mockAllocation.getGpuAddressToPatch();
    EXPECT_EQ(bufferAddress, surfaceAddress);
}
|
||||
|
||||
// Patches an implicit surface for a bindless buffer argument while the bindless args map
// is empty and checks that the kernel's surface state heap is byte-for-byte unchanged.
HWTEST_F(KernelTest, givenBindlessArgBufferAndNotInitializedBindlessOffsetToSurfaceStateWhenPatchWithImplicitSurfaceThenSurfaceStateIsNotEncoded) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
    MockKernelWithInternals kernel(*device);

    // The allocation reuses the GPU address value as its CPU pointer.
    uint64_t gpuVa = 0x1200;
    size_t allocationBytes = 0x1000;
    MockGraphicsAllocation mockAllocation(reinterpret_cast<void *>(gpuVa), gpuVa, allocationBytes);

    auto &kernelDescriptor = kernel.kernelInfo.kernelDescriptor;
    kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindlessAndStateless;

    const CrossThreadDataOffset bindlessOffset = 0x10;
    kernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *), undefined<CrossThreadDataOffset>, bindlessOffset);

    // Snapshot the heap before the call under test.
    const auto heapPtr = kernel.mockKernel->getSurfaceStateHeap();
    const auto heapBytes = kernel.mockKernel->getSurfaceStateHeapSize();
    auto heapSnapshot = std::make_unique<char[]>(heapBytes);
    std::memcpy(heapSnapshot.get(), heapPtr, heapBytes);

    kernelDescriptor.bindlessArgsMap.clear();

    uint64_t crossThreadCell = 0;
    kernel.mockKernel->patchWithImplicitSurface(castToUint64(&crossThreadCell), mockAllocation, kernel.kernelInfo.argAsPtr(0));

    EXPECT_EQ(0, std::memcmp(heapSnapshot.get(), heapPtr, heapBytes));
}
|
||||
|
||||
// With a mock bindless heaps helper installed and a bindless slot reserved for the
// allocation, patching an implicit surface must (1) write the extended-message-descriptor
// value of the slot's surface-state offset at the arg's bindless cross-thread offset and
// (2) encode the allocation's GPU address into the surface state of that slot.
HWTEST_F(KernelTest, givenBindlessHeapsHelperAndBindlessArgBufferWhenPatchWithImplicitSurfaceThenCrossThreadDataIsPatchedAndSurfaceStateIsEncoded) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
    auto &neoDevice = device->getDevice();

    auto mockHeapsHelper = new MockBindlesHeapsHelper(&neoDevice, false);
    neoDevice.getExecutionEnvironment()->rootDeviceEnvironments[neoDevice.getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHeapsHelper);

    MockKernelWithInternals kernel(*device);

    // The allocation reuses the GPU address value as its CPU pointer.
    uint64_t gpuVa = 0x1200;
    size_t allocationBytes = 0x1000;
    MockGraphicsAllocation mockAllocation(reinterpret_cast<void *>(gpuVa), gpuVa, allocationBytes);

    kernel.kernelInfo.kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindlessAndStateless;

    EXPECT_TRUE(device->getMemoryManager()->allocateBindlessSlot(&mockAllocation));

    const CrossThreadDataOffset bindlessOffset = 0x10;
    kernel.kernelInfo.addArgBuffer(0, 0, sizeof(void *), undefined<CrossThreadDataOffset>, bindlessOffset);
    kernel.kernelInfo.kernelDescriptor.initBindlessOffsetToSurfaceState();

    uint64_t crossThreadCell = 0;
    kernel.mockKernel->patchWithImplicitSurface(castToUint64(&crossThreadCell), mockAllocation, kernel.kernelInfo.argAsPtr(0));

    auto slotInfo = mockAllocation.getBindlessInfo();

    // Cross-thread data check.
    auto patchedValuePtr = reinterpret_cast<uint32_t *>(ptrOffset(kernel.mockKernel->crossThreadData, bindlessOffset));
    auto expectedPatch = device->getGfxCoreHelper().getBindlessSurfaceExtendedMessageDescriptorValue(static_cast<uint32_t>(slotInfo.surfaceStateOffset));
    EXPECT_EQ(expectedPatch, *patchedValuePtr);

    // Surface state check.
    const auto encodedState = reinterpret_cast<const RENDER_SURFACE_STATE *>(slotInfo.ssPtr);
    EXPECT_EQ(mockAllocation.getGpuAddressToPatch(), encodedState->getSurfaceBaseAddress());
}
|
||||
|
||||
TEST(KernelTest, givenDefaultKernelWhenItIsCreatedThenItReportsStatelessWrites) {
|
||||
auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
|
||||
MockKernelWithInternals kernel(*device);
|
||||
|
|
|
@ -98,6 +98,8 @@ class MockKernel : public Kernel {
|
|||
using Kernel::anyKernelArgumentUsingSystemMemory;
|
||||
using Kernel::auxTranslationRequired;
|
||||
using Kernel::containsStatelessWrites;
|
||||
using Kernel::crossThreadData;
|
||||
using Kernel::crossThreadDataSize;
|
||||
using Kernel::dataParameterSimdSize;
|
||||
using Kernel::executionType;
|
||||
using Kernel::getDevice;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -15,6 +15,7 @@
|
|||
#include "shared/test/common/device_binary_format/patchtokens_tests.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/gtest_helpers.h"
|
||||
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/mocks/mock_memory_manager.h"
|
||||
|
@ -324,6 +325,116 @@ TEST_F(ProgramDataTest, whenGlobalVariablesAreNotExportedThenAllocateSurfacesAsN
|
|||
EXPECT_EQ(nullptr, this->pContext->getSVMAllocsManager()->getSVMAlloc(reinterpret_cast<const void *>(pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex())->getGpuAddress())));
|
||||
}
|
||||
|
||||
// Test-suite alias: the bindless program-data tests below run on the unchanged ProgramDataTest fixture.
using ProgramDataBindlessTest = ProgramDataTest;
|
||||
|
||||
// Processes program info that contains global constants, global variables and two kernels
// (one bindful, one bindless) with a mock bindless heaps helper installed, then checks
// that both program-scope surfaces exist and have a heap allocation in their bindless info.
TEST_F(ProgramDataBindlessTest, givenBindlessKernelAndConstantsAndVariablesMemorySurfaceWhenProcessProgramInfoThenConstantsAndVariablesSurfaceBindlessSlotIsAllocated) {
    auto &neoDevice = pClDevice->getDevice();
    neoDevice.getExecutionEnvironment()->rootDeviceEnvironments[neoDevice.getRootDeviceIndex()]->memoryOperationsInterface =
        std::make_unique<NEO::MockMemoryOperations>();

    auto bindlessHeapHelper = new MockBindlesHeapsHelper(&neoDevice, false);
    neoDevice.getExecutionEnvironment()->rootDeviceEnvironments[neoDevice.getRootDeviceIndex()]->bindlessHeapsHelper.reset(bindlessHeapHelper);

    ProgramInfo programInfo;

    char globalConstantsData[128] = {};
    programInfo.globalConstants.initData = globalConstantsData;
    programInfo.globalConstants.size = sizeof(globalConstantsData);

    char globalVariablesData[128] = {};
    programInfo.globalVariables.initData = globalVariablesData;
    programInfo.globalVariables.size = sizeof(globalVariablesData);

    // First kernel is bindful, second one is bindless. The second assignment previously
    // targeted kernelInfo1 again, overwriting Bindful and leaving kernelInfo2 unconfigured.
    auto kernelInfo1 = std::make_unique<KernelInfo>();
    kernelInfo1->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Bindful;
    auto kernelInfo2 = std::make_unique<KernelInfo>();
    kernelInfo2->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindlessAndStateless;

    programInfo.kernelInfos.push_back(kernelInfo1.release());
    programInfo.kernelInfos.push_back(kernelInfo2.release());

    std::unique_ptr<WhiteBox<NEO::LinkerInput>> mockLinkerInput = std::make_unique<WhiteBox<NEO::LinkerInput>>();
    programInfo.linkerInput = std::move(mockLinkerInput);
    this->pProgram->processProgramInfo(programInfo, *pClDevice);

    ASSERT_NE(nullptr, pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex()));
    ASSERT_NE(nullptr, pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex()));

    auto globalConstantsAlloc = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex());
    auto ssInHeap1 = globalConstantsAlloc->getBindlessInfo();

    EXPECT_NE(nullptr, ssInHeap1.heapAllocation);

    auto globalVariablesAlloc = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex());
    auto ssInHeap2 = globalVariablesAlloc->getBindlessInfo();

    EXPECT_NE(nullptr, ssInHeap2.heapAllocation);
}
|
||||
|
||||
// Forces the mock bindless heaps helper to fail surface-state allocation and checks that
// processing a bindless program with global constants returns CL_OUT_OF_HOST_MEMORY while
// the constant surface still exists but has no heap allocation in its bindless info.
TEST_F(ProgramDataBindlessTest, givenBindlessKernelAndGlobalConstantsMemorySurfaceWhenProcessProgramInfoAndSSAllocationFailsThenGlobalConstantsSurfaceBindlessSlotIsNotAllocatedAndReturnOutOfHostMemory) {
    auto &neoDevice = pClDevice->getDevice();
    auto &rootDeviceEnvironments = neoDevice.getExecutionEnvironment()->rootDeviceEnvironments;
    rootDeviceEnvironments[neoDevice.getRootDeviceIndex()]->memoryOperationsInterface =
        std::make_unique<NEO::MockMemoryOperations>();

    auto failingHeapsHelper = new MockBindlesHeapsHelper(&neoDevice, false);
    failingHeapsHelper->failAllocateSS = true;
    rootDeviceEnvironments[neoDevice.getRootDeviceIndex()]->bindlessHeapsHelper.reset(failingHeapsHelper);

    char constantsInitData[128] = {};
    ProgramInfo programInfo;
    programInfo.globalConstants.initData = constantsInitData;
    programInfo.globalConstants.size = sizeof(constantsInitData);

    auto bindlessKernelInfo = std::make_unique<KernelInfo>();
    bindlessKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindlessAndStateless;
    programInfo.kernelInfos.push_back(bindlessKernelInfo.release());

    programInfo.linkerInput = std::make_unique<WhiteBox<NEO::LinkerInput>>();

    auto ret = this->pProgram->processProgramInfo(programInfo, *pClDevice);
    EXPECT_EQ(ret, CL_OUT_OF_HOST_MEMORY);

    auto constantSurface = pProgram->getConstantSurface(pContext->getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, constantSurface);

    EXPECT_EQ(nullptr, constantSurface->getBindlessInfo().heapAllocation);
}
|
||||
|
||||
// Forces the mock bindless heaps helper to fail surface-state allocation and checks that
// processing a bindless program with global variables returns CL_OUT_OF_HOST_MEMORY while
// the global surface still exists but has no heap allocation in its bindless info.
TEST_F(ProgramDataBindlessTest, givenBindlessKernelAndGlobalVariablesMemorySurfaceWhenProcessProgramInfoAndSSAllocationFailsThenGlobalVariablesSurfaceBindlessSlotIsNotAllocatedAndReturnOutOfHostMemory) {
    auto &neoDevice = pClDevice->getDevice();
    auto &rootDeviceEnvironments = neoDevice.getExecutionEnvironment()->rootDeviceEnvironments;
    rootDeviceEnvironments[neoDevice.getRootDeviceIndex()]->memoryOperationsInterface =
        std::make_unique<NEO::MockMemoryOperations>();

    auto failingHeapsHelper = new MockBindlesHeapsHelper(&neoDevice, false);
    failingHeapsHelper->failAllocateSS = true;
    rootDeviceEnvironments[neoDevice.getRootDeviceIndex()]->bindlessHeapsHelper.reset(failingHeapsHelper);

    char variablesInitData[128] = {};
    ProgramInfo programInfo;
    programInfo.globalVariables.initData = variablesInitData;
    programInfo.globalVariables.size = sizeof(variablesInitData);

    auto bindlessKernelInfo = std::make_unique<KernelInfo>();
    bindlessKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::BindlessAndStateless;
    programInfo.kernelInfos.push_back(bindlessKernelInfo.release());

    programInfo.linkerInput = std::make_unique<WhiteBox<NEO::LinkerInput>>();

    auto ret = this->pProgram->processProgramInfo(programInfo, *pClDevice);
    EXPECT_EQ(ret, CL_OUT_OF_HOST_MEMORY);

    auto globalSurface = pProgram->getGlobalSurface(pContext->getDevice(0)->getRootDeviceIndex());
    ASSERT_NE(nullptr, globalSurface);

    EXPECT_EQ(nullptr, globalSurface->getBindlessInfo().heapAllocation);
}
|
||||
|
||||
TEST_F(ProgramDataTest, givenConstantAllocationThatIsInUseByGpuWhenProgramIsBeingDestroyedThenItIsAddedToTemporaryAllocationList) {
|
||||
|
||||
setupConstantAllocation();
|
||||
|
|
Loading…
Reference in New Issue