feature: bindless global allocator with scratch

- allocate SSH in cmdContainer when scratch allocation used with
private heaps
- scratch SurfaceStates are addressed relative to
SurfaceStateBaseAddress and have to be placed on SSH
- remove not used SCRATCH_SSH heap type from bindelssHeapHelper

Related-To: NEO-7063

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe 2023-07-31 14:33:30 +00:00 committed by Compute-Runtime-Automation
parent 856e9f00f3
commit bcba74f839
11 changed files with 171 additions and 115 deletions

View File

@ -110,6 +110,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
NEO::IndirectHeap *ssh = nullptr;
NEO::IndirectHeap *dsh = nullptr;
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
if ((this->cmdListHeapAddressModel == NEO::HeapAddressModel::PrivateHeaps) && (commandListPerThreadScratchSize != 0 || commandListPerThreadPrivateScratchSize != 0)) {
commandContainer.prepareBindfulSsh();
}
if ((this->immediateCmdListHeapSharing || this->stateBaseAddressTracking) &&
(this->cmdListHeapAddressModel == NEO::HeapAddressModel::PrivateHeaps)) {
auto kernelInfo = kernelImmutableData->getKernelInfo();
@ -139,8 +146,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
ssh = sshReserveArgs.indirectHeapReservation;
dsh = dshReserveArgs.indirectHeapReservation;
}
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
auto kernelPreemptionMode = obtainKernelPreemptionMode(kernel);

View File

@ -148,10 +148,7 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHe
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState);
}
if (NEO::ApiSpecificConfig::getGlobalBindlessHeapConfiguration()) {
scratchController->programBindlessSurfaceStateForScratch(device->getNEODevice()->getBindlessHeapsHelper(), perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState, csr);
}
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
if (scratchAllocation != nullptr) {
csr->makeResident(*scratchAllocation);

View File

@ -17,9 +17,32 @@
#include <sstream>
const char *source = R"===(
typedef ulong16 TYPE;
__attribute__((reqd_work_group_size(32, 1, 1))) // force LWS to 32
__attribute__((intel_reqd_sub_group_size(16))) // force SIMD to 16
__kernel void kernel_copy(__global char *dst, __global char *src){
uint gid = get_global_id(0);
dst[gid] = src[gid];
__local TYPE locMem[32];
{
size_t lid = get_local_id(0);
size_t gid = get_global_id(0);
TYPE res1 = (TYPE)(src[gid * 3]);
TYPE res2 = (TYPE)(src[gid * 3 + 1]);
TYPE res3 = (TYPE)(src[gid * 3 + 2]);
locMem[lid] = res1;
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_GLOBAL_MEM_FENCE);
TYPE res = (locMem[src[gid]] * res3) * res2 + res1;
src[0] += (char)res[lid];
}
barrier(CLK_GLOBAL_MEM_FENCE);
src[0] = dst[0];
}
)===";

View File

@ -15,6 +15,7 @@
#include "shared/source/utilities/software_tags_manager.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
#include "shared/test/common/mocks/mock_compilers.h"
#include "shared/test/common/test_macros/hw_test.h"
@ -1146,7 +1147,7 @@ HWTEST_F(CmdlistAppendLaunchKernelTests, givenKernelWithoutImplicitArgsWhenAppen
EXPECT_EQ(indirectHeap->getUsed(), sizeCrossThreadData + sizePerThreadDataForWholeGroup);
}
HWTEST2_F(CmdlistAppendLaunchKernelTests, givenKernelWitchScratchAndPrivateWhenAppendLaunchKernelThenCmdListHasCorrectPrivateAndScratchSizesSet, IsAtLeastXeHpCore) {
HWTEST2_F(CmdlistAppendLaunchKernelTests, givenKernelWithScratchAndPrivateWhenAppendLaunchKernelThenCmdListHasCorrectPrivateAndScratchSizesSet, IsAtLeastXeHpCore) {
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;
@ -1181,6 +1182,88 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests, givenKernelWitchScratchAndPrivateWhenA
EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(), static_cast<uint32_t>(0x200));
}
HWTEST2_F(CmdlistAppendLaunchKernelTests, givenGlobalBindlessAllocatorAndKernelWithPrivateScratchWhenAppendLaunchKernelThenCmdContainerHasBindfulSSHAllocated, IsAtLeastXeHpCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
auto bindlessHeapsHelper = std::make_unique<MockBindlesHeapsHelper>(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
execEnv->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(bindlessHeapsHelper.release());
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;
kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = 0x40;
createModuleFromMockBinary(0u, false, mockKernelImmData.get());
auto kernel = std::make_unique<MockKernel>(module.get());
ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
kernel->initialize(&kernelDesc);
EXPECT_FALSE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
EXPECT_EQ(nullptr, kernel->getImplicitArgs());
kernel->setGroupSize(4, 5, 6);
kernel->setGroupCount(3, 2, 1);
kernel->setGlobalOffsetExp(1, 2, 3);
kernel->patchGlobalOffset();
ze_result_t result{};
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(nullptr, commandList->getCmdContainer().getIndirectHeap(HeapType::SURFACE_STATE));
ze_group_count_t groupCount = {3, 2, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, commandList->getCmdContainer().getIndirectHeap(HeapType::SURFACE_STATE));
}
HWTEST2_F(CmdlistAppendLaunchKernelTests, givenGlobalBindlessAllocatorAndKernelWithScratchWhenAppendLaunchKernelThenCmdContainerHasBindfulSSHAllocated, IsAtLeastXeHpCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
auto bindlessHeapsHelper = std::make_unique<MockBindlesHeapsHelper>(neoDevice->getMemoryManager(), neoDevice->getNumGenericSubDevices() > 1, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
execEnv->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(bindlessHeapsHelper.release());
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;
kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x40;
createModuleFromMockBinary(0u, false, mockKernelImmData.get());
auto kernel = std::make_unique<MockKernel>(module.get());
ze_kernel_desc_t kernelDesc{ZE_STRUCTURE_TYPE_KERNEL_DESC};
kernel->initialize(&kernelDesc);
EXPECT_FALSE(kernel->getKernelDescriptor().kernelAttributes.flags.requiresImplicitArgs);
EXPECT_EQ(nullptr, kernel->getImplicitArgs());
kernel->setGroupSize(4, 5, 6);
kernel->setGroupCount(3, 2, 1);
kernel->setGlobalOffsetExp(1, 2, 3);
kernel->patchGlobalOffset();
ze_result_t result{};
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, result));
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(nullptr, commandList->getCmdContainer().getIndirectHeap(HeapType::SURFACE_STATE));
ze_group_count_t groupCount = {3, 2, 1};
CmdListKernelLaunchParams launchParams = {};
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, commandList->getCmdContainer().getIndirectHeap(HeapType::SURFACE_STATE));
}
HWTEST_F(CmdlistAppendLaunchKernelTests, whenEncodingWorkDimForIndirectDispatchThenSizeIsProperlyEstimated) {
Mock<::L0::KernelImp> kernel;

View File

@ -916,65 +916,6 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe
scratch->scratchAllocation = nullptr;
}
HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenBindlessEnabledThenHandleScratchSpaceCallsProgramBindlessSurfaceStateForScratch, Platforms) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
class MockScratchSpaceControllerXeHPAndLater : public NEO::ScratchSpaceControllerXeHPAndLater {
public:
bool programHeapsCalled = false;
NEO::MockGraphicsAllocation alloc;
MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex,
NEO::ExecutionEnvironment &environment,
InternalAllocationStorage &allocationStorage) : NEO::ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {}
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
NEO::CommandStreamReceiver *csr) override {
programHeapsCalled = true;
}
NEO::GraphicsAllocation *getScratchSpaceAllocation() override {
return &alloc;
}
protected:
};
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
NEO::ExecutionEnvironment *execEnv = static_cast<NEO::ExecutionEnvironment *>(device->getExecEnvironment());
std::unique_ptr<ScratchSpaceController> scratchController = std::make_unique<MockScratchSpaceControllerXeHPAndLater>(device->getRootDeviceIndex(),
*execEnv,
*csr.getInternalAllocationStorage());
const ze_command_queue_desc_t desc = {};
std::unique_ptr<L0::CommandQueue> commandQueue = std::make_unique<MockCommandQueueHw<gfxCoreFamily>>(device, &csr, &desc);
auto commandQueueHw = static_cast<MockCommandQueueHw<gfxCoreFamily> *>(commandQueue.get());
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
NEO::ResidencyContainer residency;
NEO::HeapContainer heapContainer;
// scratch part
commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x1000, 0u);
EXPECT_TRUE(static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get())->programHeapsCalled);
// private part
static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get())->programHeapsCalled = false;
commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x0, 0x1000);
EXPECT_TRUE(static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get())->programHeapsCalled);
}
HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorrectlyPatched, IsAtLeastXeHpCore) {
using CFE_STATE = typename FamilyType::CFE_STATE;

View File

@ -111,11 +111,7 @@ uint64_t ScratchSpaceControllerXeHPAndLater::calculateNewGSH() {
uint64_t ScratchSpaceControllerXeHPAndLater::getScratchPatchAddress() {
uint64_t scratchAddress = 0u;
if (scratchAllocation || privateScratchAllocation) {
if (ApiSpecificConfig::getGlobalBindlessHeapConfiguration()) {
scratchAddress = bindlessSS.surfaceStateOffset;
} else {
scratchAddress = static_cast<uint64_t>(getOffsetToSurfaceState(slotId + sshOffset));
}
scratchAddress = static_cast<uint64_t>(getOffsetToSurfaceState(slotId + sshOffset));
}
return scratchAddress;
}
@ -145,11 +141,13 @@ void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(B
bool scratchSurfaceDirty = false;
prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
if (scratchSurfaceDirty) {
bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (privateScratchSpaceSupported ? 2 : 1), scratchAllocation, BindlessHeapsHelper::SCRATCH_SSH);
bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (privateScratchSpaceSupported ? 2 : 1), scratchAllocation, BindlessHeapsHelper::SPECIAL_SSH);
programSurfaceStateAtPtr(bindlessSS.ssPtr);
vfeStateDirty = true;
}
csr->makeResident(*bindlessSS.heapAllocation);
if (bindlessSS.heapAllocation) {
csr->makeResident(*bindlessSS.heapAllocation);
}
}
void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize,

View File

@ -25,18 +25,17 @@ class BindlessHeapsHelper {
SPECIAL_SSH = 0,
GLOBAL_SSH,
GLOBAL_DSH,
SCRATCH_SSH,
NUM_HEAP_TYPES
};
BindlessHeapsHelper(MemoryManager *memManager, bool isMultiOsContextCapable, const uint32_t rootDeviceIndex, DeviceBitfield deviceBitfield);
~BindlessHeapsHelper();
MOCKABLE_VIRTUAL ~BindlessHeapsHelper();
BindlessHeapsHelper(const BindlessHeapsHelper &) = delete;
BindlessHeapsHelper &operator=(const BindlessHeapsHelper &) = delete;
GraphicsAllocation *getHeapAllocation(size_t heapSize, size_t alignment, bool allocInFrontWindow);
SurfaceStateInHeapInfo allocateSSInHeap(size_t ssSize, GraphicsAllocation *surfaceAllocation, BindlesHeapType heapType);
MOCKABLE_VIRTUAL SurfaceStateInHeapInfo allocateSSInHeap(size_t ssSize, GraphicsAllocation *surfaceAllocation, BindlesHeapType heapType);
uint64_t getGlobalHeapsBase();
void *getSpaceInHeap(size_t ssSize, BindlesHeapType heapType);
uint32_t getDefaultBorderColorOffset();

View File

@ -20,6 +20,14 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper {
scratchSsh = surfaceStateHeaps[BindlesHeapType::SPECIAL_SSH].get();
globalDsh = surfaceStateHeaps[BindlesHeapType::SPECIAL_SSH].get();
}
SurfaceStateInHeapInfo allocateSSInHeap(size_t ssSize, GraphicsAllocation *surfaceAllocation, BindlesHeapType heapType) override {
if (failAllocateSS) {
return SurfaceStateInHeapInfo{};
}
return BaseClass::allocateSSInHeap(ssSize, surfaceAllocation, heapType);
}
using BindlesHeapType = BindlessHeapsHelper::BindlesHeapType;
using BaseClass::borderColorStates;
using BaseClass::globalBindlessDsh;
@ -35,4 +43,5 @@ class MockBindlesHeapsHelper : public BindlessHeapsHelper {
IndirectHeap *globalSsh;
IndirectHeap *scratchSsh;
IndirectHeap *globalDsh;
bool failAllocateSS = false;
};

View File

@ -192,15 +192,6 @@ TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInGlobalHe
EXPECT_LT(ssInHeapInfo.surfaceStateOffset, frontWindowSize);
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInScratchHeapThenOffsetLessThanFrontWindowSize) {
auto bindlessHeapHelper = std::make_unique<MockBindlesHeapsHelper>(getMemoryManager(), false, rootDeviceIndex, devBitfield);
MockGraphicsAllocation alloc;
size_t size = 0x40;
auto ssInHeapInfo = bindlessHeapHelper->allocateSSInHeap(size, &alloc, BindlessHeapsHelper::BindlesHeapType::SCRATCH_SSH);
auto frontWindowSize = GfxPartition::externalFrontWindowPoolSize;
EXPECT_LT(ssInHeapInfo.surfaceStateOffset, frontWindowSize);
}
TEST_F(BindlessHeapsHelperTests, givenBindlessHeapHelperWhenAllocateSsInGlobalDshThenOffsetGreaterOrEqualFrontWindowSize) {
auto bindlessHeapHelper = std::make_unique<MockBindlesHeapsHelper>(getMemoryManager(), false, rootDeviceIndex, devBitfield);
MockGraphicsAllocation alloc;

View File

@ -8,6 +8,7 @@
#include "shared/source/command_stream/scratch_space_controller_base.h"
#include "shared/source/helpers/blit_properties.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/test_macros/hw_test.h"
@ -49,7 +50,7 @@ class MockScratchSpaceControllerBase : public ScratchSpaceControllerBase {
using ScratchComtrolerTests = Test<DeviceFixture>;
HWTEST_F(ScratchComtrolerTests, givenCommandQueueWhenProgramHeapsCalledThenThenProgramHeapsCalled) {
HWTEST_F(ScratchComtrolerTests, givenCommandQueueWhenProgramHeapsCalledThenProgramHeapsCalled) {
MockCsrHw2<FamilyType> csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.setupContext(*pDevice->getDefaultEngine().osContext);
@ -67,8 +68,8 @@ HWTEST_F(ScratchComtrolerTests, givenCommandQueueWhenProgramHeapsCalledThenThenP
EXPECT_TRUE(static_cast<MockScratchSpaceControllerBase *>(scratchController.get())->programHeapsCalled);
}
HWTEST_F(ScratchComtrolerTests, givenCommandQueueWhenProgramHeapBindlessCalledThenThenProgramBindlessSurfaceStateForScratchCalled) {
MockCsrHw2<FamilyType> csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield());
HWTEST_F(ScratchComtrolerTests, givenNullptrBindlessHeapHelperWhenProgramBindlessSurfaceStateForScratchCalledThenMakeResidentIsNotCalled) {
MockCommandStreamReceiver csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.setupContext(*pDevice->getDefaultEngine().osContext);
@ -83,4 +84,5 @@ HWTEST_F(ScratchComtrolerTests, givenCommandQueueWhenProgramHeapBindlessCalledTh
scratchController->programBindlessSurfaceStateForScratch(nullptr, 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
EXPECT_TRUE(static_cast<MockScratchSpaceControllerBase *>(scratchController.get())->programBindlessSurfaceStateForScratchCalled);
}
EXPECT_EQ(0u, csr.makeResidentCalledTimes);
}

View File

@ -10,6 +10,7 @@
#include "shared/source/helpers/blit_properties.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
@ -49,25 +50,9 @@ class MockScratchSpaceControllerXeHPAndLater : public ScratchSpaceControllerXeHP
bool scratchDirty = false;
};
using ScratchComtrolerTests = Test<DeviceFixture>;
using ScratchControllerTests = Test<DeviceFixture>;
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchComtrolerTests, givenBindlessModeOnWhenGetPatchedOffsetCalledThenBindlessOffsetReturned) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
MockCsrHw2<FamilyType> csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.setupContext(*pDevice->getDefaultEngine().osContext);
ExecutionEnvironment *execEnv = static_cast<ExecutionEnvironment *>(pDevice->getExecutionEnvironment());
std::unique_ptr<MockScratchSpaceControllerXeHPAndLater> scratchController = std::make_unique<MockScratchSpaceControllerXeHPAndLater>(pDevice->getRootDeviceIndex(),
*execEnv,
*csr.getInternalAllocationStorage());
uint64_t bindlessOffset = 0x4000;
scratchController->bindlessSS.surfaceStateOffset = bindlessOffset;
EXPECT_EQ(scratchController->getScratchPatchAddress(), bindlessOffset);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchComtrolerTests, givenDirtyScratchAllocationOnWhenWhenProgramBindlessHeapThenProgramSurfaceStateAtPtrCalled) {
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenDirtyScratchAllocationWhenProgramBindlessHeapThenProgramSurfaceStateAtPtrCalled) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
MockCommandStreamReceiver csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield());
@ -87,7 +72,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchComtrolerTests, givenDirtyScratchAllocationO
EXPECT_TRUE(scratchController->wasProgramSurfaceStateAtPtrCalled);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchComtrolerTests, givenNotDirtyScratchAllocationOnWhenWhenProgramBindlessHeapThenProgramSurfaceStateAtPtrWasNotCalled) {
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenNotDirtyScratchAllocationWhenProgramBindlessHeapThenProgramSurfaceStateAtPtrIsNotCalled) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
MockCommandStreamReceiver csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield());
@ -103,12 +88,35 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchComtrolerTests, givenNotDirtyScratchAllocati
bool frontEndStateDirty = false;
scratchController->scratchDirty = false;
scratchController->bindlessSS = bindlessHeapHelper->allocateSSInHeap(0x1000, nullptr, BindlessHeapsHelper::SCRATCH_SSH);
scratchController->bindlessSS = bindlessHeapHelper->allocateSSInHeap(0x1000, nullptr, BindlessHeapsHelper::SPECIAL_SSH);
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
EXPECT_GT(csr.makeResidentCalledTimes, 0u);
EXPECT_FALSE(scratchController->wasProgramSurfaceStateAtPtrCalled);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchComtrolerTests, givenPrivateScratchEnabledWhenWhenProgramBindlessHeapSurfaceThenSSHasDoubleSize) {
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenNoBindlessSSWhenProgramBindlessHeapThenMakeResidentIsNotCalled) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
MockCommandStreamReceiver csr(*pDevice->getExecutionEnvironment(), 0, pDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.setupContext(*pDevice->getDefaultEngine().osContext);
ExecutionEnvironment *execEnv = static_cast<ExecutionEnvironment *>(pDevice->getExecutionEnvironment());
std::unique_ptr<MockScratchSpaceControllerXeHPAndLater> scratchController = std::make_unique<MockScratchSpaceControllerXeHPAndLater>(pDevice->getRootDeviceIndex(),
*execEnv,
*csr.getInternalAllocationStorage());
auto bindlessHeapHelper = std::make_unique<MockBindlesHeapsHelper>(pDevice->getMemoryManager(), pDevice->getNumGenericSubDevices() > 1, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
scratchController->scratchDirty = false;
bindlessHeapHelper->failAllocateSS = true;
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
EXPECT_EQ(csr.makeResidentCalledTimes, 0u);
EXPECT_FALSE(scratchController->wasProgramSurfaceStateAtPtrCalled);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenPrivateScratchEnabledWhenProgramBindlessHeapSurfaceThenSSHasDoubleSize) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
DebugManager.flags.EnablePrivateScratchSlot1.set(1);
@ -124,13 +132,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchComtrolerTests, givenPrivateScratchEnabledWh
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
scratchController->scratchDirty = true;
auto usedBefore = bindlessHeapHelper->getHeap(BindlessHeapsHelper::SCRATCH_SSH)->getUsed();
auto usedBefore = bindlessHeapHelper->getHeap(BindlessHeapsHelper::SPECIAL_SSH)->getUsed();
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
auto usedAfter = bindlessHeapHelper->getHeap(BindlessHeapsHelper::SCRATCH_SSH)->getUsed();
auto usedAfter = bindlessHeapHelper->getHeap(BindlessHeapsHelper::SPECIAL_SSH)->getUsed();
EXPECT_EQ(usedAfter - usedBefore, 2 * scratchController->singleSurfaceStateSize);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchComtrolerTests, givenPrivateScratchDisabledWhenWhenProgramBindlessHeapSurfaceThenSSHasSingleSize) {
HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenPrivateScratchDisabledWhenProgramBindlessHeapSurfaceThenSSHasSingleSize) {
DebugManagerStateRestore restorer;
DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
DebugManager.flags.EnablePrivateScratchSlot1.set(0);
@ -146,8 +154,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchComtrolerTests, givenPrivateScratchDisabledW
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
scratchController->scratchDirty = true;
auto usedBefore = bindlessHeapHelper->getHeap(BindlessHeapsHelper::SCRATCH_SSH)->getUsed();
auto usedBefore = bindlessHeapHelper->getHeap(BindlessHeapsHelper::SPECIAL_SSH)->getUsed();
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
auto usedAfter = bindlessHeapHelper->getHeap(BindlessHeapsHelper::SCRATCH_SSH)->getUsed();
auto usedAfter = bindlessHeapHelper->getHeap(BindlessHeapsHelper::SPECIAL_SSH)->getUsed();
EXPECT_EQ(usedAfter - usedBefore, scratchController->singleSurfaceStateSize);
}