fix: scratch controller residency

Related-To: HSD-18039519400

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-08-09 13:13:51 +00:00
committed by Compute-Runtime-Automation
parent bc385fa1b9
commit d76ac1d1de
21 changed files with 47 additions and 73 deletions

View File

@@ -125,7 +125,7 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapC
uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) {
if (perThreadScratchSpaceSlot0Size > 0) {
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSlot0Size, 0u, csr->peekTaskCount(),
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSlot0Size, 0u,
csr->getOsContext(), gsbaState, frontEndState);
auto scratchAllocation = scratchController->getScratchSpaceSlot0Allocation();
csr->makeResident(*scratchAllocation);

View File

@@ -138,7 +138,7 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHe
uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) {
if (perThreadScratchSpaceSlot0Size > 0 || perThreadScratchSpaceSlot1Size > 0) {
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) {
scratchController->setRequiredScratchSpace(globalStatelessAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(),
scratchController->setRequiredScratchSpace(globalStatelessAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size,
csr->getOsContext(), gsbaState, frontEndState);
}
@@ -148,7 +148,7 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHe
if (sshHeaps.size() > 0) {
uint32_t offsetIndex = gfxCoreHelper.getMaxPtssIndex(productHelper) * csr->getOsContext().getEngineType() + 1u;
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(),
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size,
csr->getOsContext(), gsbaState, frontEndState);
}

View File

@@ -842,7 +842,6 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override {
@@ -905,7 +904,6 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override {

View File

@@ -108,10 +108,10 @@ inline void HardwareInterface<GfxFamily>::programWalker(
auto requiredScratchSlot0Size = queueCsr.getRequiredScratchSlot0Size();
auto requiredScratchSlot1Size = queueCsr.getRequiredScratchSlot1Size();
auto *defaultCsr = device.getDefaultEngine().commandStreamReceiver;
uint64_t scratchAddress = 0u;
EncodeDispatchKernel<GfxFamily>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, &ssh, *defaultCsr);
EncodeDispatchKernel<GfxFamily>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, &ssh, queueCsr);
auto interfaceDescriptor = &walkerCmd.getInterfaceDescriptor();
@@ -154,7 +154,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
}
if (partitionWalker) {
const uint64_t workPartitionAllocationGpuVa = defaultCsr->getWorkPartitionAllocationGpuAddress();
const uint64_t workPartitionAllocationGpuVa = queueCsr.getWorkPartitionAllocationGpuAddress();
uint32_t partitionCount = 0u;
RequiredPartitionDim requiredPartitionDim = kernel.usesImages() ? RequiredPartitionDim::x : RequiredPartitionDim::none;

View File

@@ -597,7 +597,6 @@ struct MockScratchController : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override {
@@ -616,7 +615,6 @@ struct MockScratchController : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override {
@@ -624,7 +622,6 @@ struct MockScratchController : public ScratchSpaceController {
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,

View File

@@ -286,7 +286,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatch
bool vfeStateDirty;
MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(DeviceBitfield(8)));
mockCsr->setupContext(osContext);
mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 0u, 10u, 0u, 1u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty);
mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 0u, 10u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty);
DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();
mockCsr->requiredScratchSlot0Size = 0x200000;
@@ -307,7 +307,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatch
bool vfeStateDirty;
MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(DeviceBitfield(8)));
mockCsr->setupContext(osContext);
mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 0u, 10u, 0u, 1u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty);
mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 0u, 10u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty);
DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();
mockCsr->requiredScratchSlot0Size = 0x200000;

View File

@@ -482,7 +482,7 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenScratchAl
bool stateBaseAddressDirty = false;
bool cfeStateDirty = false;
scratchController->setRequiredScratchSpace(reinterpret_cast<void *>(0x2000), 0u, 0u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
scratchController->setRequiredScratchSpace(reinterpret_cast<void *>(0x2000), 0u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_FALSE(cfeStateDirty);
EXPECT_FALSE(stateBaseAddressDirty);
EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot0Allocation());
@@ -498,7 +498,7 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsRequiredThenCorrectAddre
bool stateBaseAddressDirty = false;
std::unique_ptr<void, std::function<decltype(alignedFree)>> surfaceHeap(alignedMalloc(0x1000, 0x1000), alignedFree);
scratchController->setRequiredScratchSpace(surfaceHeap.get(), 0u, 0x1000u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
scratchController->setRequiredScratchSpace(surfaceHeap.get(), 0u, 0x1000u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
uint64_t expectedScratchAddress = 0xAAABBBCCCDDD000ull;
auto scratchAllocation = scratchController->getScratchSpaceSlot0Allocation();

View File

@@ -113,7 +113,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, WhenScratc
bool stateBaseAddressDirty = false;
bool cfeStateDirty = false;
commandStreamReceiver->getScratchSpaceController()->setRequiredScratchSpace(ssh, 0u, perThreadScratchSize, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
commandStreamReceiver->getScratchSpaceController()->setRequiredScratchSpace(ssh, 0u, perThreadScratchSize, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
ASSERT_NE(nullptr, commandStreamReceiver->getScratchAllocation());
EXPECT_TRUE(cfeStateDirty);
@@ -140,7 +140,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, WhenOsCont
bool stateBaseAddressDirty = false;
bool cfeStateDirty = false;
commandStreamReceiver->getScratchSpaceController()->setRequiredScratchSpace(ssh, 0u, perThreadScratchSize, 0u, 0u, *osContext, stateBaseAddressDirty, cfeStateDirty);
commandStreamReceiver->getScratchSpaceController()->setRequiredScratchSpace(ssh, 0u, perThreadScratchSize, 0u, *osContext, stateBaseAddressDirty, cfeStateDirty);
auto allocation = commandStreamReceiver->getScratchAllocation();
EXPECT_EQ(tileMask, static_cast<uint32_t>(allocation->storageInfo.memoryBanks.to_ulong()));
alignedFree(ssh);
@@ -212,7 +212,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
bool stateBaseAddressDirty = false;
bool cfeStateDirty = false;
scratchController->surfaceStateHeap = reinterpret_cast<char *>(0x1000);
scratchController->setRequiredScratchSpace(reinterpret_cast<void *>(0x2000), 0u, 0u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
scratchController->setRequiredScratchSpace(reinterpret_cast<void *>(0x2000), 0u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_EQ(scratchController->surfaceStateHeap, reinterpret_cast<char *>(0x2000));
EXPECT_FALSE(cfeStateDirty);
}
@@ -226,7 +226,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
bool cfeStateDirty = false;
void *oldSurfaceHeap = alignedMalloc(0x1000, 0x1000);
scratchController->setRequiredScratchSpace(oldSurfaceHeap, 0u, 0x1000u, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
scratchController->setRequiredScratchSpace(oldSurfaceHeap, 0u, 0x1000u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
EXPECT_EQ(1u, scratchController->slotId);
EXPECT_EQ(scratchController->surfaceStateHeap, oldSurfaceHeap);
@@ -237,7 +237,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, surfaceState->getSurfaceType());
void *newSurfaceHeap = alignedMalloc(0x1000, 0x1000);
scratchController->setRequiredScratchSpace(newSurfaceHeap, 0u, 0x1000u, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
scratchController->setRequiredScratchSpace(newSurfaceHeap, 0u, 0x1000u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
EXPECT_EQ(1u, scratchController->slotId);
EXPECT_EQ(scratchController->surfaceStateHeap, newSurfaceHeap);
@@ -263,7 +263,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
bool stateBaseAddressDirty = false;
void *surfaceHeap = alignedMalloc(0x1000, 0x1000);
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, 0x1000u, 0u, commandStreamReceiver->taskCount,
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, 0x1000u, 0u,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
EXPECT_EQ(7u, scratchController->slotId);
@@ -275,7 +275,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
RENDER_SURFACE_STATE *surfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(surfaceStateBuf);
EXPECT_EQ(gpuVa, surfaceState->getSurfaceBaseAddress());
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, 0x2000u, 0u, commandStreamReceiver->taskCount,
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, 0x2000u, 0u,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
EXPECT_EQ(8u, scratchController->slotId);
@@ -301,7 +301,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
bool stateBaseAddressDirty = false;
void *surfaceHeap = alignedMalloc(0x1000, 0x1000);
scratchController->setRequiredScratchSpace(surfaceHeap, 1u, 0x1000u, 0u, commandStreamReceiver->taskCount,
scratchController->setRequiredScratchSpace(surfaceHeap, 1u, 0x1000u, 0u,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
EXPECT_EQ(1u, scratchController->slotId);
@@ -331,7 +331,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override {
@@ -362,7 +362,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
container.push_back(&heap2);
container.push_back(&heap3);
scratchController->programHeaps(container, 0u, 1u, 0u, 0u, commandStreamReceiver->getOsContext(), stateBaseAddressDirty, cfeStateDirty);
scratchController->programHeaps(container, 0u, 1u, 0u, commandStreamReceiver->getOsContext(), stateBaseAddressDirty, cfeStateDirty);
auto scratch = static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get());
EXPECT_EQ(scratch->requiredScratchSpaceCalledTimes, 1u);
@@ -462,13 +462,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
uint32_t sizeForPrivateScratch = MemoryConstants::pageSize;
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u,
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
uint64_t gpuVa = scratchController->scratchSlot1Allocation->getGpuAddress();
EXPECT_EQ(gpuVa, surfaceState[3].getSurfaceBaseAddress());
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch * 2, 0u,
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch * 2,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
@@ -495,7 +495,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
EXPECT_EQ(0u, scratchController->getScratchPatchAddress());
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u,
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
auto expectedPatchAddress = 2 * sizeof(RENDER_SURFACE_STATE);
@@ -519,13 +519,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
uint32_t sizeForPrivateScratch = MemoryConstants::pageSize;
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u,
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
uint64_t gpuVa = scratchController->scratchSlot1Allocation->getGpuAddress();
cfeStateDirty = false;
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u,
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_FALSE(cfeStateDirty);
@@ -547,7 +547,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
uint32_t sizeForScratch = MemoryConstants::pageSize;
scratchController->setRequiredScratchSpace(surfaceState, 0u, sizeForScratch, 0u, 0u,
scratchController->setRequiredScratchSpace(surfaceState, 0u, sizeForScratch, 0u,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_TRUE(cfeStateDirty);
EXPECT_EQ(nullptr, scratchController->scratchSlot1Allocation);
@@ -583,7 +583,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenDisab
bool cfeStateDirty = false;
bool stateBaseAddressDirty = false;
scratchController->setRequiredScratchSpace(surfaceState, 0u, MemoryConstants::pageSize, MemoryConstants::pageSize, 0u,
scratchController->setRequiredScratchSpace(surfaceState, 0u, MemoryConstants::pageSize, MemoryConstants::pageSize,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_EQ(0u, scratchController->scratchSlot1SizeInBytes);
EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot1Allocation());

View File

@@ -204,7 +204,7 @@ struct EncodeDispatchKernel {
static size_t getDefaultIOHAlignment();
template <bool isHeapless>
static void setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
static void setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr);
template <bool isHeapless>
static uint64_t getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args);
template <bool isHeapless>

View File

@@ -913,7 +913,7 @@ size_t EncodeDispatchKernel<GfxFamily>::getScratchPtrOffsetOfImplicitArgs() {
template <typename Family>
template <bool isHeapless>
void EncodeDispatchKernel<Family>::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr) {
void EncodeDispatchKernel<Family>::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr) {
}
template <typename Family>

View File

@@ -22,8 +22,8 @@ template void NEO::EncodeDispatchKernel<Family>::encode<Family::DefaultWalkerTyp
template void NEO::EncodeDispatchKernel<Family>::encodeThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void NEO::EncodeDispatchKernel<Family>::adjustWalkOrder<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr);
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &submissionCsr);
template void NEO::EncodeDispatchKernel<Family>::programInlineDataHeapless<false>(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr);
template void NEO::EncodeDispatchKernel<Family>::encodeEuSchedulingPolicy<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const KernelDescriptor &kernelDesc, int32_t defaultPipelinedThreadArbitrationPolicy);
template uint64_t NEO::EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching<false>(CommandContainer &container, EncodeDispatchKernelArgs &args);

View File

@@ -514,6 +514,7 @@ class CommandStreamReceiver {
// Records the given command stream receiver pointer in this object's
// primaryCsr member.
// NOTE(review): only the raw pointer is stored; no ownership transfer or
// null check is visible here - confirm lifetime expectations with callers.
void setPrimaryCsr(CommandStreamReceiver *primaryCsr) {
// The parameter shadows the member, hence the explicit this-> qualification.
this->primaryCsr = primaryCsr;
}
// Returns the pointer currently held in the primaryCsr member; const, so the
// receiver itself is not modified.
// NOTE(review): presumably nullptr when setPrimaryCsr was never called - the
// member's initializer is not visible in this hunk, so verify before relying on it.
CommandStreamReceiver *getPrimaryCsr() const { return primaryCsr; }
void requestPreallocation();
void releasePreallocationRequest();

View File

@@ -309,7 +309,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushImmediateTask(
0u,
this->requiredScratchSlot0Size,
this->requiredScratchSlot1Size,
this->taskCount,
*this->osContext,
checkSbaStateDirty,
checkFeStateDirty);
@@ -446,7 +445,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
0u,
requiredScratchSlot0Size,
requiredScratchSlot1Size,
this->taskCount,
*this->osContext,
stateBaseAddressDirty,
checkVfeStateDirty);

View File

@@ -44,7 +44,6 @@ class ScratchSpaceController : NonCopyableOrMovableClass {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) = 0;
@@ -63,14 +62,12 @@ class ScratchSpaceController : NonCopyableOrMovableClass {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) = 0;
virtual void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,

View File

@@ -28,7 +28,6 @@ void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
@@ -86,7 +85,6 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
uint32_t offset,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
@@ -95,7 +93,6 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer,
void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,

View File

@@ -18,7 +18,6 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
@@ -31,14 +30,12 @@ class ScratchSpaceControllerBase : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,

View File

@@ -56,13 +56,12 @@ void ScratchSpaceControllerXeHPAndLater::setRequiredScratchSpace(void *sshBaseAd
uint32_t offset,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
setNewSshPtr(sshBaseAddress, vfeStateDirty, offset == 0 ? true : false);
bool scratchSurfaceDirty = false;
prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
if (scratchSurfaceDirty) {
vfeStateDirty = true;
updateSlots = true;
@@ -133,13 +132,12 @@ void ScratchSpaceControllerXeHPAndLater::reserveHeap(IndirectHeap::Type heapType
void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
NEO::CommandStreamReceiver *csr) {
bool scratchSurfaceDirty = false;
prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty);
if (scratchSurfaceDirty) {
bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (twoSlotScratchSpaceSupported ? 2 : 1), scratchSlot0Allocation, BindlessHeapsHelper::specialSsh);
programSurfaceStateAtPtr(bindlessSS.ssPtr);
@@ -152,7 +150,6 @@ void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(B
void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &scratchSurfaceDirty,
@@ -197,13 +194,12 @@ void ScratchSpaceControllerXeHPAndLater::programHeaps(HeapContainer &heapContain
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) {
sshOffset = scratchSlot;
updateSlots = false;
setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty);
setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, osContext, stateBaseAddressDirty, vfeStateDirty);
for (uint32_t i = 1; i < heapContainer.size(); ++i) {
surfaceStateHeap = static_cast<char *>(heapContainer[i]->getUnderlyingBuffer());

View File

@@ -24,7 +24,6 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
@@ -38,14 +37,12 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override;
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
@@ -56,7 +53,6 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController {
MOCKABLE_VIRTUAL void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation);
MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &scratchSurfaceDirty,

View File

@@ -2886,7 +2886,7 @@ struct MockRequiredScratchSpaceController : public ScratchSpaceControllerBase {
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override {
@@ -4685,7 +4685,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfa
bool stateBaseAddressDirty = false;
bool cfeStateDirty = false;
uint8_t surfaceHeap[1000];
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, perThreadScratchSize, 0u, commandStreamReceiver->taskCount, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
scratchController->setRequiredScratchSpace(surfaceHeap, 0u, perThreadScratchSize, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_EQ(expectedValue, scratchController->perThreadScratchSize);
}
@@ -4702,7 +4702,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfa
bool cfeStateDirty = false;
bool stateBaseAddressDirty = false;
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch, 0u,
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
EXPECT_NE(scratchController->scratchSlot1SizeInBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
EXPECT_EQ(scratchController->scratchSlot1SizeInBytes, alignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);

View File

@@ -25,22 +25,20 @@ class MockScratchSpaceControllerBase : public ScratchSpaceControllerBase {
uint32_t offset,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty) override {
ScratchSpaceControllerBase::programHeaps(heapContainer, offset, requiredPerThreadScratchSizeSlot0, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty);
ScratchSpaceControllerBase::programHeaps(heapContainer, offset, requiredPerThreadScratchSizeSlot0, requiredPerThreadPrivateScratchSize, osContext, stateBaseAddressDirty, vfeStateDirty);
programHeapsCalled = true;
}
void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadPrivateScratchSize,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &vfeStateDirty,
NEO::CommandStreamReceiver *csr) override {
ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(heapsHelper, requiredPerThreadScratchSizeSlot0, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty, csr);
ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(heapsHelper, requiredPerThreadScratchSizeSlot0, requiredPerThreadPrivateScratchSize, osContext, stateBaseAddressDirty, vfeStateDirty, csr);
programBindlessSurfaceStateForScratchCalled = true;
}
ResidencyContainer residencyContainer;
@@ -63,7 +61,7 @@ HWTEST_F(ScratchComtrolerTests, givenCommandQueueWhenProgramHeapsCalledThenProgr
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
HeapContainer heapContainer;
scratchController->programHeaps(heapContainer, 0, 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty);
scratchController->programHeaps(heapContainer, 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty);
EXPECT_TRUE(static_cast<MockScratchSpaceControllerBase *>(scratchController.get())->programHeapsCalled);
}
@@ -81,7 +79,7 @@ HWTEST_F(ScratchComtrolerTests, givenNullptrBindlessHeapHelperWhenProgramBindles
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
HeapContainer heapContainer;
scratchController->programBindlessSurfaceStateForScratch(nullptr, 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
scratchController->programBindlessSurfaceStateForScratch(nullptr, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
EXPECT_TRUE(static_cast<MockScratchSpaceControllerBase *>(scratchController.get())->programBindlessSurfaceStateForScratchCalled);
EXPECT_EQ(0u, csr.makeResidentCalledTimes);

View File

@@ -37,7 +37,6 @@ class MockScratchSpaceControllerXeHPAndLater : public ScratchSpaceControllerXeHP
}
void prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
bool &scratchSurfaceDirty,
@@ -67,7 +66,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenDirtyScratchAllocation
bool gsbaStateDirty = false;
bool frontEndStateDirty = false;
scratchController->scratchDirty = true;
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
EXPECT_GT(csr.makeResidentCalledTimes, 0u);
EXPECT_TRUE(scratchController->wasProgramSurfaceStateAtPtrCalled);
}
@@ -89,7 +88,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenNotDirtyScratchAllocat
scratchController->scratchDirty = false;
scratchController->bindlessSS = bindlessHeapHelper->allocateSSInHeap(0x1000, nullptr, BindlessHeapsHelper::specialSsh);
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
EXPECT_GT(csr.makeResidentCalledTimes, 0u);
EXPECT_FALSE(scratchController->wasProgramSurfaceStateAtPtrCalled);
}
@@ -111,7 +110,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenNoBindlessSSWhenProgra
scratchController->scratchDirty = false;
bindlessHeapHelper->failAllocateSS = true;
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
EXPECT_EQ(csr.makeResidentCalledTimes, 0u);
EXPECT_FALSE(scratchController->wasProgramSurfaceStateAtPtrCalled);
}
@@ -133,7 +132,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenPrivateScratchEnabledW
bool frontEndStateDirty = false;
scratchController->scratchDirty = true;
auto usedBefore = bindlessHeapHelper->getHeap(BindlessHeapsHelper::specialSsh)->getUsed();
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
auto usedAfter = bindlessHeapHelper->getHeap(BindlessHeapsHelper::specialSsh)->getUsed();
EXPECT_EQ(usedAfter - usedBefore, 2 * scratchController->singleSurfaceStateSize);
}
@@ -155,7 +154,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ScratchControllerTests, givenPrivateScratchDisabled
bool frontEndStateDirty = false;
scratchController->scratchDirty = true;
auto usedBefore = bindlessHeapHelper->getHeap(BindlessHeapsHelper::specialSsh)->getUsed();
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
scratchController->programBindlessSurfaceStateForScratch(bindlessHeapHelper.get(), 0, 0, *pDevice->getDefaultEngine().osContext, gsbaStateDirty, frontEndStateDirty, &csr);
auto usedAfter = bindlessHeapHelper->getHeap(BindlessHeapsHelper::specialSsh)->getUsed();
EXPECT_EQ(usedAfter - usedBefore, scratchController->singleSurfaceStateSize);
}