diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 832c5376d9..b5dec7d0b8 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -184,20 +184,13 @@ struct CommandList : _ze_command_list_handle_t { inline ze_command_list_handle_t toHandle() { return this; } - uint32_t getCommandListPerThreadScratchSize() const { - return commandListPerThreadScratchSize; + uint32_t getCommandListPerThreadScratchSize(uint32_t slotId) const { + return commandListPerThreadScratchSize[slotId]; } - void setCommandListPerThreadScratchSize(uint32_t size) { - commandListPerThreadScratchSize = size; - } - - uint32_t getCommandListPerThreadPrivateScratchSize() const { - return commandListPerThreadPrivateScratchSize; - } - - void setCommandListPerThreadPrivateScratchSize(uint32_t size) { - commandListPerThreadPrivateScratchSize = size; + void setCommandListPerThreadScratchSize(uint32_t slotId, uint32_t size) { + UNRECOVERABLE_IF(slotId > 1); + commandListPerThreadScratchSize[slotId] = size; } uint32_t getCommandListSLMEnable() const { @@ -390,8 +383,7 @@ struct CommandList : _ze_command_list_handle_t { NEO::HeapAddressModel cmdListHeapAddressModel = NEO::HeapAddressModel::privateHeaps; CommandListType cmdListType = CommandListType::typeRegular; - uint32_t commandListPerThreadScratchSize = 0u; - uint32_t commandListPerThreadPrivateScratchSize = 0u; + uint32_t commandListPerThreadScratchSize[2]{}; uint32_t partitionCount = 1; uint32_t defaultMocsIndex = 0; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 752d01b419..c692de05d1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -117,8 +117,8 @@ ze_result_t CommandListCoreFamily::reset() { unifiedMemoryControls.indirectSharedAllocationsAllowed = false; unifiedMemoryControls.indirectDeviceAllocationsAllowed = false; 
commandListPreemptionMode = device->getDevicePreemptionMode(); - commandListPerThreadScratchSize = 0u; - commandListPerThreadPrivateScratchSize = 0u; + commandListPerThreadScratchSize[0] = 0u; + commandListPerThreadScratchSize[1] = 0u; requiredStreamState.resetState(); finalStreamState.resetState(); containsAnyKernel = false; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 46ab5e403a..4dc1402d61 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -198,7 +198,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate::flushImmedia } } - this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize()); + this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u)); } NEO::ImmediateDispatchFlags dispatchFlags{ @@ -259,7 +259,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate::flushRegular if (kernelOperation) { this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags); - this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize()); + this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u)); if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) { ssh = this->csr->getGlobalStatelessHeap(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl index 6525587f31..95c1b24bc1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_skl_to_tgllp.inl @@ -109,9 +109,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } appendEventForProfiling(event, true, 
false); - auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(), + auto perThreadScratchSize = std::max(this->getCommandListPerThreadScratchSize(0u), kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); - this->setCommandListPerThreadScratchSize(perThreadScratchSize); + this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize); auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0); this->setCommandListSLMEnable(slmEnable); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 017d9287fb..e9c590b1de 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -123,10 +123,15 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K ", Group count: ", threadGroupDimensions.groupCountX, ", ", threadGroupDimensions.groupCountY, ", ", threadGroupDimensions.groupCountZ, ", SIMD: ", kernelInfo->getMaxSimdSize()); - commandListPerThreadScratchSize = std::max(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); - commandListPerThreadPrivateScratchSize = std::max(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); + bool needScratchSpace = false; + for (uint32_t slotId = 0u; slotId < 2; slotId++) { + commandListPerThreadScratchSize[slotId] = std::max(commandListPerThreadScratchSize[slotId], kernelDescriptor.kernelAttributes.perThreadScratchSize[slotId]); + if (commandListPerThreadScratchSize[slotId] > 0) { + needScratchSpace = true; + } + } - if ((this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) && (commandListPerThreadScratchSize != 0 || commandListPerThreadPrivateScratchSize != 0)) { + if ((this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) && 
needScratchSpace) { commandContainer.prepareBindfulSsh(); } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.h b/level_zero/core/source/cmdqueue/cmdqueue_hw.h index 42027a80c7..354e4f31b1 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h @@ -34,7 +34,7 @@ struct CommandQueueHw : public CommandQueueImp { void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed, NEO::StreamProperties *streamProperties); size_t estimateStateBaseAddressCmdSize(); - MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties); + MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties); MOCKABLE_VIRTUAL size_t estimateFrontEndCmdSizeForMultipleCommandLists(bool &isFrontEndStateDirty, int32_t engineInstanced, CommandList *commandList, NEO::StreamProperties &csrState, @@ -51,8 +51,8 @@ struct CommandQueueHw : public CommandQueueImp { MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState, - uint32_t perThreadScratchSpaceSize, - uint32_t perThreadPrivateScratchSize); + uint32_t perThreadScratchSpaceSlot0Size, + uint32_t perThreadScratchSpaceSlot1Size); bool getPreemptionCmdProgramming() override; void patchCommands(CommandList &commandList, uint64_t scratchAddress); @@ -84,8 +84,8 @@ struct CommandQueueHw : public CommandQueueImp { NEO::PreemptionMode preemptionMode{}; NEO::PreemptionMode statePreemption{}; - uint32_t perThreadScratchSpaceSize = 0; - uint32_t perThreadPrivateScratchSize = 0; + uint32_t perThreadScratchSpaceSlot0Size = 0; + uint32_t perThreadScratchSpaceSlot1Size = 0; int32_t engineInstanced = 
-1; UnifiedMemoryControls unifiedMemoryControls{}; diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 21c8ce633c..7e64e56e87 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -333,14 +333,14 @@ void CommandQueueHw::programFrontEndAndClearDirtyFlag( } auto scratchSpaceController = this->csr->getScratchSpaceController(); programFrontEnd(scratchSpaceController->getScratchPatchAddress(), - scratchSpaceController->getPerThreadScratchSpaceSize(), + scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(), cmdStream, csrState); ctx.frontEndStateDirty = false; } template -void CommandQueueHw::programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &cmdStream, NEO::StreamProperties &streamProperties) { +void CommandQueueHw::programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &cmdStream, NEO::StreamProperties &streamProperties) { UNRECOVERABLE_IF(csr == nullptr); auto &hwInfo = device->getHwInfo(); auto &gfxCoreHelper = device->getGfxCoreHelper(); @@ -349,7 +349,7 @@ void CommandQueueHw::programFrontEnd(uint64_t scratchAddress, uin auto pVfeState = NEO::PreambleHelper::getSpaceForVfeState(&cmdStream, hwInfo, engineGroupType); NEO::PreambleHelper::programVfeState(pVfeState, device->getNEODevice()->getRootDeviceEnvironment(), - perThreadScratchSpaceSize, + perThreadScratchSpaceSlot0Size, scratchAddress, device->getMaxNumHwThreads(), streamProperties); @@ -543,11 +543,11 @@ void CommandQueueHw::setupCmdListsAndContextParams( auto &commandContainer = commandList->getCmdContainer(); if (!isCopyOnlyCommandQueue) { - ctx.perThreadScratchSpaceSize = std::max(ctx.perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize()); - ctx.perThreadPrivateScratchSize = std::max(ctx.perThreadPrivateScratchSize, 
commandList->getCommandListPerThreadPrivateScratchSize()); + ctx.perThreadScratchSpaceSlot0Size = std::max(ctx.perThreadScratchSpaceSlot0Size, commandList->getCommandListPerThreadScratchSize(0u)); + ctx.perThreadScratchSpaceSlot1Size = std::max(ctx.perThreadScratchSpaceSlot1Size, commandList->getCommandListPerThreadScratchSize(1u)); if (commandList->getCmdListHeapAddressModel() == NEO::HeapAddressModel::privateHeaps) { - if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) { + if (commandList->getCommandListPerThreadScratchSize(0u) != 0 || commandList->getCommandListPerThreadScratchSize(1u) != 0) { if (commandContainer.getIndirectHeap(NEO::HeapType::surfaceState) != nullptr) { heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::surfaceState)->getGraphicsAllocation()); } @@ -668,7 +668,7 @@ void CommandQueueHw::handleScratchSpaceAndUpdateGSBAStateDirtyFla handleScratchSpace(this->heapContainer, scratchController, ctx.gsbaStateDirty, ctx.frontEndStateDirty, - ctx.perThreadScratchSpaceSize, ctx.perThreadPrivateScratchSize); + ctx.perThreadScratchSpaceSlot0Size, ctx.perThreadScratchSpaceSlot1Size); ctx.gsbaStateDirty |= this->csr->getGSBAStateDirty(); ctx.scratchGsba = scratchController->calculateNewGSH(); @@ -1012,7 +1012,7 @@ void CommandQueueHw::programOneCmdListBatchBufferStartSecondaryBa auto scratchSpaceController = this->csr->getScratchSpaceController(); ctx.cmdListBeginState.frontEndState.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(returnPoints[returnPointIdx].configSnapshot.frontEndState); programFrontEnd(scratchSpaceController->getScratchPatchAddress(), - scratchSpaceController->getPerThreadScratchSpaceSize(), + scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(), commandStream, ctx.cmdListBeginState); NEO::EncodeBatchBufferStartOrEnd::programBatchBufferStart(&commandStream, diff --git 
a/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl index d0ea897d2e..3359d43fd0 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw_skl_to_tgllp.inl @@ -122,12 +122,12 @@ template void CommandQueueHw::handleScratchSpace(NEO::HeapContainer &heapContainer, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState, - uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) { + uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) { - if (perThreadScratchSpaceSize > 0) { - scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(), + if (perThreadScratchSpaceSlot0Size > 0) { + scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSlot0Size, 0u, csr->peekTaskCount(), csr->getOsContext(), gsbaState, frontEndState); - auto scratchAllocation = scratchController->getScratchSpaceAllocation(); + auto scratchAllocation = scratchController->getScratchSpaceSlot0Allocation(); csr->makeResident(*scratchAllocation); } } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl index 0d32dafb33..fa8f378a06 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl @@ -137,28 +137,28 @@ template void CommandQueueHw::handleScratchSpace(NEO::HeapContainer &sshHeaps, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState, - uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) { - if (perThreadScratchSpaceSize > 0 || perThreadPrivateScratchSize > 0) { + uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) { + if (perThreadScratchSpaceSlot0Size > 
0 || perThreadScratchSpaceSlot1Size > 0) { if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) { auto globalStatelessHeapAllocation = csr->getGlobalStatelessHeapAllocation(); - scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(), + scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(), csr->getOsContext(), gsbaState, frontEndState); } if (sshHeaps.size() > 0) { uint32_t offsetIndex = maxPtssIndex * csr->getOsContext().getEngineType() + 1u; - scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(), + scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(), csr->getOsContext(), gsbaState, frontEndState); } - auto scratchAllocation = scratchController->getScratchSpaceAllocation(); - if (scratchAllocation != nullptr) { - csr->makeResident(*scratchAllocation); + auto scratch0Allocation = scratchController->getScratchSpaceSlot0Allocation(); + if (scratch0Allocation != nullptr) { + csr->makeResident(*scratch0Allocation); } - auto privateScratchAllocation = scratchController->getPrivateScratchSpaceAllocation(); + auto scratch1Allocation = scratchController->getScratchSpaceSlot1Allocation(); - if (privateScratchAllocation != nullptr) { - csr->makeResident(*privateScratchAllocation); + if (scratch1Allocation != nullptr) { + csr->makeResident(*scratch1Allocation); } } } diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 45cba094e0..ec556d47fe 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -899,7 +899,7 @@ ze_result_t 
KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties) pKernelProperties->maxSubgroupSize = kernelDescriptor.kernelAttributes.simdSize; pKernelProperties->localMemSize = kernelDescriptor.kernelAttributes.slmInlineSize; pKernelProperties->privateMemSize = gfxCoreHelper.getKernelPrivateMemSize(kernelDescriptor); - pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; + pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.spillFillScratchMemorySize; memset(pKernelProperties->uuid.kid, 0, ZE_MAX_KERNEL_UUID_SIZE); memset(pKernelProperties->uuid.mid, 0, ZE_MAX_MODULE_UUID_SIZE); diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp index f3e3aa64fd..e51320465a 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp @@ -21,11 +21,11 @@ namespace ult { ModuleImmutableDataFixture::MockImmutableMemoryManager::MockImmutableMemoryManager(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MockMemoryManager(const_cast(executionEnvironment)) {} ModuleImmutableDataFixture::MockImmutableData::MockImmutableData(uint32_t perHwThreadPrivateMemorySize) : MockImmutableData(perHwThreadPrivateMemorySize, 0, 0) {} -ModuleImmutableDataFixture::MockImmutableData::MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSize, uint32_t perThreaddPrivateScratchSize) { +ModuleImmutableDataFixture::MockImmutableData::MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSlot0Size, uint32_t perThreadScratchSlot1Size) { mockKernelDescriptor = new NEO::KernelDescriptor; mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize; - mockKernelDescriptor->kernelAttributes.perThreadScratchSize[0] = perThreadScratchSize; - 
mockKernelDescriptor->kernelAttributes.perThreadScratchSize[1] = perThreaddPrivateScratchSize; + mockKernelDescriptor->kernelAttributes.perThreadScratchSize[0] = perThreadScratchSlot0Size; + mockKernelDescriptor->kernelAttributes.perThreadScratchSize[1] = perThreadScratchSlot1Size; kernelDescriptor = mockKernelDescriptor; mockKernelInfo = new NEO::KernelInfo; diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index 1a48598863..fa825d5766 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -38,7 +38,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture { using KernelImmutableData::kernelDescriptor; using KernelImmutableData::kernelInfo; MockImmutableData(uint32_t perHwThreadPrivateMemorySize); - MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSize, uint32_t perThreaddPrivateScratchSize); + MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSlot0Size, uint32_t perThreadScratchSlot1Size); void setDevice(L0::Device *inDevice) { device = inDevice; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 1ebe8dc720..d3216a9910 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -2528,7 +2528,7 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest, void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset); auto scratchSurfaceState = reinterpret_cast(scratchSurfaceStateBuffer); - auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation(); + auto scratchAllocation = scratchSpaceController->getScratchSpaceSlot0Allocation(); 
EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress()); } @@ -2582,7 +2582,7 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest, void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset); auto scratchSurfaceState = reinterpret_cast(scratchSurfaceStateBuffer); - auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation(); + auto scratchAllocation = scratchSpaceController->getScratchSpaceSlot0Allocation(); EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress()); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index 65f61224c4..14f389458c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -611,23 +611,23 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests, result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize()); + EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize(0u)); auto ultCsr = reinterpret_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); - EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSize); + EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSlot0Size); commandList->cmdQImmediate = nullptr; } HWTEST2_F(CmdlistAppendLaunchKernelTests, givenImmediateCommandListUsesFlushTaskWhenDispatchingKernelWithSpillAndPrivateScratchSpaceThenExpectCsrHasCorrectValuesSet, IsAtLeastXeHpCore) { - constexpr uint32_t scratchPerThreadSize = 0x200; - constexpr uint32_t privateScratchPerThreadSize = 0x100; + constexpr uint32_t scratch0PerThreadSize = 0x200; + constexpr uint32_t 
scratch1PerThreadSize = 0x100; std::unique_ptr mockKernelImmData = std::make_unique(0u); auto kernelDescriptor = mockKernelImmData->kernelDescriptor; kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false; - kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratchPerThreadSize; - kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = privateScratchPerThreadSize; + kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratch0PerThreadSize; + kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = scratch1PerThreadSize; createModuleFromMockBinary(0u, false, mockKernelImmData.get()); auto kernel = std::make_unique(module.get()); @@ -660,12 +660,12 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests, result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize()); - EXPECT_EQ(privateScratchPerThreadSize, commandList->getCommandListPerThreadPrivateScratchSize()); + EXPECT_EQ(scratch0PerThreadSize, commandList->getCommandListPerThreadScratchSize(0u)); + EXPECT_EQ(scratch1PerThreadSize, commandList->getCommandListPerThreadScratchSize(1u)); auto ultCsr = reinterpret_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); - EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSize); - EXPECT_EQ(privateScratchPerThreadSize, ultCsr->requiredPrivateScratchSize); + EXPECT_EQ(scratch0PerThreadSize, ultCsr->requiredScratchSlot0Size); + EXPECT_EQ(scratch1PerThreadSize, ultCsr->requiredScratchSlot1Size); commandList->cmdQImmediate = nullptr; } @@ -674,14 +674,14 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests, DebugManagerStateRestore restorer; NEO::debugManager.flags.EventWaitOnHost.set(1); - constexpr uint32_t scratchPerThreadSize = 0x200; - constexpr uint32_t privateScratchPerThreadSize = 0x100; + constexpr uint32_t scratch0PerThreadSize = 0x200; + 
constexpr uint32_t scratch1PerThreadSize = 0x100; std::unique_ptr mockKernelImmData = std::make_unique(0u); auto kernelDescriptor = mockKernelImmData->kernelDescriptor; kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false; - kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratchPerThreadSize; - kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = privateScratchPerThreadSize; + kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratch0PerThreadSize; + kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = scratch1PerThreadSize; createModuleFromMockBinary(0u, false, mockKernelImmData.get()); auto kernel = std::make_unique(module.get()); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index e5e80600f7..b6cfca5360 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -1182,8 +1182,8 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests, givenKernelWithScratchAndPrivateWhenAp result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(commandList->getCommandListPerThreadPrivateScratchSize(), static_cast(0x100)); - EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(), static_cast(0x200)); + EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(1u), static_cast(0x100)); + EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(0u), static_cast(0x200)); } HWTEST2_F(CmdlistAppendLaunchKernelTests, givenGlobalBindlessAllocatorAndKernelWithPrivateScratchWhenAppendLaunchKernelThenCmdContainerHasBindfulSSHAllocated, IsAtLeastXeHpCore) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 6909550040..34ba0a0b86 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -2143,7 +2143,7 @@ HWTEST2_F(ImmediateFlushTaskCsrSharedHeapCmdListTest, EXPECT_EQ(0u, frontEndCmd->getScratchSpaceBuffer()); - EXPECT_EQ(nullptr, csrImmediate.getScratchSpaceController()->getScratchSpaceAllocation()); + EXPECT_EQ(nullptr, csrImmediate.getScratchSpaceController()->getScratchSpaceSlot0Allocation()); mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x100; @@ -2164,7 +2164,7 @@ HWTEST2_F(ImmediateFlushTaskCsrSharedHeapCmdListTest, constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE); EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer()); - auto scratchAllocation = csrImmediate.getScratchSpaceController()->getScratchSpaceAllocation(); + auto scratchAllocation = csrImmediate.getScratchSpaceController()->getScratchSpaceSlot0Allocation(); ASSERT_NE(nullptr, scratchAllocation); EXPECT_TRUE(csrImmediate.isMadeResident(scratchAllocation)); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index 2548078786..4bd72ab2a3 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -1089,12 +1089,12 @@ class MockCommandQueue : public L0::CommandQueueHw { void handleScratchSpace(NEO::HeapContainer &heapContainer, NEO::ScratchSpaceController *scratchController, bool &gsbaState, bool &frontEndState, - uint32_t perThreadScratchSpaceSize, - uint32_t perThreadPrivateScratchSize) override { + uint32_t perThreadScratchSpaceSlot0Size, + uint32_t perThreadScratchSpaceSlot1Size) override { this->mockHeapContainer = 
heapContainer; } - void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties) override { + void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties) override { return; } }; @@ -1108,7 +1108,7 @@ HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenConta commandQueue->initialize(false, false, false); auto commandList = new CommandListCoreFamily(); commandList->initialize(device, NEO::EngineGroupType::compute, 0u); - commandList->setCommandListPerThreadScratchSize(100u); + commandList->setCommandListPerThreadScratchSize(0u, 100u); auto commandListHandle = commandList->toHandle(); commandList->close(); @@ -1207,7 +1207,7 @@ HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndScratchW commandQueue->initialize(false, false, false); auto commandList = new CommandListCoreFamily(); commandList->initialize(device, NEO::EngineGroupType::compute, 0u); - commandList->setCommandListPerThreadScratchSize(100u); + commandList->setCommandListPerThreadScratchSize(0u, 100u); auto commandListHandle = commandList->toHandle(); commandList->close(); @@ -1234,7 +1234,7 @@ HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndPrivateS commandQueue->initialize(false, false, false); auto commandList = new CommandListCoreFamily(); commandList->initialize(device, NEO::EngineGroupType::compute, 0u); - commandList->setCommandListPerThreadPrivateScratchSize(100u); + commandList->setCommandListPerThreadScratchSize(1u, 100u); auto commandListHandle = commandList->toHandle(); commandList->close(); @@ -1265,7 +1265,7 @@ HWTEST2_F(ExecuteCommandListTests, givenBindlessHelperWhenCommandListIsExecutedO commandQueue->initialize(false, false, false); auto commandList = new CommandListCoreFamily(); commandList->initialize(device, 
NEO::EngineGroupType::compute, 0u); - commandList->setCommandListPerThreadScratchSize(100u); + commandList->setCommandListPerThreadScratchSize(0u, 100u); auto commandListHandle = commandList->toHandle(); commandList->close(); @@ -1418,10 +1418,10 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen returnValue); auto commandList0 = new CommandListCoreFamily(); commandList0->initialize(device, NEO::EngineGroupType::compute, 0u); - commandList0->setCommandListPerThreadScratchSize(0u); + commandList0->setCommandListPerThreadScratchSize(0u, 0u); auto commandList1 = new CommandListCoreFamily(); commandList1->initialize(device, NEO::EngineGroupType::compute, 0u); - commandList1->setCommandListPerThreadScratchSize(0u); + commandList1->setCommandListPerThreadScratchSize(0u, 0u); auto commandListHandle0 = commandList0->toHandle(); commandList0->close(); auto commandListHandle1 = commandList1->toHandle(); @@ -1458,9 +1458,9 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen false, returnValue)); auto commandList0 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); - commandList0->setCommandListPerThreadScratchSize(0u); + commandList0->setCommandListPerThreadScratchSize(0u, 0u); auto commandList1 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); - commandList1->setCommandListPerThreadScratchSize(0u); + commandList1->setCommandListPerThreadScratchSize(0u, 0u); auto commandListHandle0 = commandList0->toHandle(); commandList0->close(); auto commandListHandle1 = commandList1->toHandle(); @@ -1503,17 +1503,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists returnValue)); auto commandList0 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, 
NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); auto commandList1 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); - commandList0->setCommandListPerThreadScratchSize(512u); - commandList1->setCommandListPerThreadScratchSize(0u); + commandList0->setCommandListPerThreadScratchSize(0u, 512u); + commandList1->setCommandListPerThreadScratchSize(0u, 0u); auto commandListHandle0 = commandList0->toHandle(); commandList0->close(); auto commandListHandle1 = commandList1->toHandle(); commandList1->close(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); auto usedSpaceAfter = commandQueue->commandStream.getUsed(); @@ -1528,10 +1528,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists ASSERT_EQ(1u, gsbaStates.size()); commandList0->reset(); - commandList0->setCommandListPerThreadScratchSize(0u); + commandList0->setCommandListPerThreadScratchSize(0u, 0u); commandList0->close(); commandList1->reset(); - commandList1->setCommandListPerThreadScratchSize(0u); + commandList1->setCommandListPerThreadScratchSize(0u, 0u); commandList1->close(); auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily, @@ -1544,9 +1544,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + 
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); usedSpaceAfter = commandQueue1->commandStream.getUsed(); @@ -1581,17 +1581,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists returnValue)); auto commandList0 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); auto commandList1 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); - commandList0->setCommandListPerThreadScratchSize(0u); - commandList1->setCommandListPerThreadScratchSize(512u); + commandList0->setCommandListPerThreadScratchSize(0u, 0u); + commandList1->setCommandListPerThreadScratchSize(0u, 512u); auto commandListHandle0 = commandList0->toHandle(); commandList0->close(); auto commandListHandle1 = commandList1->toHandle(); commandList1->close(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); auto usedSpaceAfter = commandQueue->commandStream.getUsed(); @@ -1606,10 +1606,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists ASSERT_EQ(2u, gsbaStates.size()); commandList0->reset(); - 
commandList0->setCommandListPerThreadScratchSize(512u); + commandList0->setCommandListPerThreadScratchSize(0u, 512u); commandList0->close(); commandList1->reset(); - commandList1->setCommandListPerThreadScratchSize(0u); + commandList1->setCommandListPerThreadScratchSize(0u, 0u); commandList1->close(); auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily, @@ -1622,9 +1622,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); usedSpaceAfter = commandQueue1->commandStream.getUsed(); @@ -1659,17 +1659,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists returnValue)); auto commandList0 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); auto commandList1 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); - commandList0->setCommandListPerThreadScratchSize(512u); - commandList1->setCommandListPerThreadScratchSize(512u); + commandList0->setCommandListPerThreadScratchSize(0u, 512u); + commandList1->setCommandListPerThreadScratchSize(0u, 512u); auto commandListHandle0 = commandList0->toHandle(); commandList0->close(); auto commandListHandle1 = commandList1->toHandle(); commandList1->close(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(512u, 
csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); auto usedSpaceAfter = commandQueue->commandStream.getUsed(); @@ -1684,10 +1684,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists ASSERT_EQ(1u, gsbaStates.size()); commandList0->reset(); - commandList0->setCommandListPerThreadScratchSize(1024u); + commandList0->setCommandListPerThreadScratchSize(0u, 1024u); commandList0->close(); commandList1->reset(); - commandList1->setCommandListPerThreadScratchSize(1024u); + commandList1->setCommandListPerThreadScratchSize(0u, 1024u); commandList1->close(); auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily, @@ -1700,9 +1700,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); usedSpaceAfter = commandQueue1->commandStream.getUsed(); @@ -1737,17 +1737,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists returnValue)); auto commandList0 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); auto 
commandList1 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); - commandList0->setCommandListPerThreadScratchSize(0u); - commandList1->setCommandListPerThreadScratchSize(512u); + commandList0->setCommandListPerThreadScratchSize(0u, 0u); + commandList1->setCommandListPerThreadScratchSize(0u, 512u); auto commandListHandle0 = commandList0->toHandle(); commandList0->close(); auto commandListHandle1 = commandList1->toHandle(); commandList1->close(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); auto usedSpaceAfter = commandQueue->commandStream.getUsed(); @@ -1762,10 +1762,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists ASSERT_EQ(2u, gsbaStates.size()); commandList0->reset(); - commandList0->setCommandListPerThreadScratchSize(1024u); + commandList0->setCommandListPerThreadScratchSize(0u, 1024u); commandList0->close(); commandList1->reset(); - commandList1->setCommandListPerThreadScratchSize(2048u); + commandList1->setCommandListPerThreadScratchSize(0u, 2048u); commandList1->close(); auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily, @@ -1777,9 +1777,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists false, returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(1024u, 
csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); usedSpaceAfter = commandQueue1->commandStream.getUsed(); @@ -1813,17 +1813,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists returnValue)); auto commandList0 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); auto commandList1 = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); - commandList0->setCommandListPerThreadPrivateScratchSize(0u); - commandList1->setCommandListPerThreadPrivateScratchSize(512u); + commandList0->setCommandListPerThreadScratchSize(1u, 0u); + commandList1->setCommandListPerThreadScratchSize(1u, 512u); auto commandListHandle0 = commandList0->toHandle(); commandList0->close(); auto commandListHandle1 = commandList1->toHandle(); commandList1->close(); commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize()); + EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1()); commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize()); + EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1()); auto usedSpaceAfter = commandQueue->commandStream.getUsed(); @@ -1836,10 +1836,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists ASSERT_EQ(2u, mediaVfeStates.size()); commandList0->reset(); - 
commandList0->setCommandListPerThreadPrivateScratchSize(1024u); + commandList0->setCommandListPerThreadScratchSize(1u, 1024u); commandList0->close(); commandList1->reset(); - commandList1->setCommandListPerThreadPrivateScratchSize(2048u); + commandList1->setCommandListPerThreadScratchSize(1u, 2048u); commandList1->close(); auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily, @@ -1851,9 +1851,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists false, returnValue)); commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false); - EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize()); + EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1()); commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false); - EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize()); + EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1()); usedSpaceAfter = commandQueue1->commandStream.getUsed(); @@ -1885,7 +1885,7 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledWhenExecutingCmdLi false, returnValue)); auto commandList = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); - commandList->setCommandListPerThreadPrivateScratchSize(0u); + commandList->setCommandListPerThreadScratchSize(1u, 0u); auto commandListHandle = commandList->toHandle(); commandList->close(); @@ -1930,7 +1930,7 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledAndDebugFlagSetWhe false, returnValue)); auto commandList = std::unique_ptr(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false))); - commandList->setCommandListPerThreadPrivateScratchSize(0u); + commandList->setCommandListPerThreadScratchSize(1u, 0u); auto commandListHandle 
= commandList->toHandle(); commandList->close(); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index d936e5ce8b..b3c0cad83c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -840,8 +840,8 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -850,7 +850,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP programHeapsCalled = true; } - NEO::GraphicsAllocation *getScratchSpaceAllocation() override { + NEO::GraphicsAllocation *getScratchSpaceSlot0Allocation() override { return scratchAllocation; } @@ -895,7 +895,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHeapContainerIsZeroSizeThenNoFunctionIsCalled, Platforms) { class MockScratchSpaceControllerXeHPAndLater : public NEO::ScratchSpaceControllerXeHPAndLater { public: - using NEO::ScratchSpaceControllerXeHPAndLater::scratchAllocation; + using NEO::ScratchSpaceControllerXeHPAndLater::scratchSlot0Allocation; bool programHeapsCalled = false; MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex, NEO::ExecutionEnvironment &environment, @@ -903,8 +903,8 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, 
+ uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -937,11 +937,11 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe NEO::GraphicsAllocation graphicsAllocation(1u, NEO::AllocationType::buffer, nullptr, 0u, 0u, 0u, MemoryPool::system4KBPages, 0u); auto scratch = static_cast(scratchController.get()); - scratch->scratchAllocation = &graphicsAllocation; + scratch->scratchSlot0Allocation = &graphicsAllocation; commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x1000, 0u); EXPECT_FALSE(scratch->programHeapsCalled); - scratch->scratchAllocation = nullptr; + scratch->scratchSlot0Allocation = nullptr; } HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorrectlyPatched, IsAtLeastXeHpCore) { diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp index 83a5286990..4eaf6985f9 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp @@ -369,15 +369,15 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsT false, returnValue)); - CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(512u); - CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u); + CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(0u, 512u); + CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(0u, 1024u); ASSERT_NE(nullptr, commandQueue); auto usedSpaceBefore = commandQueue->commandStream.getUsed(); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, 
true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(1024u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(1024u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); auto usedSpaceAfter = commandQueue->commandStream.getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); @@ -395,15 +395,15 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsT CommandList::fromHandle(commandLists[0])->reset(); CommandList::fromHandle(commandLists[1])->reset(); - CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(2048u); - CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u); + CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(0u, 2048u); + CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(0u, 1024u); ASSERT_NE(nullptr, commandQueue); usedSpaceBefore = commandQueue->commandStream.getUsed(); result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize()); + EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0()); usedSpaceAfter = commandQueue->commandStream.getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp index db4c1fedb4..f7afdbf85e 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp @@ -138,7 +138,7 @@ 
HWTEST2_F(L0DebuggerPerContextAddressSpaceTest, givenDebuggingEnabledAndRequired ze_command_list_handle_t commandLists[] = { CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)->toHandle()}; - CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(4096); + CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(0u, 4096); CommandList::fromHandle(commandLists[0])->close(); uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 69afe3d8a7..28c4a5dc35 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -1560,8 +1560,8 @@ TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) { auto expectedPrivateSize = 0x200u; auto &kernelDescriptor = const_cast(kernel->getKernelDescriptor()); - kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = expectedSpillSize; - kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = expectedPrivateSize; + kernelDescriptor.kernelAttributes.spillFillScratchMemorySize = expectedSpillSize; + kernelDescriptor.kernelAttributes.privateScratchMemorySize = expectedPrivateSize; ze_result_t res = kernel->getProperties(&kernelProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); @@ -1580,7 +1580,7 @@ TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) { EXPECT_EQ(maxNumSubgroups, kernelProperties.maxNumSubgroups); EXPECT_EQ(sizeof(float) * 16U, kernelProperties.localMemSize); - EXPECT_EQ(expectedPrivateSize, kernelProperties.privateMemSize); + EXPECT_EQ(device->getGfxCoreHelper().getKernelPrivateMemSize(kernelDescriptor), kernelProperties.privateMemSize); EXPECT_EQ(expectedSpillSize, kernelProperties.spillMemSize); uint8_t 
zeroKid[ZE_MAX_KERNEL_UUID_SIZE]; @@ -1603,8 +1603,8 @@ HWTEST2_F(KernelPropertiesTests, givenKernelWithPrivateScratchMemoryThenProperPr auto expectedPrivateSize = 0x200u; auto &kernelDescriptor = const_cast(kernel->getKernelDescriptor()); - kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = expectedSpillSize; - kernelDescriptor.kernelAttributes.perThreadScratchSize[1] = expectedPrivateSize; + kernelDescriptor.kernelAttributes.spillFillScratchMemorySize = expectedSpillSize; + kernelDescriptor.kernelAttributes.privateScratchMemorySize = expectedPrivateSize; kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0xDEAD; ze_result_t res = kernel->getProperties(&kernelProperties); diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 9a14fda04e..f0fce2e648 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -560,7 +560,7 @@ void CommandQueueHw::processDispatchForKernels(const MultiDispatchInf } } - getGpgpuCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(), multiDispatchInfo.getRequiredPrivateScratchSize()); + getGpgpuCommandStreamReceiver().setRequiredScratchSizes(multiDispatchInfo.getRequiredScratchSize(0u), multiDispatchInfo.getRequiredScratchSize(1u)); } template diff --git a/opencl/source/helpers/dispatch_info.cpp b/opencl/source/helpers/dispatch_info.cpp index 95bdb08f73..d7d94995cb 100644 --- a/opencl/source/helpers/dispatch_info.cpp +++ b/opencl/source/helpers/dispatch_info.cpp @@ -20,12 +20,8 @@ bool DispatchInfo::usesStatelessPrintfSurface() const { return (kernel == nullptr) ? false : kernel->hasPrintfOutput(); } -uint32_t DispatchInfo::getRequiredScratchSize() const { - return (kernel == nullptr) ? 0 : kernel->getScratchSize(); -} - -uint32_t DispatchInfo::getRequiredPrivateScratchSize() const { - return (kernel == nullptr) ? 
0 : kernel->getPrivateScratchSize(); +uint32_t DispatchInfo::getRequiredScratchSize(uint32_t slotId) const { + return (kernel == nullptr) ? 0 : kernel->getScratchSize(slotId); } MultiDispatchInfo::~MultiDispatchInfo() { diff --git a/opencl/source/helpers/dispatch_info.h b/opencl/source/helpers/dispatch_info.h index 8947f457a5..00019a113f 100644 --- a/opencl/source/helpers/dispatch_info.h +++ b/opencl/source/helpers/dispatch_info.h @@ -39,8 +39,7 @@ class DispatchInfo { void setClDevice(ClDevice *device) { pClDevice = device; } bool usesSlm() const; bool usesStatelessPrintfSurface() const; - uint32_t getRequiredScratchSize() const; - uint32_t getRequiredPrivateScratchSize() const; + uint32_t getRequiredScratchSize(uint32_t slotId) const; void setKernel(Kernel *kernel) { this->kernel = kernel; } Kernel *getKernel() const { return kernel; } uint32_t getDim() const { return dim; } @@ -115,18 +114,10 @@ struct MultiDispatchInfo { return false; } - uint32_t getRequiredScratchSize() const { + uint32_t getRequiredScratchSize(uint32_t slotId) const { uint32_t ret = 0; for (const auto &dispatchInfo : dispatchInfos) { - ret = std::max(ret, dispatchInfo.getRequiredScratchSize()); - } - return ret; - } - - uint32_t getRequiredPrivateScratchSize() const { - uint32_t ret = 0; - for (const auto &dispatchInfo : dispatchInfos) { - ret = std::max(ret, dispatchInfo.getRequiredPrivateScratchSize()); + ret = std::max(ret, dispatchInfo.getRequiredScratchSize(slotId)); } return ret; } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index e408c8318f..67fa4e102d 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -620,7 +620,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, break; case CL_KERNEL_SPILL_MEM_SIZE_INTEL: - scratchSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; + scratchSize = kernelDescriptor.kernelAttributes.spillFillScratchMemorySize; srcSize = sizeof(scratchSize); pSrc 
= &scratchSize; break; @@ -1848,7 +1848,7 @@ void Kernel::provideInitializationHints() { kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str(), privateSurfaceSize); } - auto scratchSize = kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * + auto scratchSize = kernelInfo.kernelDescriptor.kernelAttributes.spillFillScratchMemorySize * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * kernelInfo.getMaxSimdSize(); if (scratchSize > 0) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL, REGISTER_PRESSURE_TOO_HIGH, diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 51bd7aafb3..d4ff8c1e25 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -211,12 +211,8 @@ class Kernel : public ReferenceTrackedObject { Program *getProgram() const { return program; } - uint32_t getScratchSize() { - return kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; - } - - uint32_t getPrivateScratchSize() { - return kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[1]; + uint32_t getScratchSize(uint32_t slotId) { + return kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[slotId]; } bool usesSyncBuffer() const; diff --git a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl index 2563f386e4..d29f35bc94 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl @@ -74,16 +74,14 @@ TEST_F(ClGetKernelWorkGroupInfoTest, GivenNullDeviceWhenGettingWorkGroupInfoFrom EXPECT_EQ(CL_INVALID_DEVICE, retVal); } -TEST_F(ClGetKernelWorkGroupInfoTests, GivenKernelRequiringScratchSpaceWhenGettingKernelWorkGroupInfoThenCorrectSpillMemSizeIsReturned) { +TEST_F(ClGetKernelWorkGroupInfoTests, 
GivenKernelRequiringScratchSpaceForSpillWhenGettingKernelWorkGroupInfoThenCorrectSpillMemSizeIsReturned) { size_t paramValueSizeRet; cl_ulong paramValue; auto pDevice = castToObject(testedClDevice); MockKernelWithInternals mockKernel(*pDevice); - mockKernel.kernelInfo.setPerThreadScratchSize(1024, 0); - - cl_ulong scratchSpaceSize = static_cast(mockKernel.mockKernel->getScratchSize()); - EXPECT_EQ(scratchSpaceSize, 1024u); + auto spillMemorySize = 1024u; + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.spillFillScratchMemorySize = spillMemorySize; retVal = clGetKernelWorkGroupInfo( mockKernel.mockMultiDeviceKernel, @@ -95,7 +93,7 @@ TEST_F(ClGetKernelWorkGroupInfoTests, GivenKernelRequiringScratchSpaceWhenGettin EXPECT_EQ(retVal, CL_SUCCESS); EXPECT_EQ(paramValueSizeRet, sizeof(cl_ulong)); - EXPECT_EQ(paramValue, scratchSpaceSize); + EXPECT_EQ(paramValue, spillMemorySize); } using matcher = IsWithinProducts; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index a1a62c6439..972438a6e1 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -657,7 +657,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenSecondEnqueueWithTheSameScra uint32_t scratchSize = 4096u; MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSize; auto sizeToProgram = PreambleHelper::getScratchSizeValueToProgramMediaVfeState(scratchSize); @@ -691,14 +691,14 @@ HWTEST_F(EnqueueKernelTest, whenEnqueueingKernelThatRequirePrivateScratchThenPri csr.getMemoryManager()->setForce32BitAllocations(false); size_t off[3] = {0, 0, 0}; size_t gws[3] = {1, 1, 1}; - uint32_t privateScratchSize = 4096u; + uint32_t scratchSizeSlot1 = 4096u; MockKernelWithInternals 
mockKernel(*pClDevice); - mockKernel.kernelInfo.setPerThreadScratchSize(privateScratchSize, 1); + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[1] = scratchSizeSlot1; pCmdQ->enqueueKernel(mockKernel.mockKernel, 1, off, gws, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(privateScratchSize, csr.requiredPrivateScratchSize); + EXPECT_EQ(scratchSizeSlot1, csr.requiredScratchSlot1Size); } HWTEST_F(EnqueueKernelTest, whenEnqueueKernelWithNoStatelessWriteWhenSbaIsBeingProgrammedThenConstPolicyIsChoosen) { diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 4246d79831..ec3345367b 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -318,7 +318,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc auto scratchSize = GetParam().scratchSize; MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSize; uint32_t sizeToProgram = (scratchSize / static_cast(MemoryConstants::kiloByte)); uint32_t bitValue = 0u; @@ -373,7 +373,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueScratchSpaceTests, GivenKernelRequiringScratc } scratchSize *= 2; - mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSize; auto itorfirstBBEnd = find(itorWalker, cmdList.end()); ASSERT_NE(cmdList.end(), itorfirstBBEnd); @@ -447,18 +447,18 @@ HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWi auto mockCsr = new MockCsrHw(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); - uint32_t scratchSize = 1024u; + uint32_t 
scratchSizeSlot0 = 1024u; MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSizeSlot0; - uint32_t sizeToProgram = (scratchSize / static_cast(MemoryConstants::kiloByte)); + uint32_t sizeToProgram = (scratchSizeSlot0 / static_cast(MemoryConstants::kiloByte)); uint32_t bitValue = 0u; while (sizeToProgram >>= 1) { bitValue++; } - auto valueToProgram = PreambleHelper::getScratchSizeValueToProgramMediaVfeState(scratchSize); + auto valueToProgram = PreambleHelper::getScratchSizeValueToProgramMediaVfeState(scratchSizeSlot0); EXPECT_EQ(bitValue, valueToProgram); enqueueKernel(mockKernel); @@ -468,8 +468,8 @@ HWTEST_P(EnqueueKernelWithScratch, GivenKernelRequiringScratchWhenItIsEnqueuedWi EXPECT_TRUE(mockCsr->isMadeResident(graphicsAllocation)); // Enqueue With ScratchSize bigger than previous - scratchSize = 8192; - mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + scratchSizeSlot0 = 8192; + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSizeSlot0; enqueueKernel(mockKernel); @@ -490,7 +490,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelWithScratch, givenDeviceForcing32bitAll auto scratchSize = 1024; MockKernelWithInternals mockKernel(*pClDevice); - mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSize; enqueueKernel(mockKernel); auto graphicsAllocation = csr->getScratchAllocation(); @@ -519,7 +519,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, EnqueueKernelWithScratch, givenDeviceForcing32bitAll // now re-try to see if SBA is not programmed scratchSize *= 2; - mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSize; enqueueKernel(mockKernel); diff --git 
a/opencl/test/unit_test/command_queue/multi_dispatch_info_tests.cpp b/opencl/test/unit_test/command_queue/multi_dispatch_info_tests.cpp index 05e8657242..5f13b5aca3 100644 --- a/opencl/test/unit_test/command_queue/multi_dispatch_info_tests.cpp +++ b/opencl/test/unit_test/command_queue/multi_dispatch_info_tests.cpp @@ -26,5 +26,5 @@ TEST_F(MultiDispatchInfoTest, GivenNullKernelWhenCreatingMultiDispatchInfoThenEx EXPECT_FALSE(multiDispatchInfo.begin()->usesSlm()); EXPECT_FALSE(multiDispatchInfo.begin()->usesStatelessPrintfSurface()); - EXPECT_EQ(0u, multiDispatchInfo.begin()->getRequiredScratchSize()); + EXPECT_EQ(0u, multiDispatchInfo.begin()->getRequiredScratchSize(0u)); } \ No newline at end of file diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 94a322e149..288097218d 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -587,40 +587,40 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCommandStreamReceiverWhenFenc } struct MockScratchController : public ScratchSpaceController { - using ScratchSpaceController::privateScratchAllocation; - using ScratchSpaceController::scratchAllocation; + using ScratchSpaceController::scratchSlot0Allocation; + using ScratchSpaceController::scratchSlot1Allocation; using ScratchSpaceController::ScratchSpaceController; void setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { - if (requiredPerThreadScratchSize > scratchSizeBytes) { - scratchSizeBytes = 
requiredPerThreadScratchSize; - scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, requiredPerThreadScratchSize}); + if (requiredPerThreadScratchSizeSlot0 > scratchSlot0SizeInBytes) { + scratchSlot0SizeInBytes = requiredPerThreadScratchSizeSlot0; + scratchSlot0Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, requiredPerThreadScratchSizeSlot0}); } - if (requiredPerThreadPrivateScratchSize > privateScratchSizeBytes) { - privateScratchSizeBytes = requiredPerThreadPrivateScratchSize; - privateScratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, requiredPerThreadPrivateScratchSize}); + if (requiredPerThreadScratchSizeSlot1 > scratchSlot1SizeInBytes) { + scratchSlot1SizeInBytes = requiredPerThreadScratchSizeSlot1; + scratchSlot1Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(MockAllocationProperties{rootDeviceIndex, requiredPerThreadScratchSizeSlot1}); } } uint64_t calculateNewGSH() override { return 0u; }; uint64_t getScratchPatchAddress() override { return 0u; }; void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { } void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -640,10 +640,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, 
whenScratchIsRequiredForFirstFlush flushTask(*commandStreamReceiver); - EXPECT_NE(nullptr, scratchController->scratchAllocation); - EXPECT_EQ(nullptr, scratchController->privateScratchAllocation); + EXPECT_NE(nullptr, scratchController->scratchSlot0Allocation); + EXPECT_EQ(nullptr, scratchController->scratchSlot1Allocation); - auto scratchAllocation = scratchController->scratchAllocation; + auto scratchAllocation = scratchController->scratchSlot0Allocation; EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); @@ -654,15 +654,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenScratchIsRequiredForFirstFlush flushTask(*commandStreamReceiver); // 2nd flush - EXPECT_NE(nullptr, scratchController->scratchAllocation); - EXPECT_NE(nullptr, scratchController->privateScratchAllocation); + EXPECT_NE(nullptr, scratchController->scratchSlot0Allocation); + EXPECT_NE(nullptr, scratchController->scratchSlot1Allocation); - auto privateScratchAllocation = scratchController->privateScratchAllocation; + auto scratch1Allocation = scratchController->scratchSlot1Allocation; EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); - EXPECT_TRUE(commandStreamReceiver->isMadeResident(privateScratchAllocation)); - EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(privateScratchAllocation)); + EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratch1Allocation)); + EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratch1Allocation)); } HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPrivateScratchIsRequiredForFirstFlushAndCommonScratchForSecondFlushThenHandleResidencyProperly) { @@ -675,13 +675,13 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPrivateScratchIsRequiredForFir flushTask(*commandStreamReceiver); - EXPECT_EQ(nullptr, scratchController->scratchAllocation); - EXPECT_NE(nullptr, 
scratchController->privateScratchAllocation); + EXPECT_EQ(nullptr, scratchController->scratchSlot0Allocation); + EXPECT_NE(nullptr, scratchController->scratchSlot1Allocation); - auto privateScratchAllocation = scratchController->privateScratchAllocation; + auto scratch1Allocation = scratchController->scratchSlot1Allocation; - EXPECT_TRUE(commandStreamReceiver->isMadeResident(privateScratchAllocation)); - EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(privateScratchAllocation)); + EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratch1Allocation)); + EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratch1Allocation)); commandStreamReceiver->madeResidentGfxAllocations.clear(); // this is only history - we can clean this commandStreamReceiver->madeNonResidentGfxAllocations.clear(); @@ -689,15 +689,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, whenPrivateScratchIsRequiredForFir flushTask(*commandStreamReceiver); // 2nd flush - EXPECT_NE(nullptr, scratchController->scratchAllocation); - EXPECT_NE(nullptr, scratchController->privateScratchAllocation); + EXPECT_NE(nullptr, scratchController->scratchSlot0Allocation); + EXPECT_NE(nullptr, scratchController->scratchSlot1Allocation); - auto scratchAllocation = scratchController->scratchAllocation; + auto scratchAllocation = scratchController->scratchSlot0Allocation; EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratchAllocation)); EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratchAllocation)); - EXPECT_TRUE(commandStreamReceiver->isMadeResident(privateScratchAllocation)); - EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(privateScratchAllocation)); + EXPECT_TRUE(commandStreamReceiver->isMadeResident(scratch1Allocation)); + EXPECT_TRUE(commandStreamReceiver->isMadeNonResident(scratch1Allocation)); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, 
givenTwoConsecutiveNdRangeKernelsThenStateBaseAddressIsProgrammedOnceAndScratchAddressInMediaVfeStateIsProgrammedTwiceBothWithCorrectAddress) { @@ -713,7 +713,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecu size_t gws = 1; uint32_t scratchSize = 1024; - kernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSize; EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); @@ -786,7 +786,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenTwoConsecu // now re-try to see if SBA is not programmed scratchSize *= 2; - kernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSize; commandQueue.enqueueKernel(kernel, 1, nullptr, &gws, nullptr, 0, nullptr, nullptr); @@ -827,7 +827,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenNdRangeKer size_t gws = 1; uint32_t scratchSize = 1024; - kernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = scratchSize; EXPECT_EQ(false, kernel.mockKernel->isBuiltIn); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp index 3911c5f9f0..2d14249254 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_gmock_tests.cpp @@ -289,7 +289,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatch mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 0u, 10u, 0u, 1u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); 
- mockCsr->requiredScratchSize = 0x200000; + mockCsr->requiredScratchSlot0Size = 0x200000; mockCsr->programVFEState(commandStream, flags, 10); ASSERT_EQ(1u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); @@ -310,7 +310,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatch mockCsr->getScratchSpaceController()->setRequiredScratchSpace(nullptr, 0u, 10u, 0u, 1u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, vfeStateDirty); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); - mockCsr->requiredScratchSize = 0x200000; + mockCsr->requiredScratchSlot0Size = 0x200000; mockCsr->programVFEState(commandStream, flags, 10); EXPECT_EQ(0u, mockCsr->getFlatBatchBufferHelper().getPatchInfoCollection().size()); @@ -327,7 +327,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskGmockTests, givenPatch mockCsr->overwriteFlatBatchBufferHelper(new MockFlatBatchBufferHelper(*pDevice->executionEnvironment)); DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); - mockCsr->requiredScratchSize = 0x200000; + mockCsr->requiredScratchSlot0Size = 0x200000; MockOsContext osContext(0, EngineDescriptorHelper::getDefaultDescriptor(DeviceBitfield(8))); mockCsr->setupContext(osContext); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 2b64aa80be..552401aac8 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -485,8 +485,8 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenScratchAl scratchController->setRequiredScratchSpace(reinterpret_cast(0x2000), 0u, 0u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_FALSE(cfeStateDirty); EXPECT_FALSE(stateBaseAddressDirty); - 
EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation()); - EXPECT_EQ(nullptr, scratchController->getPrivateScratchSpaceAllocation()); + EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot0Allocation()); + EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot1Allocation()); } HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsRequiredThenCorrectAddressIsReturned) { @@ -501,7 +501,7 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsRequiredThenCorrectAddre scratchController->setRequiredScratchSpace(surfaceHeap.get(), 0u, 0x1000u, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); uint64_t expectedScratchAddress = 0xAAABBBCCCDDD000ull; - auto scratchAllocation = scratchController->getScratchSpaceAllocation(); + auto scratchAllocation = scratchController->getScratchSpaceSlot0Allocation(); auto gmmHelper = pDevice->getGmmHelper(); auto canonizedGpuAddress = gmmHelper->canonize(expectedScratchAddress); scratchAllocation->setCpuPtrAndGpuAddress(scratchAllocation->getUnderlyingBuffer(), canonizedGpuAddress); @@ -513,7 +513,7 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenScratchSpaceIsNotRequiredThenGshAddres auto commandStreamReceiver = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); auto scratchController = commandStreamReceiver->getScratchSpaceController(); - EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation()); + EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot0Allocation()); EXPECT_EQ(0u, scratchController->calculateNewGSH()); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_3_tests.cpp index 996b3e637a..1312870d19 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_3_tests.cpp @@ -16,7 +16,7 @@ using 
namespace NEO; struct MockScratchSpaceController : ScratchSpaceControllerBase { - using ScratchSpaceControllerBase::privateScratchAllocation; + using ScratchSpaceControllerBase::scratchSlot1Allocation; using ScratchSpaceControllerBase::ScratchSpaceControllerBase; }; @@ -24,8 +24,8 @@ using ScratchSpaceControllerTest = Test; TEST_F(ScratchSpaceControllerTest, whenScratchSpaceControllerIsDestroyedThenItReleasePrivateScratchSpaceAllocation) { MockScratchSpaceController scratchSpaceController(pDevice->getRootDeviceIndex(), *pDevice->getExecutionEnvironment(), *pDevice->getGpgpuCommandStreamReceiver().getInternalAllocationStorage()); - scratchSpaceController.privateScratchAllocation = pDevice->getExecutionEnvironment()->memoryManager->allocateGraphicsMemoryInPreferredPool(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}, nullptr); - EXPECT_NE(nullptr, scratchSpaceController.privateScratchAllocation); + scratchSpaceController.scratchSlot1Allocation = pDevice->getExecutionEnvironment()->memoryManager->allocateGraphicsMemoryInPreferredPool(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize}, nullptr); + EXPECT_NE(nullptr, scratchSpaceController.scratchSlot1Allocation); // no memory leak is expected } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp index 746c96d40b..9edab6fd16 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp @@ -185,15 +185,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat parseCommands(commandStreamCSR, 0); findHardwareCommands(); - EXPECT_EQ(kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0], commandStreamReceiver->requiredScratchSize); - 
EXPECT_EQ(scratchSpaceSize, scratchController->scratchSizeBytes); - EXPECT_EQ(scratchSpaceSize, scratchController->getScratchSpaceAllocation()->getUnderlyingBufferSize()); + EXPECT_EQ(kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0], commandStreamReceiver->requiredScratchSlot0Size); + EXPECT_EQ(scratchSpaceSize, scratchController->scratchSlot0SizeInBytes); + EXPECT_EQ(scratchSpaceSize, scratchController->getScratchSpaceSlot0Allocation()->getUnderlyingBufferSize()); ASSERT_NE(nullptr, cmdMediaVfeState); auto cfeState = static_cast(cmdMediaVfeState); uint32_t bufferOffset = static_cast(scratchController->slotId * scratchController->singleSurfaceStateSize * 2); EXPECT_EQ(bufferOffset, cfeState->getScratchSpaceBuffer()); RENDER_SURFACE_STATE *scratchState = reinterpret_cast(scratchController->surfaceStateHeap + bufferOffset); - EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), scratchState->getSurfaceBaseAddress()); + EXPECT_EQ(scratchController->scratchSlot0Allocation->getGpuAddress(), scratchState->getSurfaceBaseAddress()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, scratchState->getSurfaceType()); SurfaceStateBufferLength length = {0}; @@ -230,9 +230,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat EXPECT_EQ(1u, scratchController->slotId); EXPECT_EQ(scratchController->surfaceStateHeap, oldSurfaceHeap); char *surfaceStateBuf = static_cast(oldSurfaceHeap) + scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2; - GraphicsAllocation *scratchAllocation = scratchController->scratchAllocation; + GraphicsAllocation *scratchAllocation = scratchController->scratchSlot0Allocation; RENDER_SURFACE_STATE *surfaceState = reinterpret_cast(surfaceStateBuf); - EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); + EXPECT_EQ(scratchController->scratchSlot0Allocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); 
EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, surfaceState->getSurfaceType()); void *newSurfaceHeap = alignedMalloc(0x1000, 0x1000); @@ -240,10 +240,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat EXPECT_TRUE(cfeStateDirty); EXPECT_EQ(1u, scratchController->slotId); EXPECT_EQ(scratchController->surfaceStateHeap, newSurfaceHeap); - EXPECT_EQ(scratchAllocation, scratchController->scratchAllocation); + EXPECT_EQ(scratchAllocation, scratchController->scratchSlot0Allocation); surfaceStateBuf = static_cast(newSurfaceHeap) + scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2; surfaceState = reinterpret_cast(surfaceStateBuf); - EXPECT_EQ(scratchController->scratchAllocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); + EXPECT_EQ(scratchController->scratchSlot0Allocation->getGpuAddress(), surfaceState->getSurfaceBaseAddress()); EXPECT_EQ(RENDER_SURFACE_STATE::SURFACE_TYPE_SURFTYPE_SCRATCH, surfaceState->getSurfaceType()); alignedFree(oldSurfaceHeap); @@ -269,7 +269,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat uint64_t offset = static_cast(scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2); EXPECT_EQ(offset, scratchController->getScratchPatchAddress()); EXPECT_EQ(0u, scratchController->calculateNewGSH()); - uint64_t gpuVa = scratchController->scratchAllocation->getGpuAddress(); + uint64_t gpuVa = scratchController->scratchSlot0Allocation->getGpuAddress(); char *surfaceStateBuf = static_cast(scratchController->surfaceStateHeap) + offset; RENDER_SURFACE_STATE *surfaceState = reinterpret_cast(surfaceStateBuf); EXPECT_EQ(gpuVa, surfaceState->getSurfaceBaseAddress()); @@ -280,8 +280,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat EXPECT_EQ(8u, scratchController->slotId); offset = static_cast(scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2); EXPECT_EQ(offset, 
scratchController->getScratchPatchAddress()); - EXPECT_NE(gpuVa, scratchController->scratchAllocation->getGpuAddress()); - gpuVa = scratchController->scratchAllocation->getGpuAddress(); + EXPECT_NE(gpuVa, scratchController->scratchSlot0Allocation->getGpuAddress()); + gpuVa = scratchController->scratchSlot0Allocation->getGpuAddress(); surfaceStateBuf = static_cast(scratchController->surfaceStateHeap) + offset; surfaceState = reinterpret_cast(surfaceStateBuf); EXPECT_EQ(gpuVa, surfaceState->getSurfaceBaseAddress()); @@ -308,7 +308,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat uint64_t offset = static_cast(scratchController->slotId * sizeof(RENDER_SURFACE_STATE) * 2); EXPECT_EQ(offset, scratchController->getScratchPatchAddress()); EXPECT_EQ(0u, scratchController->calculateNewGSH()); - uint64_t gpuVa = scratchController->scratchAllocation->getGpuAddress(); + uint64_t gpuVa = scratchController->scratchSlot0Allocation->getGpuAddress(); char *surfaceStateBuf = static_cast(scratchController->surfaceStateHeap) + offset; RENDER_SURFACE_STATE *surfaceState = reinterpret_cast(surfaceStateBuf); EXPECT_EQ(gpuVa, surfaceState->getSurfaceBaseAddress()); @@ -324,12 +324,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {} - using ScratchSpaceControllerXeHPAndLater::scratchAllocation; + using ScratchSpaceControllerXeHPAndLater::scratchSlot0Allocation; void setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -378,7 +378,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, 
CommandStreamReceiverHwTestXeHPAndLater, givenScrat ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {} - using ScratchSpaceControllerXeHPAndLater::scratchAllocation; + using ScratchSpaceControllerXeHPAndLater::scratchSlot0Allocation; using ScratchSpaceControllerXeHPAndLater::slotId; protected: @@ -401,9 +401,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat auto scratch = static_cast(scratchController.get()); scratch->slotId = 10; - scratch->scratchAllocation = &graphicsAllocation; + scratch->scratchSlot0Allocation = &graphicsAllocation; scratch->setNewSshPtr(surfaceHeap, cfeStateDirty, false); - scratch->scratchAllocation = nullptr; + scratch->scratchSlot0Allocation = nullptr; EXPECT_EQ(10u, scratch->slotId); EXPECT_EQ(scratch->programSurfaceStateCalledTimes, 1u); EXPECT_TRUE(cfeStateDirty); @@ -419,7 +419,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat InternalAllocationStorage &allocationStorage) : ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) {} using ScratchSpaceControllerXeHPAndLater::programSurfaceState; - using ScratchSpaceControllerXeHPAndLater::scratchAllocation; + using ScratchSpaceControllerXeHPAndLater::scratchSlot0Allocation; using ScratchSpaceControllerXeHPAndLater::slotId; using ScratchSpaceControllerXeHPAndLater::surfaceStateHeap; using ScratchSpaceControllerXeHPAndLater::updateSlots; @@ -439,9 +439,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat scratch->surfaceStateHeap = static_cast(surfaceHeap); scratch->slotId = 10; scratch->updateSlots = false; - scratch->scratchAllocation = &graphicsAllocation; + scratch->scratchSlot0Allocation = &graphicsAllocation; scratch->programSurfaceState(); - scratch->scratchAllocation = nullptr; + scratch->scratchSlot0Allocation = nullptr; 
EXPECT_EQ(10u, scratch->slotId); alignedFree(surfaceHeap); @@ -464,15 +464,15 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_TRUE(cfeStateDirty); - uint64_t gpuVa = scratchController->privateScratchAllocation->getGpuAddress(); + uint64_t gpuVa = scratchController->scratchSlot1Allocation->getGpuAddress(); EXPECT_EQ(gpuVa, surfaceState[3].getSurfaceBaseAddress()); scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch * 2, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_TRUE(cfeStateDirty); - EXPECT_NE(gpuVa, scratchController->privateScratchAllocation->getGpuAddress()); - EXPECT_EQ(scratchController->privateScratchAllocation->getGpuAddress(), surfaceState[5].getSurfaceBaseAddress()); + EXPECT_NE(gpuVa, scratchController->scratchSlot1Allocation->getGpuAddress()); + EXPECT_EQ(scratchController->scratchSlot1Allocation->getGpuAddress(), surfaceState[5].getSurfaceBaseAddress()); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceControllerWithOnlyPrivateScratchSpaceWhenGettingPatchAddressThenGetCorrectValue) { @@ -489,8 +489,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat uint32_t sizeForPrivateScratch = MemoryConstants::pageSize; - EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation()); - EXPECT_EQ(nullptr, scratchController->getPrivateScratchSpaceAllocation()); + EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot0Allocation()); + EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot1Allocation()); EXPECT_EQ(0u, scratchController->getScratchPatchAddress()); @@ -498,8 +498,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat 
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_TRUE(cfeStateDirty); auto expectedPatchAddress = 2 * sizeof(RENDER_SURFACE_STATE); - EXPECT_EQ(nullptr, scratchController->getScratchSpaceAllocation()); - EXPECT_NE(nullptr, scratchController->getPrivateScratchSpaceAllocation()); + EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot0Allocation()); + EXPECT_NE(nullptr, scratchController->getScratchSpaceSlot1Allocation()); EXPECT_EQ(expectedPatchAddress, scratchController->getScratchPatchAddress()); } @@ -521,14 +521,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_TRUE(cfeStateDirty); - uint64_t gpuVa = scratchController->privateScratchAllocation->getGpuAddress(); + uint64_t gpuVa = scratchController->scratchSlot1Allocation->getGpuAddress(); cfeStateDirty = false; scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, sizeForPrivateScratch, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_FALSE(cfeStateDirty); - EXPECT_EQ(gpuVa, scratchController->privateScratchAllocation->getGpuAddress()); + EXPECT_EQ(gpuVa, scratchController->scratchSlot1Allocation->getGpuAddress()); EXPECT_EQ(gpuVa, surfaceState[3].getSurfaceBaseAddress()); } @@ -549,7 +549,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat scratchController->setRequiredScratchSpace(surfaceState, 0u, sizeForScratch, 0u, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); EXPECT_TRUE(cfeStateDirty); - EXPECT_EQ(nullptr, scratchController->privateScratchAllocation); + EXPECT_EQ(nullptr, scratchController->scratchSlot1Allocation); EXPECT_EQ(0u, surfaceState[3].getSurfaceBaseAddress()); } @@ -584,8 +584,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, 
CommandStreamReceiverHwTestXeHPAndLater, givenDisab bool stateBaseAddressDirty = false; scratchController->setRequiredScratchSpace(surfaceState, 0u, MemoryConstants::pageSize, MemoryConstants::pageSize, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); - EXPECT_EQ(0u, scratchController->privateScratchSizeBytes); - EXPECT_EQ(nullptr, scratchController->getPrivateScratchSpaceAllocation()); + EXPECT_EQ(0u, scratchController->scratchSlot1SizeInBytes); + EXPECT_EQ(nullptr, scratchController->getScratchSpaceSlot1Allocation()); } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenDisabledPrivateScratchSpaceWhenGettingOffsetForSlotThenEachSlotContainsOnlyOneSurfaceState) { diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp index ceffb8fad9..72fcd79979 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp @@ -888,9 +888,10 @@ TEST_F(PerformanceHintTest, givenUncompressedImageWhenItsCreatedThenProperPerfor TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenContextProvidesProperHint) { - auto scratchSize = zeroSized ? 0 : 1024; + auto spillSize = zeroSized ? 
0 : 1024; MockKernelWithInternals mockKernel(context->getDevices(), context); - mockKernel.kernelInfo.setPerThreadScratchSize(scratchSize, 0); + + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.spillFillScratchMemorySize = spillSize; uint32_t computeUnitsForScratch[] = {0x10, 0x20}; auto pClDevice = &mockKernel.mockKernel->getDevice(); @@ -899,7 +900,7 @@ TEST_P(PerformanceHintKernelTest, GivenSpillFillWhenKernelIsInitializedThenConte mockKernel.mockKernel->initialize(); - auto expectedSize = scratchSize * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(); + auto expectedSize = spillSize * pClDevice->getSharedDeviceInfo().computeUnitsUsedForScratch * mockKernel.mockKernel->getKernelInfo().getMaxSimdSize(); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[REGISTER_PRESSURE_TOO_HIGH], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str(), expectedSize); EXPECT_EQ(!zeroSized, containsHint(expectedHint, userData)); diff --git a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp index 7bb79d5b4b..7b29031817 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_builder_tests.cpp @@ -38,7 +38,7 @@ class DispatchInfoBuilderFixture : public ContextFixture, public ClDeviceFixture pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::defaultGrfNumber; - pKernelInfo->setPerThreadScratchSize(1024, 0); + pKernelInfo->kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = 1024; pKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); pKernelInfo->addArgBuffer(0, 0x10, sizeof(void *)); diff --git a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp 
b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp index 6fca1a8fee..6b58a5844b 100644 --- a/opencl/test/unit_test/helpers/dispatch_info_tests.cpp +++ b/opencl/test/unit_test/helpers/dispatch_info_tests.cpp @@ -33,7 +33,7 @@ class DispatchInfoFixture : public ContextFixture, public ClDeviceFixture { ContextFixture::setUp(1, &device); pKernelInfo = std::make_unique(); - pKernelInfo->setPerThreadScratchSize(1024, 0); + pKernelInfo->kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = 1024; pKernelInfo->setPrintfSurface(sizeof(uintptr_t), 0); pProgram = new MockProgram(pContext, false, toClDeviceVector(*pClDevice)); @@ -60,7 +60,7 @@ TEST_F(DispatchInfoTest, GivenNoGeometryWhenDispatchInfoIsCreatedThenValuesAreSe std::unique_ptr dispatchInfo(new DispatchInfo); EXPECT_EQ(nullptr, dispatchInfo->getKernel()); - EXPECT_EQ(0u, dispatchInfo->getRequiredScratchSize()); + EXPECT_EQ(0u, dispatchInfo->getRequiredScratchSize(0u)); EXPECT_FALSE(dispatchInfo->usesSlm()); EXPECT_FALSE(dispatchInfo->usesStatelessPrintfSurface()); EXPECT_EQ(0u, dispatchInfo->getDim()); @@ -84,7 +84,7 @@ TEST_F(DispatchInfoTest, GivenUserGeometryWhenDispatchInfoIsCreatedThenValuesAre std::unique_ptr dispatchInfo(new DispatchInfo(pClDevice, pKernel, 3, gws, elws, offset)); EXPECT_NE(nullptr, dispatchInfo->getKernel()); - EXPECT_EQ(1024u, dispatchInfo->getRequiredScratchSize()); + EXPECT_EQ(1024u, dispatchInfo->getRequiredScratchSize(0u)); EXPECT_TRUE(dispatchInfo->usesSlm()); EXPECT_TRUE(dispatchInfo->usesStatelessPrintfSurface()); EXPECT_EQ(3u, dispatchInfo->getDim()); @@ -117,7 +117,7 @@ TEST_F(DispatchInfoTest, GivenFullGeometryWhenDispatchInfoIsCreatedThenValuesAre std::unique_ptr dispatchInfo(new DispatchInfo(pClDevice, pKernel, 3, gws, elws, offset, agws, lws, twgs, nwgs, swgs)); EXPECT_NE(nullptr, dispatchInfo->getKernel()); - EXPECT_EQ(1024u, dispatchInfo->getRequiredScratchSize()); + EXPECT_EQ(1024u, dispatchInfo->getRequiredScratchSize(0u)); EXPECT_TRUE(dispatchInfo->usesSlm()); 
EXPECT_TRUE(dispatchInfo->usesStatelessPrintfSurface()); EXPECT_EQ(3u, dispatchInfo->getDim()); @@ -148,7 +148,7 @@ TEST_F(DispatchInfoTest, WhenMultiDispatchInfoIsCreatedThenItIsNonAssignable) { TEST_F(DispatchInfoTest, WhenMultiDispatchInfoIsCreatedThenItIsEmpty) { MultiDispatchInfo multiDispatchInfo; EXPECT_TRUE(multiDispatchInfo.empty()); - EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize()); + EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize(0u)); EXPECT_FALSE(multiDispatchInfo.usesSlm()); EXPECT_FALSE(multiDispatchInfo.usesStatelessPrintfSurface()); EXPECT_EQ(0u, multiDispatchInfo.getRedescribedSurfaces().size()); @@ -172,7 +172,7 @@ TEST_F(DispatchInfoTest, GivenNoGeometryWhenMultiDispatchInfoIsCreatedThenValues MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_FALSE(multiDispatchInfo.empty()); - EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize()); + EXPECT_EQ(0u, multiDispatchInfo.getRequiredScratchSize(0u)); EXPECT_FALSE(multiDispatchInfo.usesSlm()); EXPECT_FALSE(multiDispatchInfo.usesStatelessPrintfSurface()); } @@ -187,7 +187,7 @@ TEST_F(DispatchInfoTest, GivenUserGeometryWhenMultiDispatchInfoIsCreatedThenValu MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_FALSE(multiDispatchInfo.empty()); - EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize()); + EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize(0u)); EXPECT_TRUE(multiDispatchInfo.usesSlm()); EXPECT_TRUE(multiDispatchInfo.usesStatelessPrintfSurface()); @@ -220,7 +220,7 @@ TEST_F(DispatchInfoTest, GivenFullGeometryWhenMultiDispatchInfoIsCreatedThenValu MultiDispatchInfo multiDispatchInfo; multiDispatchInfo.push(dispatchInfo); EXPECT_FALSE(multiDispatchInfo.empty()); - EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize()); + EXPECT_EQ(1024u, multiDispatchInfo.getRequiredScratchSize(0u)); EXPECT_TRUE(multiDispatchInfo.usesSlm()); EXPECT_TRUE(multiDispatchInfo.usesStatelessPrintfSurface()); @@ -318,9 
+318,3 @@ TEST(DispatchInfoBasicTests, givenDispatchInfoWhenSetCanBePartitionIsCalledThenS dispatchInfo.setCanBePartitioned(true); EXPECT_TRUE(dispatchInfo.peekCanBePartitioned()); } - -TEST(DispatchInfoBasicTests, givenDispatchInfoWithoutKernelWhenGettingSizeForPrivateScratchThenZeroIsReturned) { - DispatchInfo dispatchInfo; - EXPECT_EQ(nullptr, dispatchInfo.getKernel()); - EXPECT_EQ(0u, dispatchInfo.getRequiredPrivateScratchSize()); -} diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index c9d479b12b..4664753d78 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -798,7 +798,7 @@ TEST_F(KernelPrivateSurfaceTest, GivenKernelWhenScratchSizeIsGreaterThanMaxScrat auto pKernelInfo = std::make_unique(); pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 32; pKernelInfo->setPrivateMemory(0x100, false, 0, 0, 0); - pKernelInfo->setPerThreadScratchSize(maxScratchSize + 100, 0); + pKernelInfo->kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = maxScratchSize + 100; MockContext context; MockProgram program(&context, false, toClDeviceVector(*pClDevice)); @@ -2895,22 +2895,16 @@ TEST(KernelTest, givenNotAllArgumentsAreBuffersButAllBuffersAreStatefulWhenIniti EXPECT_TRUE(kernel.mockKernel->allBufferArgsStateful); } -TEST(KernelTest, givenKernelRequiringPrivateScratchSpaceWhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) { +TEST(KernelTest, givenKernelRequiringTwoSlotScratchSpaceWhenGettingSizeForScratchSpaceThenCorrectSizeIsReturned) { auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals mockKernel(*device); - mockKernel.kernelInfo.setPerThreadScratchSize(512u, 0); - mockKernel.kernelInfo.setPerThreadScratchSize(1024u, 1); - - EXPECT_EQ(1024u, mockKernel.mockKernel->getPrivateScratchSize()); -} - -TEST(KernelTest, 
givenKernelWithoutMediaVfeStateSlot1WhenGettingSizeForPrivateScratchSpaceThenCorrectSizeIsReturned) { - auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); - - MockKernelWithInternals mockKernel(*device); - - EXPECT_EQ(0u, mockKernel.mockKernel->getPrivateScratchSize()); + EXPECT_EQ(0u, mockKernel.mockKernel->getScratchSize(0u)); + EXPECT_EQ(0u, mockKernel.mockKernel->getScratchSize(1u)); + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = 512u; + mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[1] = 1024u; + EXPECT_EQ(512u, mockKernel.mockKernel->getScratchSize(0u)); + EXPECT_EQ(1024u, mockKernel.mockKernel->getScratchSize(1u)); } TEST(KernelTest, givenKernelWithPatchInfoCollectionEnabledWhenPatchWithImplicitSurfaceCalledThenPatchInfoDataIsCollected) { diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index fe6482e2ca..649e5853c1 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -353,18 +353,18 @@ class MockKernelWithInternals { operator MockKernel *() { return mockKernel; } + alignas(64) char sshLocal[128]; + alignas(64) char dshLocal[128]; + char crossThreadData[256]; + uint32_t kernelIsa[32]; + MockKernelInfo kernelInfo; MockMultiDeviceKernel *mockMultiDeviceKernel = nullptr; MockKernel *mockKernel; MockProgram *mockProgram; Context *mockContext; KernelInfoContainer kernelInfos; - MockKernelInfo kernelInfo; SKernelBinaryHeaderCommon kernelHeader = {}; - uint32_t kernelIsa[32]; - char crossThreadData[256]; - alignas(64) char sshLocal[128]; - alignas(64) char dshLocal[128]; std::vector defaultKernelArguments; }; class MockDebugKernel : public MockKernel { diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 2420c4a49e..d05a9a3240 100644 --- 
a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -566,17 +566,17 @@ FlushStamp CommandStreamReceiver::obtainCurrentFlushStamp() const { return flushStamp->peekStamp(); } -void CommandStreamReceiver::setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize) { - if (newRequiredScratchSize > requiredScratchSize) { - requiredScratchSize = newRequiredScratchSize; +void CommandStreamReceiver::setRequiredScratchSizes(uint32_t newRequiredScratchSlot0Size, uint32_t newRequiredScratchSlot1Size) { + if (newRequiredScratchSlot0Size > requiredScratchSlot0Size) { + requiredScratchSlot0Size = newRequiredScratchSlot0Size; } - if (newRequiredPrivateScratchSize > requiredPrivateScratchSize) { - requiredPrivateScratchSize = newRequiredPrivateScratchSize; + if (newRequiredScratchSlot1Size > requiredScratchSlot1Size) { + requiredScratchSlot1Size = newRequiredScratchSlot1Size; } } GraphicsAllocation *CommandStreamReceiver::getScratchAllocation() { - return scratchSpaceController->getScratchSpaceAllocation(); + return scratchSpaceController->getScratchSpaceSlot0Allocation(); } void CommandStreamReceiver::overwriteFlatBatchBufferHelper(FlatBatchBufferHelper *newHelper) { diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 4536eb4e7a..eb7aaf60f7 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -179,7 +179,7 @@ class CommandStreamReceiver { bool getBtdCommandDirty() const { return btdCommandDirty; } bool isRayTracingStateProgramingNeeded(Device &device) const; - void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize); + void setRequiredScratchSizes(uint32_t newRequiredScratchSlot0Size, uint32_t newRequiredScratchSlot1Size); GraphicsAllocation *getScratchAllocation(); 
GraphicsAllocation *getDebugSurfaceAllocation() const { return debugSurface; } GraphicsAllocation *allocateDebugSurface(size_t size); @@ -534,8 +534,8 @@ class CommandStreamReceiver { uint32_t latestSentStatelessMocsConfig = 0; uint64_t lastSentSliceCount = QueueSliceCount::defaultSliceCount; - uint32_t requiredScratchSize = 0; - uint32_t requiredPrivateScratchSize = 0; + uint32_t requiredScratchSlot0Size = 0; + uint32_t requiredScratchSlot1Size = 0; uint32_t lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::notSet; KernelExecutionType lastKernelExecutionType = KernelExecutionType::defaultType; MemoryCompressionState lastMemoryCompressionState = MemoryCompressionState::notApplicable; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 77664b9fb6..d4f7dc3ad8 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -269,13 +269,13 @@ CompletionStamp CommandStreamReceiverHw::flushImmediateTask( flushData.stateComputeModeFullConfigurationNeeded = getStateComputeModeDirty(); flushData.stateBaseAddressFullConfigurationNeeded = getGSBAStateDirty(); - if (dispatchFlags.sshCpuBase != nullptr && (this->requiredScratchSize > 0 || this->requiredPrivateScratchSize > 0)) { + if (dispatchFlags.sshCpuBase != nullptr && (this->requiredScratchSlot0Size > 0 || this->requiredScratchSlot1Size > 0)) { bool checkFeStateDirty = false; bool checkSbaStateDirty = false; scratchSpaceController->setRequiredScratchSpace(dispatchFlags.sshCpuBase, 0u, - this->requiredScratchSize, - this->requiredPrivateScratchSize, + this->requiredScratchSlot0Size, + this->requiredScratchSlot1Size, this->taskCount, *this->osContext, checkSbaStateDirty, @@ -283,11 +283,11 @@ CompletionStamp CommandStreamReceiverHw::flushImmediateTask( flushData.frontEndFullConfigurationNeeded |= checkFeStateDirty; 
flushData.stateBaseAddressFullConfigurationNeeded |= checkSbaStateDirty; - if (scratchSpaceController->getScratchSpaceAllocation()) { - makeResident(*scratchSpaceController->getScratchSpaceAllocation()); + if (scratchSpaceController->getScratchSpaceSlot0Allocation()) { + makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation()); } - if (scratchSpaceController->getPrivateScratchSpaceAllocation()) { - makeResident(*scratchSpaceController->getPrivateScratchSpaceAllocation()); + if (scratchSpaceController->getScratchSpaceSlot1Allocation()) { + makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation()); } } @@ -446,11 +446,11 @@ CompletionStamp CommandStreamReceiverHw::flushTask( bool stateBaseAddressDirty = false; bool checkVfeStateDirty = false; - if (ssh && (requiredScratchSize || requiredPrivateScratchSize)) { + if (ssh && (requiredScratchSlot0Size || requiredScratchSlot1Size)) { scratchSpaceController->setRequiredScratchSpace(ssh->getCpuBase(), 0u, - requiredScratchSize, - requiredPrivateScratchSize, + requiredScratchSlot0Size, + requiredScratchSlot1Size, this->taskCount, *this->osContext, stateBaseAddressDirty, @@ -458,11 +458,11 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (checkVfeStateDirty) { setMediaVFEStateDirty(true); } - if (scratchSpaceController->getScratchSpaceAllocation()) { - makeResident(*scratchSpaceController->getScratchSpaceAllocation()); + if (scratchSpaceController->getScratchSpaceSlot0Allocation()) { + makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation()); } - if (scratchSpaceController->getPrivateScratchSpaceAllocation()) { - makeResident(*scratchSpaceController->getPrivateScratchSpaceAllocation()); + if (scratchSpaceController->getScratchSpaceSlot1Allocation()) { + makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation()); } } @@ -1036,7 +1036,7 @@ inline void CommandStreamReceiverHw::programVFEState(LinearStream &cs auto engineGroupType = 
gfxCoreHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo); auto pVfeState = PreambleHelper::getSpaceForVfeState(&csr, hwInfo, engineGroupType); PreambleHelper::programVfeState( - pVfeState, peekRootDeviceEnvironment(), requiredScratchSize, getScratchPatchAddress(), + pVfeState, peekRootDeviceEnvironment(), requiredScratchSlot0Size, getScratchPatchAddress(), maxFrontEndThreads, streamProperties); auto commandOffset = PreambleHelper::getScratchSpaceAddressOffsetForVfeState(&csr, pVfeState); @@ -1757,10 +1757,10 @@ inline void CommandStreamReceiverHw::reprogramStateBaseAddress(const uint64_t newGshBase = 0; gsbaFor32BitProgrammed = false; - if (is64bit && scratchSpaceController->getScratchSpaceAllocation() && !force32BitAllocations) { + if (is64bit && scratchSpaceController->getScratchSpaceSlot0Allocation() && !force32BitAllocations) { newGshBase = scratchSpaceController->calculateNewGSH(); } else if (is64bit && force32BitAllocations && dispatchFlags.gsba32BitRequired) { - bool useLocalMemory = scratchSpaceController->getScratchSpaceAllocation() ? scratchSpaceController->getScratchSpaceAllocation()->isAllocatedInLocalMemoryPool() : false; + bool useLocalMemory = scratchSpaceController->getScratchSpaceSlot0Allocation() ? 
scratchSpaceController->getScratchSpaceSlot0Allocation()->isAllocatedInLocalMemoryPool() : false; newGshBase = getMemoryManager()->getExternalHeapBaseAddress(rootDeviceIndex, useLocalMemory); gsbaFor32BitProgrammed = true; } @@ -1950,7 +1950,7 @@ void CommandStreamReceiverHw::dispatchImmediateFlushFrontEndCommand(I auto feStateCmdSpace = PreambleHelper::getSpaceForVfeState(&csrStream, peekHwInfo(), engineGroupType); PreambleHelper::programVfeState(feStateCmdSpace, peekRootDeviceEnvironment(), - requiredScratchSize, + requiredScratchSlot0Size, getScratchPatchAddress(), device.getDeviceInfo().maxFrontEndThreads, this->streamProperties); diff --git a/shared/source/command_stream/scratch_space_controller.cpp b/shared/source/command_stream/scratch_space_controller.cpp index 9197b75a84..3662401484 100644 --- a/shared/source/command_stream/scratch_space_controller.cpp +++ b/shared/source/command_stream/scratch_space_controller.cpp @@ -24,11 +24,11 @@ ScratchSpaceController::ScratchSpaceController(uint32_t rootDeviceIndex, Executi } ScratchSpaceController::~ScratchSpaceController() { - if (scratchAllocation) { - getMemoryManager()->freeGraphicsMemory(scratchAllocation); + if (scratchSlot0Allocation) { + getMemoryManager()->freeGraphicsMemory(scratchSlot0Allocation); } - if (privateScratchAllocation) { - getMemoryManager()->freeGraphicsMemory(privateScratchAllocation); + if (scratchSlot1Allocation) { + getMemoryManager()->freeGraphicsMemory(scratchSlot1Allocation); } } diff --git a/shared/source/command_stream/scratch_space_controller.h b/shared/source/command_stream/scratch_space_controller.h index 208449f1f3..36034b9d11 100644 --- a/shared/source/command_stream/scratch_space_controller.h +++ b/shared/source/command_stream/scratch_space_controller.h @@ -34,16 +34,16 @@ class ScratchSpaceController : NonCopyableOrMovableClass { ScratchSpaceController(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage); virtual 
~ScratchSpaceController(); - MOCKABLE_VIRTUAL GraphicsAllocation *getScratchSpaceAllocation() { - return scratchAllocation; + MOCKABLE_VIRTUAL GraphicsAllocation *getScratchSpaceSlot0Allocation() { + return scratchSlot0Allocation; } - GraphicsAllocation *getPrivateScratchSpaceAllocation() { - return privateScratchAllocation; + GraphicsAllocation *getScratchSpaceSlot1Allocation() { + return scratchSlot1Allocation; } virtual void setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -51,25 +51,25 @@ class ScratchSpaceController : NonCopyableOrMovableClass { virtual uint64_t calculateNewGSH() = 0; virtual uint64_t getScratchPatchAddress() = 0; - inline uint32_t getPerThreadScratchSpaceSize() { - return static_cast(scratchSizeBytes / computeUnitsUsedForScratch); + inline uint32_t getPerThreadScratchSpaceSizeSlot0() { + return static_cast(scratchSlot0SizeInBytes / computeUnitsUsedForScratch); } - inline uint32_t getPerThreadPrivateScratchSize() { - return static_cast(privateScratchSizeBytes / computeUnitsUsedForScratch); + inline uint32_t getPerThreadScratchSizeSlot1() { + return static_cast(scratchSlot1SizeInBytes / computeUnitsUsedForScratch); } virtual void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) = 0; virtual void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) = 0; virtual void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, - uint32_t 
requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -81,11 +81,11 @@ class ScratchSpaceController : NonCopyableOrMovableClass { const uint32_t rootDeviceIndex; ExecutionEnvironment &executionEnvironment; - GraphicsAllocation *scratchAllocation = nullptr; - GraphicsAllocation *privateScratchAllocation = nullptr; + GraphicsAllocation *scratchSlot0Allocation = nullptr; + GraphicsAllocation *scratchSlot1Allocation = nullptr; InternalAllocationStorage &csrAllocationStorage; - size_t scratchSizeBytes = 0; - size_t privateScratchSizeBytes = 0; + size_t scratchSlot0SizeInBytes = 0; + size_t scratchSlot1SizeInBytes = 0; bool force32BitAllocation = false; uint32_t computeUnitsUsedForScratch = 0; }; diff --git a/shared/source/command_stream/scratch_space_controller_base.cpp b/shared/source/command_stream/scratch_space_controller_base.cpp index bbacb4ae39..acf3d7db00 100644 --- a/shared/source/command_stream/scratch_space_controller_base.cpp +++ b/shared/source/command_stream/scratch_space_controller_base.cpp @@ -26,19 +26,19 @@ ScratchSpaceControllerBase::ScratchSpaceControllerBase(uint32_t rootDeviceIndex, void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { - size_t requiredScratchSizeInBytes = requiredPerThreadScratchSize * computeUnitsUsedForScratch; - if (requiredScratchSizeInBytes && (scratchSizeBytes < requiredScratchSizeInBytes)) { - if (scratchAllocation) { - scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId()); 
- csrAllocationStorage.storeAllocation(std::unique_ptr(scratchAllocation), TEMPORARY_ALLOCATION); + size_t requiredScratchSizeInBytes = requiredPerThreadScratchSizeSlot0 * computeUnitsUsedForScratch; + if (requiredScratchSizeInBytes && (scratchSlot0SizeInBytes < requiredScratchSizeInBytes)) { + if (scratchSlot0Allocation) { + scratchSlot0Allocation->updateTaskCount(currentTaskCount, osContext.getContextId()); + csrAllocationStorage.storeAllocation(std::unique_ptr(scratchSlot0Allocation), TEMPORARY_ALLOCATION); } - scratchSizeBytes = requiredScratchSizeInBytes; + scratchSlot0SizeInBytes = requiredScratchSizeInBytes; createScratchSpaceAllocation(); vfeStateDirty = true; force32BitAllocation = getMemoryManager()->peekForce32BitAllocations(); @@ -49,14 +49,14 @@ void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress, } void ScratchSpaceControllerBase::createScratchSpaceAllocation() { - scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, scratchSizeBytes, AllocationType::scratchSurface, this->csrAllocationStorage.getDeviceBitfield()}); - UNRECOVERABLE_IF(scratchAllocation == nullptr); + scratchSlot0Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex, scratchSlot0SizeInBytes, AllocationType::scratchSurface, this->csrAllocationStorage.getDeviceBitfield()}); + UNRECOVERABLE_IF(scratchSlot0Allocation == nullptr); } uint64_t ScratchSpaceControllerBase::calculateNewGSH() { uint64_t gsh = 0; - if (scratchAllocation) { - gsh = scratchAllocation->getGpuAddress() - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; + if (scratchSlot0Allocation) { + gsh = scratchSlot0Allocation->getGpuAddress() - ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; } return gsh; } @@ -65,8 +65,8 @@ uint64_t ScratchSpaceControllerBase::getScratchPatchAddress() { // for 64 bit, scratch space pointer is being programmed as "General State Base Address - scratchSpaceOffsetFor64bit" // and "0 + 
scratchSpaceOffsetFor64bit" is being programmed in Media VFE state uint64_t scratchAddress = 0; - if (scratchAllocation) { - scratchAddress = scratchAllocation->getGpuAddressToPatch(); + if (scratchSlot0Allocation) { + scratchAddress = scratchSlot0Allocation->getGpuAddressToPatch(); if (is64bit && !getMemoryManager()->peekForce32BitAllocations()) { // this is to avoid scractch allocation offset "0" scratchAddress = ScratchSpaceConstants::scratchSpaceOffsetFor64Bit; @@ -85,8 +85,8 @@ void ScratchSpaceControllerBase::reserveHeap(IndirectHeap::Type heapType, Indire void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer, uint32_t offset, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -94,8 +94,8 @@ void ScratchSpaceControllerBase::programHeaps(HeapContainer &heapContainer, } void ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, diff --git a/shared/source/command_stream/scratch_space_controller_base.h b/shared/source/command_stream/scratch_space_controller_base.h index 2144166931..4d588e0bfb 100644 --- a/shared/source/command_stream/scratch_space_controller_base.h +++ b/shared/source/command_stream/scratch_space_controller_base.h @@ -16,8 +16,8 @@ class ScratchSpaceControllerBase : public ScratchSpaceController { void setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + 
uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -29,15 +29,15 @@ class ScratchSpaceControllerBase : public ScratchSpaceController { void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) override; void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, diff --git a/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp b/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp index e603238aad..2269fd5886 100644 --- a/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp +++ b/shared/source/command_stream/scratch_space_controller_xehp_and_later.cpp @@ -30,9 +30,9 @@ ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater(uint32_t auto &gfxCoreHelper = environment.rootDeviceEnvironments[rootDeviceIndex]->getHelper(); singleSurfaceStateSize = gfxCoreHelper.getRenderSurfaceStateSize(); if (debugManager.flags.EnablePrivateScratchSlot1.get() != -1) { - privateScratchSpaceSupported = !!debugManager.flags.EnablePrivateScratchSlot1.get(); + twoSlotScratchSpaceSupported = !!debugManager.flags.EnablePrivateScratchSlot1.get(); } - if (privateScratchSpaceSupported) { + if (twoSlotScratchSpaceSupported) { ScratchSpaceControllerXeHPAndLater::stateSlotsCount *= 2; } } @@ 
-40,7 +40,7 @@ ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater(uint32_t void ScratchSpaceControllerXeHPAndLater::setNewSshPtr(void *newSsh, bool &cfeDirty, bool changeId) { if (surfaceStateHeap != newSsh) { surfaceStateHeap = static_cast(newSsh); - if (scratchAllocation == nullptr) { + if (scratchSlot0Allocation == nullptr) { cfeDirty = false; } else { if (changeId) { @@ -54,15 +54,15 @@ void ScratchSpaceControllerXeHPAndLater::setNewSshPtr(void *newSsh, bool &cfeDir void ScratchSpaceControllerXeHPAndLater::setRequiredScratchSpace(void *sshBaseAddress, uint32_t offset, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { setNewSshPtr(sshBaseAddress, vfeStateDirty, offset == 0 ? true : false); bool scratchSurfaceDirty = false; - prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty); + prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty); if (scratchSurfaceDirty) { vfeStateDirty = true; updateSlots = true; @@ -75,7 +75,7 @@ void ScratchSpaceControllerXeHPAndLater::programSurfaceState() { slotId++; } UNRECOVERABLE_IF(slotId >= stateSlotsCount); - UNRECOVERABLE_IF(scratchAllocation == nullptr && privateScratchAllocation == nullptr); + UNRECOVERABLE_IF(scratchSlot0Allocation == nullptr && scratchSlot1Allocation == nullptr); void *surfaceStateForScratchAllocation = ptrOffset(static_cast(surfaceStateHeap), getOffsetToSurfaceState(slotId + sshOffset)); programSurfaceStateAtPtr(surfaceStateForScratchAllocation); @@ -84,23 +84,23 @@ void 
ScratchSpaceControllerXeHPAndLater::programSurfaceState() { void ScratchSpaceControllerXeHPAndLater::programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation) { auto &gfxCoreHelper = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHelper(); uint64_t scratchAllocationAddress = 0u; - if (scratchAllocation) { - scratchAllocationAddress = scratchAllocation->getGpuAddress(); + if (scratchSlot0Allocation) { + scratchAllocationAddress = scratchSlot0Allocation->getGpuAddress(); } gfxCoreHelper.setRenderSurfaceStateForScratchResource(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex], surfaceStateForScratchAllocation, computeUnitsUsedForScratch, scratchAllocationAddress, 0, perThreadScratchSize, nullptr, false, scratchType, false, true); - if (privateScratchSpaceSupported) { - void *surfaceStateForPrivateScratchAllocation = ptrOffset(surfaceStateForScratchAllocation, singleSurfaceStateSize); - uint64_t privateScratchAllocationAddress = 0u; + if (twoSlotScratchSpaceSupported) { + void *surfaceStateForSlot1Allocation = ptrOffset(surfaceStateForScratchAllocation, singleSurfaceStateSize); + uint64_t scratchSlot1AllocationAddress = 0u; - if (privateScratchAllocation) { - privateScratchAllocationAddress = privateScratchAllocation->getGpuAddress(); + if (scratchSlot1Allocation) { + scratchSlot1AllocationAddress = scratchSlot1Allocation->getGpuAddress(); } gfxCoreHelper.setRenderSurfaceStateForScratchResource(*executionEnvironment.rootDeviceEnvironments[rootDeviceIndex], - surfaceStateForPrivateScratchAllocation, computeUnitsUsedForScratch, - privateScratchAllocationAddress, 0, perThreadPrivateScratchSize, nullptr, false, + surfaceStateForSlot1Allocation, computeUnitsUsedForScratch, + scratchSlot1AllocationAddress, 0, perThreadScratchSpaceSlot1Size, nullptr, false, scratchType, false, true); } } @@ -110,7 +110,7 @@ uint64_t ScratchSpaceControllerXeHPAndLater::calculateNewGSH() { } uint64_t 
ScratchSpaceControllerXeHPAndLater::getScratchPatchAddress() { uint64_t scratchAddress = 0u; - if (scratchAllocation || privateScratchAllocation) { + if (scratchSlot0Allocation || scratchSlot1Allocation) { scratchAddress = static_cast(getOffsetToSurfaceState(slotId + sshOffset)); } return scratchAddress; @@ -118,7 +118,7 @@ uint64_t ScratchSpaceControllerXeHPAndLater::getScratchPatchAddress() { size_t ScratchSpaceControllerXeHPAndLater::getOffsetToSurfaceState(uint32_t requiredSlotCount) const { auto offset = requiredSlotCount * singleSurfaceStateSize; - if (privateScratchSpaceSupported) { + if (twoSlotScratchSpaceSupported) { offset *= 2; } return offset; @@ -131,17 +131,17 @@ void ScratchSpaceControllerXeHPAndLater::reserveHeap(IndirectHeap::Type heapType } void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, NEO::CommandStreamReceiver *csr) { bool scratchSurfaceDirty = false; - prepareScratchAllocation(requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty); + prepareScratchAllocation(requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, scratchSurfaceDirty, vfeStateDirty); if (scratchSurfaceDirty) { - bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (privateScratchSpaceSupported ? 2 : 1), scratchAllocation, BindlessHeapsHelper::specialSsh); + bindlessSS = heapsHelper->allocateSSInHeap(singleSurfaceStateSize * (twoSlotScratchSpaceSupported ? 
2 : 1), scratchSlot0Allocation, BindlessHeapsHelper::specialSsh); programSurfaceStateAtPtr(bindlessSS.ssPtr); vfeStateDirty = true; } @@ -150,62 +150,62 @@ void ScratchSpaceControllerXeHPAndLater::programBindlessSurfaceStateForScratch(B } } -void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, +void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &scratchSurfaceDirty, bool &vfeStateDirty) { - uint32_t requiredPerThreadScratchSizeAlignedUp = requiredPerThreadScratchSize; - if (!Math::isPow2(requiredPerThreadScratchSizeAlignedUp)) { - requiredPerThreadScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSize); + uint32_t requiredPerThreadScratchSizeSlot0AlignedUp = requiredPerThreadScratchSizeSlot0; + if (!Math::isPow2(requiredPerThreadScratchSizeSlot0AlignedUp)) { + requiredPerThreadScratchSizeSlot0AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot0); } - size_t requiredScratchSizeInBytes = static_cast(requiredPerThreadScratchSizeAlignedUp) * computeUnitsUsedForScratch; + size_t requiredScratchSizeInBytes = static_cast(requiredPerThreadScratchSizeSlot0AlignedUp) * computeUnitsUsedForScratch; scratchSurfaceDirty = false; auto multiTileCapable = osContext.getNumSupportedDevices() > 1; - if (scratchSizeBytes < requiredScratchSizeInBytes) { - if (scratchAllocation) { - scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId()); - csrAllocationStorage.storeAllocation(std::unique_ptr(scratchAllocation), TEMPORARY_ALLOCATION); + if (scratchSlot0SizeInBytes < requiredScratchSizeInBytes) { + if (scratchSlot0Allocation) { + scratchSlot0Allocation->updateTaskCount(currentTaskCount, osContext.getContextId()); + 
csrAllocationStorage.storeAllocation(std::unique_ptr(scratchSlot0Allocation), TEMPORARY_ALLOCATION); } scratchSurfaceDirty = true; - scratchSizeBytes = requiredScratchSizeInBytes; - perThreadScratchSize = requiredPerThreadScratchSizeAlignedUp; - AllocationProperties properties{this->rootDeviceIndex, true, scratchSizeBytes, AllocationType::scratchSurface, multiTileCapable, false, osContext.getDeviceBitfield()}; - scratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); + scratchSlot0SizeInBytes = requiredScratchSizeInBytes; + perThreadScratchSize = requiredPerThreadScratchSizeSlot0AlignedUp; + AllocationProperties properties{this->rootDeviceIndex, true, scratchSlot0SizeInBytes, AllocationType::scratchSurface, multiTileCapable, false, osContext.getDeviceBitfield()}; + scratchSlot0Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } - if (privateScratchSpaceSupported) { - uint32_t requiredPerThreadPrivateScratchSizeAlignedUp = requiredPerThreadPrivateScratchSize; - if (!Math::isPow2(requiredPerThreadPrivateScratchSizeAlignedUp)) { - requiredPerThreadPrivateScratchSizeAlignedUp = Math::nextPowerOfTwo(requiredPerThreadPrivateScratchSize); + if (twoSlotScratchSpaceSupported) { + uint32_t requiredPerThreadScratchSizeSlot1AlignedUp = requiredPerThreadScratchSizeSlot1; + if (!Math::isPow2(requiredPerThreadScratchSizeSlot1AlignedUp)) { + requiredPerThreadScratchSizeSlot1AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot1); } - size_t requiredPrivateScratchSizeInBytes = static_cast(requiredPerThreadPrivateScratchSizeAlignedUp) * computeUnitsUsedForScratch; - if (privateScratchSizeBytes < requiredPrivateScratchSizeInBytes) { - if (privateScratchAllocation) { - privateScratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId()); - csrAllocationStorage.storeAllocation(std::unique_ptr(privateScratchAllocation), TEMPORARY_ALLOCATION); + size_t requiredScratchSlot1SizeInBytes = 
static_cast(requiredPerThreadScratchSizeSlot1AlignedUp) * computeUnitsUsedForScratch; + if (scratchSlot1SizeInBytes < requiredScratchSlot1SizeInBytes) { + if (scratchSlot1Allocation) { + scratchSlot1Allocation->updateTaskCount(currentTaskCount, osContext.getContextId()); + csrAllocationStorage.storeAllocation(std::unique_ptr(scratchSlot1Allocation), TEMPORARY_ALLOCATION); } - privateScratchSizeBytes = requiredPrivateScratchSizeInBytes; - perThreadPrivateScratchSize = requiredPerThreadPrivateScratchSizeAlignedUp; + scratchSlot1SizeInBytes = requiredScratchSlot1SizeInBytes; + perThreadScratchSpaceSlot1Size = requiredPerThreadScratchSizeSlot1AlignedUp; scratchSurfaceDirty = true; - AllocationProperties properties{this->rootDeviceIndex, true, privateScratchSizeBytes, AllocationType::privateSurface, multiTileCapable, false, osContext.getDeviceBitfield()}; - privateScratchAllocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); + AllocationProperties properties{this->rootDeviceIndex, true, scratchSlot1SizeInBytes, AllocationType::scratchSurface, multiTileCapable, false, osContext.getDeviceBitfield()}; + scratchSlot1Allocation = getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); } } } void ScratchSpaceControllerXeHPAndLater::programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) { sshOffset = scratchSlot; updateSlots = false; - setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty); + setRequiredScratchSpace(heapContainer[0]->getUnderlyingBuffer(), sshOffset, 
requiredPerThreadScratchSizeSlot0, requiredPerThreadScratchSizeSlot1, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty); for (uint32_t i = 1; i < heapContainer.size(); ++i) { surfaceStateHeap = static_cast(heapContainer[i]->getUnderlyingBuffer()); diff --git a/shared/source/command_stream/scratch_space_controller_xehp_and_later.h b/shared/source/command_stream/scratch_space_controller_xehp_and_later.h index 6eace20be1..94e335bce6 100644 --- a/shared/source/command_stream/scratch_space_controller_xehp_and_later.h +++ b/shared/source/command_stream/scratch_space_controller_xehp_and_later.h @@ -22,8 +22,8 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController { void setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -36,15 +36,15 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController { void programHeaps(HeapContainer &heapContainer, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override; void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -54,8 +54,8 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController { protected: MOCKABLE_VIRTUAL void programSurfaceState(); 
MOCKABLE_VIRTUAL void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation); - MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + MOCKABLE_VIRTUAL void prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -66,13 +66,13 @@ class ScratchSpaceControllerXeHPAndLater : public ScratchSpaceController { bool updateSlots = true; uint32_t stateSlotsCount = 16; static const uint32_t scratchType = 6; - bool privateScratchSpaceSupported = true; + bool twoSlotScratchSpaceSupported = true; char *surfaceStateHeap = nullptr; size_t singleSurfaceStateSize = 0; uint32_t slotId = 0; uint32_t perThreadScratchSize = 0; - uint32_t perThreadPrivateScratchSize = 0; + uint32_t perThreadScratchSpaceSlot1Size = 0; uint32_t sshOffset = 0; SurfaceStateInHeapInfo bindlessSS = {}; }; diff --git a/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp b/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp index 1bd00862a4..dbed9c646e 100644 --- a/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp +++ b/shared/source/device_binary_format/zebin/zeinfo_decoder.cpp @@ -1540,7 +1540,12 @@ DecodeError populateKernelPerThreadMemoryBuffer(KernelDescriptor &dst, const Ker dst.kernelAttributes.perHwThreadPrivateMemorySize = size; break; case AllocationTypeScratch: - if (src.slot > 1) { + + if (src.slot == 0) { + dst.kernelAttributes.spillFillScratchMemorySize = src.size; + } else if (src.slot == 1) { + dst.kernelAttributes.privateScratchMemorySize = src.size; + } else { outErrReason.append("DeviceBinaryFormat::zebin : Invalid scratch buffer slot " + std::to_string(src.slot) + " in context of : " + dst.kernelMetadata.kernelName + ". 
Expected 0 or 1.\n"); return DecodeError::invalidBinary; } diff --git a/shared/source/helpers/gfx_core_helper_xehp_and_later.inl b/shared/source/helpers/gfx_core_helper_xehp_and_later.inl index 4048f224da..f21168d761 100644 --- a/shared/source/helpers/gfx_core_helper_xehp_and_later.inl +++ b/shared/source/helpers/gfx_core_helper_xehp_and_later.inl @@ -202,6 +202,6 @@ bool GfxCoreHelperHw::largeGrfModeSupported() const { template uint32_t GfxCoreHelperHw::getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const { const auto &kernelAttributes = kernelDescriptor.kernelAttributes; - return (kernelAttributes.perThreadScratchSize[1] > 0) ? kernelAttributes.perThreadScratchSize[1] : kernelAttributes.perHwThreadPrivateMemorySize; + return (kernelAttributes.privateScratchMemorySize > 0) ? kernelAttributes.privateScratchMemorySize : kernelAttributes.perHwThreadPrivateMemorySize; } } // namespace NEO diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index 7708760e45..f843552b7b 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -54,6 +54,8 @@ struct KernelDescriptor { uint32_t perHwThreadPrivateMemorySize = 0U; uint32_t perThreadSystemThreadSurfaceSize = 0U; uint32_t numThreadsRequired = 0u; + uint32_t spillFillScratchMemorySize = 0u; + uint32_t privateScratchMemorySize = 0u; ThreadArbitrationPolicy threadArbitrationPolicy = NotPresent; uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U}; uint16_t crossThreadDataSize = 0U; diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp index e4f3e9e5bc..b006a213f5 100644 --- a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ 
-88,6 +88,11 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchAllocateLocalSu void populateKernelDescriptor(KernelDescriptor &dst, const SPatchMediaVFEState &token, uint32_t slot) { UNRECOVERABLE_IF(slot >= 2U); dst.kernelAttributes.perThreadScratchSize[slot] = token.PerThreadScratchSpace; + if (slot == 0) { + dst.kernelAttributes.spillFillScratchMemorySize = token.PerThreadScratchSpace; + } else if (slot == 1) { + dst.kernelAttributes.privateScratchMemorySize = token.PerThreadScratchSpace; + } } void populateKernelDescriptor(KernelDescriptor &dst, const SPatchThreadPayload &token) { diff --git a/shared/source/os_interface/linux/drm_command_stream.h b/shared/source/os_interface/linux/drm_command_stream.h index 0edda3d02e..eef9eccd98 100644 --- a/shared/source/os_interface/linux/drm_command_stream.h +++ b/shared/source/os_interface/linux/drm_command_stream.h @@ -28,7 +28,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver { using BaseClass::makeResident; using BaseClass::mediaVfeStateDirty; using BaseClass::osContext; - using BaseClass::requiredScratchSize; + using BaseClass::requiredScratchSlot0Size; using CommandStreamReceiverHw::CommandStreamReceiver::getTagAddress; using CommandStreamReceiverHw::CommandStreamReceiver::getTagAllocation; using CommandStreamReceiverHw::CommandStreamReceiver::latestSentTaskCount; diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index ba011945e4..75aa0bd6b8 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -136,8 +136,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::pipelineSupportFlags; using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator; using BaseClass::CommandStreamReceiver::requestedPreallocationsAmount; - using 
BaseClass::CommandStreamReceiver::requiredPrivateScratchSize; - using BaseClass::CommandStreamReceiver::requiredScratchSize; + using BaseClass::CommandStreamReceiver::requiredScratchSlot0Size; + using BaseClass::CommandStreamReceiver::requiredScratchSlot1Size; using BaseClass::CommandStreamReceiver::resourcesInitialized; using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired; using BaseClass::CommandStreamReceiver::sbaSupportFlags; diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index d95ac0022b..b18d0e7319 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -279,7 +279,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw { using CommandStreamReceiver::mediaVfeStateDirty; using CommandStreamReceiver::nTo1SubmissionModelEnabled; using CommandStreamReceiver::pageTableManagerInitialized; - using CommandStreamReceiver::requiredScratchSize; + using CommandStreamReceiver::requiredScratchSlot0Size; using CommandStreamReceiver::sbaSupportFlags; using CommandStreamReceiver::streamProperties; using CommandStreamReceiver::tagAddress; diff --git a/shared/test/common/mocks/mock_kernel_info.cpp b/shared/test/common/mocks/mock_kernel_info.cpp index c17ec8d76f..b3e1b9fd34 100644 --- a/shared/test/common/mocks/mock_kernel_info.cpp +++ b/shared/test/common/mocks/mock_kernel_info.cpp @@ -175,9 +175,6 @@ void MockKernelInfo::setSamplerTable(DynamicStateHeapOffset borderColor, uint8_t samplerTable.tableOffset = tableOffset; } -void MockKernelInfo::setPerThreadScratchSize(uint32_t perThreadScratchSize, uint32_t slot) { - kernelDescriptor.kernelAttributes.perThreadScratchSize[slot] = perThreadScratchSize; -} void MockKernelInfo::setLocalIds(const std::array &localIds) { kernelDescriptor.kernelAttributes.numLocalIdChannels = localIds[0] + localIds[1] + localIds[2]; kernelDescriptor.kernelAttributes.localId[0] = 
localIds[0]; diff --git a/shared/test/common/mocks/mock_kernel_info.h b/shared/test/common/mocks/mock_kernel_info.h index 75fbe4c6d5..5f963cfe75 100644 --- a/shared/test/common/mocks/mock_kernel_info.h +++ b/shared/test/common/mocks/mock_kernel_info.h @@ -64,7 +64,6 @@ class MockKernelInfo : public KernelInfo { void setSyncBuffer(uint8_t pointerSize, CrossThreadDataOffset stateless, SurfaceStateHeapOffset bindful = undefined); void setPrivateMemory(uint32_t perThreadPrivateMemorySize, bool isSimtThread, uint8_t pointerSize, CrossThreadDataOffset stateless, SurfaceStateHeapOffset bindful = undefined); void setSamplerTable(DynamicStateHeapOffset borderColor, uint8_t numSamplers, DynamicStateHeapOffset tableOffset); - void setPerThreadScratchSize(uint32_t perThreadScratchSize, uint32_t slot); void setLocalIds(const std::array &localIds); private: diff --git a/shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h b/shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h index ac4322dc0c..3d0a24a2a7 100644 --- a/shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h +++ b/shared/test/common/mocks/mock_scratch_space_controller_xehp_and_later.h @@ -13,10 +13,10 @@ struct MockScratchSpaceControllerXeHPAndLater : public ScratchSpaceControllerXeH using ScratchSpaceControllerXeHPAndLater::computeUnitsUsedForScratch; using ScratchSpaceControllerXeHPAndLater::getOffsetToSurfaceState; using ScratchSpaceControllerXeHPAndLater::perThreadScratchSize; - using ScratchSpaceControllerXeHPAndLater::privateScratchAllocation; - using ScratchSpaceControllerXeHPAndLater::privateScratchSizeBytes; - using ScratchSpaceControllerXeHPAndLater::scratchAllocation; - using ScratchSpaceControllerXeHPAndLater::scratchSizeBytes; + using ScratchSpaceControllerXeHPAndLater::scratchSlot0Allocation; + using ScratchSpaceControllerXeHPAndLater::scratchSlot0SizeInBytes; + using ScratchSpaceControllerXeHPAndLater::scratchSlot1Allocation; + using 
ScratchSpaceControllerXeHPAndLater::scratchSlot1SizeInBytes; using ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater; using ScratchSpaceControllerXeHPAndLater::singleSurfaceStateSize; using ScratchSpaceControllerXeHPAndLater::slotId; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index a0cd524d57..be00c40693 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -2852,8 +2852,8 @@ struct MockRequiredScratchSpaceController : public ScratchSpaceControllerBase { InternalAllocationStorage &allocationStorage) : ScratchSpaceControllerBase(rootDeviceIndex, environment, allocationStorage) {} void setRequiredScratchSpace(void *sshBaseAddress, uint32_t scratchSlot, - uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, @@ -3925,7 +3925,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE); EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer()); - EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiver.getScratchSpaceController()->getScratchSpaceAllocation())); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiver.getScratchSpaceController()->getScratchSpaceSlot0Allocation())); commandStreamReceiver.setRequiredScratchSizes(0x400, 0); @@ -3979,7 +3979,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE); EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer()); - 
EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiver.getScratchSpaceController()->getPrivateScratchSpaceAllocation())); + EXPECT_TRUE(commandStreamReceiver.isMadeResident(commandStreamReceiver.getScratchSpaceController()->getScratchSpaceSlot1Allocation())); } HWTEST2_F(CommandStreamReceiverHwTest, @@ -4511,9 +4511,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfa bool stateBaseAddressDirty = false; scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch, 0u, *pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty); - EXPECT_NE(scratchController->privateScratchSizeBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch); - EXPECT_EQ(scratchController->privateScratchSizeBytes, alignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch); - EXPECT_EQ(scratchController->privateScratchSizeBytes, scratchController->getPrivateScratchSpaceAllocation()->getUnderlyingBufferSize()); + EXPECT_NE(scratchController->scratchSlot1SizeInBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch); + EXPECT_EQ(scratchController->scratchSlot1SizeInBytes, alignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch); + EXPECT_EQ(scratchController->scratchSlot1SizeInBytes, scratchController->getScratchSpaceSlot1Allocation()->getUnderlyingBufferSize()); } HWTEST_F(CommandStreamReceiverHwTest, givenDcFlushRequiredWhenProgramStallingPostSyncCommandsForBarrierCalledThenDcFlushSet) { diff --git a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp index 0beae2a534..f33cbea633 100644 --- a/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp +++ b/shared/test/unit_test/device_binary_format/zebin_decoder_tests.cpp @@ -2704,7 +2704,7 @@ kernels: usage: private_space size: 128 - type: scratch - 
usage: single_space + usage: private_space size: 256 slot: 1 ... @@ -2737,7 +2737,7 @@ kernels: EXPECT_FALSE(buffers[1].isSimtThread); EXPECT_EQ(NEO::Zebin::ZeInfo::Types::Kernel::PerThreadMemoryBuffer::AllocationTypeScratch, buffers[2].allocationType); - EXPECT_EQ(NEO::Zebin::ZeInfo::Types::Kernel::PerThreadMemoryBuffer::MemoryUsageSingleSpace, buffers[2].memoryUsage); + EXPECT_EQ(NEO::Zebin::ZeInfo::Types::Kernel::PerThreadMemoryBuffer::MemoryUsagePrivateSpace, buffers[2].memoryUsage); EXPECT_FALSE(buffers[2].isSimtThread); EXPECT_EQ(256, buffers[2].size); EXPECT_EQ(1, buffers[2].slot); @@ -4041,7 +4041,7 @@ kernels: simd_size: 8 per_thread_memory_buffers: - type: scratch - usage: private_space + usage: spill_fill_space size: 2048 is_simt_thread: true )==="; @@ -4051,6 +4051,7 @@ kernels: EXPECT_TRUE(warnings.empty()) << warnings; EXPECT_EQ(2048U, kernelDescriptor->kernelAttributes.perThreadScratchSize[0]); EXPECT_EQ(0U, kernelDescriptor->kernelAttributes.perThreadScratchSize[1]); + EXPECT_EQ(2048U, kernelDescriptor->kernelAttributes.spillFillScratchMemorySize); } TEST_F(decodeZeInfoKernelEntryTest, GivenPerThreadMemoryBufferBiggerThanMinimalWhenSlotAndSimtThreadIsProvidedThenSetsProperFieldsInDescriptorInCorrectSlot) { @@ -4072,6 +4073,7 @@ kernels: EXPECT_TRUE(warnings.empty()) << warnings; EXPECT_EQ(0U, kernelDescriptor->kernelAttributes.perThreadScratchSize[0]); EXPECT_EQ(2048U, kernelDescriptor->kernelAttributes.perThreadScratchSize[1]); + EXPECT_EQ(2048U, kernelDescriptor->kernelAttributes.privateScratchMemorySize); } TEST_F(decodeZeInfoKernelEntryTest, GivenPerThreadMemoryBufferOfSizeSmallerThanMinimalWhenTypeIsScratchThenSetsProperFieldsInDescriptor) { diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index 8ab642cf72..114c463774 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -1667,6 +1667,6 @@ 
HWTEST_F(GfxCoreHelperTest, givenCooperativeKernelWhenAskingForSingleTileDispatc HWTEST2_F(GfxCoreHelperTest, whenPrivateScratchSizeIsDefinedThenItIsReturnedAsKernelPrivateMemorySize, IsAtLeastXeHpCore) { KernelDescriptor kernelDescriptor{}; kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0x100u; - kernelDescriptor.kernelAttributes.perThreadScratchSize[1] = 0x200u; + kernelDescriptor.kernelAttributes.privateScratchMemorySize = 0x200u; EXPECT_EQ(0x200u, getHelper().getKernelPrivateMemSize(kernelDescriptor)); } diff --git a/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp b/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp index fed5d875b9..3862cd3fbb 100644 --- a/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp +++ b/shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -240,7 +240,9 @@ TEST(KernelDescriptorFromPatchtokens, GivenImplicitArgsThenSetsProperPartsOfDesc kernelTokens.tokens.mediaVfeState[1] = &mediaVfeState1; NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 4); EXPECT_EQ(mediaVfeState0.PerThreadScratchSpace, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]); + EXPECT_EQ(mediaVfeState0.PerThreadScratchSpace, kernelDescriptor.kernelAttributes.spillFillScratchMemorySize); EXPECT_EQ(mediaVfeState1.PerThreadScratchSpace, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]); + EXPECT_EQ(mediaVfeState1.PerThreadScratchSpace, kernelDescriptor.kernelAttributes.privateScratchMemorySize); kernelTokens.tokens.mediaVfeState[0] = nullptr; kernelTokens.tokens.mediaVfeState[1] = nullptr; diff --git a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp index 0e9829fb04..d3b4ae6bd2 
100644 --- a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp +++ b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_tests.cpp @@ -23,24 +23,24 @@ class MockScratchSpaceControllerBase : public ScratchSpaceControllerBase { void programHeaps(HeapContainer &heapContainer, uint32_t offset, - uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, uint32_t requiredPerThreadPrivateScratchSize, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty) override { - ScratchSpaceControllerBase::programHeaps(heapContainer, offset, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty); + ScratchSpaceControllerBase::programHeaps(heapContainer, offset, requiredPerThreadScratchSizeSlot0, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty); programHeapsCalled = true; } void programBindlessSurfaceStateForScratch(BindlessHeapsHelper *heapsHelper, - uint32_t requiredPerThreadScratchSize, + uint32_t requiredPerThreadScratchSizeSlot0, uint32_t requiredPerThreadPrivateScratchSize, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty, bool &vfeStateDirty, NEO::CommandStreamReceiver *csr) override { - ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(heapsHelper, requiredPerThreadScratchSize, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty, csr); + ScratchSpaceControllerBase::programBindlessSurfaceStateForScratch(heapsHelper, requiredPerThreadScratchSizeSlot0, requiredPerThreadPrivateScratchSize, currentTaskCount, osContext, stateBaseAddressDirty, vfeStateDirty, csr); programBindlessSurfaceStateForScratchCalled = true; } ResidencyContainer residencyContainer; diff --git 
a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp index 4398618f86..2d5cc42350 100644 --- a/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp +++ b/shared/test/unit_test/scratch_space_controler/scratch_space_controler_xehp_and_later_tests.cpp @@ -21,22 +21,22 @@ using namespace NEO; class MockScratchSpaceControllerXeHPAndLater : public ScratchSpaceControllerXeHPAndLater { public: using ScratchSpaceControllerXeHPAndLater::bindlessSS; - using ScratchSpaceControllerXeHPAndLater::scratchAllocation; + using ScratchSpaceControllerXeHPAndLater::scratchSlot0Allocation; using ScratchSpaceControllerXeHPAndLater::singleSurfaceStateSize; MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex, ExecutionEnvironment &environment, InternalAllocationStorage &allocationStorage) : ScratchSpaceControllerXeHPAndLater(rootDeviceIndex, environment, allocationStorage) { - scratchAllocation = &alloc; + scratchSlot0Allocation = &alloc; } ~MockScratchSpaceControllerXeHPAndLater() override { - scratchAllocation = nullptr; + scratchSlot0Allocation = nullptr; } void programSurfaceStateAtPtr(void *surfaceStateForScratchAllocation) override { wasProgramSurfaceStateAtPtrCalled = true; } - void prepareScratchAllocation(uint32_t requiredPerThreadScratchSize, - uint32_t requiredPerThreadPrivateScratchSize, + void prepareScratchAllocation(uint32_t requiredPerThreadScratchSizeSlot0, + uint32_t requiredPerThreadScratchSizeSlot1, TaskCountType currentTaskCount, OsContext &osContext, bool &stateBaseAddressDirty,