refactor: create new members for storing spill and private memory in scratch

Rename private scratch space to scratch space slot 1, as the slot can be used generically.

Related-To: NEO-9944
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2024-01-19 12:38:56 +00:00
committed by Compute-Runtime-Automation
parent 45534fe388
commit 87eb5f554a
63 changed files with 492 additions and 520 deletions

View File

@@ -184,20 +184,13 @@ struct CommandList : _ze_command_list_handle_t {
inline ze_command_list_handle_t toHandle() { return this; }
uint32_t getCommandListPerThreadScratchSize() const {
return commandListPerThreadScratchSize;
uint32_t getCommandListPerThreadScratchSize(uint32_t slotId) const {
return commandListPerThreadScratchSize[slotId];
}
void setCommandListPerThreadScratchSize(uint32_t size) {
commandListPerThreadScratchSize = size;
}
uint32_t getCommandListPerThreadPrivateScratchSize() const {
return commandListPerThreadPrivateScratchSize;
}
void setCommandListPerThreadPrivateScratchSize(uint32_t size) {
commandListPerThreadPrivateScratchSize = size;
void setCommandListPerThreadScratchSize(uint32_t slotId, uint32_t size) {
UNRECOVERABLE_IF(slotId > 1);
commandListPerThreadScratchSize[slotId] = size;
}
uint32_t getCommandListSLMEnable() const {
@@ -390,8 +383,7 @@ struct CommandList : _ze_command_list_handle_t {
NEO::HeapAddressModel cmdListHeapAddressModel = NEO::HeapAddressModel::privateHeaps;
CommandListType cmdListType = CommandListType::typeRegular;
uint32_t commandListPerThreadScratchSize = 0u;
uint32_t commandListPerThreadPrivateScratchSize = 0u;
uint32_t commandListPerThreadScratchSize[2]{};
uint32_t partitionCount = 1;
uint32_t defaultMocsIndex = 0;

View File

@@ -117,8 +117,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
unifiedMemoryControls.indirectSharedAllocationsAllowed = false;
unifiedMemoryControls.indirectDeviceAllocationsAllowed = false;
commandListPreemptionMode = device->getDevicePreemptionMode();
commandListPerThreadScratchSize = 0u;
commandListPerThreadPrivateScratchSize = 0u;
commandListPerThreadScratchSize[0] = 0u;
commandListPerThreadScratchSize[1] = 0u;
requiredStreamState.resetState();
finalStreamState.resetState();
containsAnyKernel = false;

View File

@@ -198,7 +198,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
}
}
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u));
}
NEO::ImmediateDispatchFlags dispatchFlags{
@@ -259,7 +259,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
if (kernelOperation) {
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u));
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) {
ssh = this->csr->getGlobalStatelessHeap();

View File

@@ -109,9 +109,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
appendEventForProfiling(event, true, false);
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(0u),
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
this->setCommandListPerThreadScratchSize(perThreadScratchSize);
this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize);
auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0);
this->setCommandListSLMEnable(slmEnable);

View File

@@ -123,10 +123,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
", Group count: ", threadGroupDimensions.groupCountX, ", ", threadGroupDimensions.groupCountY, ", ", threadGroupDimensions.groupCountZ,
", SIMD: ", kernelInfo->getMaxSimdSize());
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
bool needScratchSpace = false;
for (uint32_t slotId = 0u; slotId < 2; slotId++) {
commandListPerThreadScratchSize[slotId] = std::max<uint32_t>(commandListPerThreadScratchSize[slotId], kernelDescriptor.kernelAttributes.perThreadScratchSize[slotId]);
if (commandListPerThreadScratchSize[slotId] > 0) {
needScratchSpace = true;
}
}
if ((this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) && (commandListPerThreadScratchSize != 0 || commandListPerThreadPrivateScratchSize != 0)) {
if ((this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) && needScratchSpace) {
commandContainer.prepareBindfulSsh();
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -34,7 +34,7 @@ struct CommandQueueHw : public CommandQueueImp {
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed, NEO::StreamProperties *streamProperties);
size_t estimateStateBaseAddressCmdSize();
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties);
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties);
MOCKABLE_VIRTUAL size_t estimateFrontEndCmdSizeForMultipleCommandLists(bool &isFrontEndStateDirty, int32_t engineInstanced, CommandList *commandList,
NEO::StreamProperties &csrState,
@@ -51,8 +51,8 @@ struct CommandQueueHw : public CommandQueueImp {
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize,
uint32_t perThreadPrivateScratchSize);
uint32_t perThreadScratchSpaceSlot0Size,
uint32_t perThreadScratchSpaceSlot1Size);
bool getPreemptionCmdProgramming() override;
void patchCommands(CommandList &commandList, uint64_t scratchAddress);
@@ -84,8 +84,8 @@ struct CommandQueueHw : public CommandQueueImp {
NEO::PreemptionMode preemptionMode{};
NEO::PreemptionMode statePreemption{};
uint32_t perThreadScratchSpaceSize = 0;
uint32_t perThreadPrivateScratchSize = 0;
uint32_t perThreadScratchSpaceSlot0Size = 0;
uint32_t perThreadScratchSpaceSlot1Size = 0;
int32_t engineInstanced = -1;
UnifiedMemoryControls unifiedMemoryControls{};

View File

@@ -333,14 +333,14 @@ void CommandQueueHw<gfxCoreFamily>::programFrontEndAndClearDirtyFlag(
}
auto scratchSpaceController = this->csr->getScratchSpaceController();
programFrontEnd(scratchSpaceController->getScratchPatchAddress(),
scratchSpaceController->getPerThreadScratchSpaceSize(),
scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(),
cmdStream,
csrState);
ctx.frontEndStateDirty = false;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &cmdStream, NEO::StreamProperties &streamProperties) {
void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &cmdStream, NEO::StreamProperties &streamProperties) {
UNRECOVERABLE_IF(csr == nullptr);
auto &hwInfo = device->getHwInfo();
auto &gfxCoreHelper = device->getGfxCoreHelper();
@@ -349,7 +349,7 @@ void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, uin
auto pVfeState = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(&cmdStream, hwInfo, engineGroupType);
NEO::PreambleHelper<GfxFamily>::programVfeState(pVfeState,
device->getNEODevice()->getRootDeviceEnvironment(),
perThreadScratchSpaceSize,
perThreadScratchSpaceSlot0Size,
scratchAddress,
device->getMaxNumHwThreads(),
streamProperties);
@@ -543,11 +543,11 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
auto &commandContainer = commandList->getCmdContainer();
if (!isCopyOnlyCommandQueue) {
ctx.perThreadScratchSpaceSize = std::max(ctx.perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
ctx.perThreadPrivateScratchSize = std::max(ctx.perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
ctx.perThreadScratchSpaceSlot0Size = std::max(ctx.perThreadScratchSpaceSlot0Size, commandList->getCommandListPerThreadScratchSize(0u));
ctx.perThreadScratchSpaceSlot1Size = std::max(ctx.perThreadScratchSpaceSlot1Size, commandList->getCommandListPerThreadScratchSize(1u));
if (commandList->getCmdListHeapAddressModel() == NEO::HeapAddressModel::privateHeaps) {
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
if (commandList->getCommandListPerThreadScratchSize(0u) != 0 || commandList->getCommandListPerThreadScratchSize(1u) != 0) {
if (commandContainer.getIndirectHeap(NEO::HeapType::surfaceState) != nullptr) {
heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::surfaceState)->getGraphicsAllocation());
}
@@ -668,7 +668,7 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpaceAndUpdateGSBAStateDirtyFla
handleScratchSpace(this->heapContainer,
scratchController,
ctx.gsbaStateDirty, ctx.frontEndStateDirty,
ctx.perThreadScratchSpaceSize, ctx.perThreadPrivateScratchSize);
ctx.perThreadScratchSpaceSlot0Size, ctx.perThreadScratchSpaceSlot1Size);
ctx.gsbaStateDirty |= this->csr->getGSBAStateDirty();
ctx.scratchGsba = scratchController->calculateNewGSH();
@@ -1012,7 +1012,7 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStartSecondaryBa
auto scratchSpaceController = this->csr->getScratchSpaceController();
ctx.cmdListBeginState.frontEndState.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(returnPoints[returnPointIdx].configSnapshot.frontEndState);
programFrontEnd(scratchSpaceController->getScratchPatchAddress(),
scratchSpaceController->getPerThreadScratchSpaceSize(),
scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(),
commandStream,
ctx.cmdListBeginState);
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&commandStream,

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -122,12 +122,12 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) {
uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) {
if (perThreadScratchSpaceSize > 0) {
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(),
if (perThreadScratchSpaceSlot0Size > 0) {
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSlot0Size, 0u, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState);
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
auto scratchAllocation = scratchController->getScratchSpaceSlot0Allocation();
csr->makeResident(*scratchAllocation);
}
}

View File

@@ -137,28 +137,28 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHeaps,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) {
if (perThreadScratchSpaceSize > 0 || perThreadPrivateScratchSize > 0) {
uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) {
if (perThreadScratchSpaceSlot0Size > 0 || perThreadScratchSpaceSlot1Size > 0) {
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) {
auto globalStatelessHeapAllocation = csr->getGlobalStatelessHeapAllocation();
scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState);
}
if (sshHeaps.size() > 0) {
uint32_t offsetIndex = maxPtssIndex * csr->getOsContext().getEngineType() + 1u;
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState);
}
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
if (scratchAllocation != nullptr) {
csr->makeResident(*scratchAllocation);
auto scratch0Allocation = scratchController->getScratchSpaceSlot0Allocation();
if (scratch0Allocation != nullptr) {
csr->makeResident(*scratch0Allocation);
}
auto privateScratchAllocation = scratchController->getPrivateScratchSpaceAllocation();
auto scratch1Allocation = scratchController->getScratchSpaceSlot1Allocation();
if (privateScratchAllocation != nullptr) {
csr->makeResident(*privateScratchAllocation);
if (scratch1Allocation != nullptr) {
csr->makeResident(*scratch1Allocation);
}
}
}

View File

@@ -899,7 +899,7 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
pKernelProperties->maxSubgroupSize = kernelDescriptor.kernelAttributes.simdSize;
pKernelProperties->localMemSize = kernelDescriptor.kernelAttributes.slmInlineSize;
pKernelProperties->privateMemSize = gfxCoreHelper.getKernelPrivateMemSize(kernelDescriptor);
pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.spillFillScratchMemorySize;
memset(pKernelProperties->uuid.kid, 0, ZE_MAX_KERNEL_UUID_SIZE);
memset(pKernelProperties->uuid.mid, 0, ZE_MAX_MODULE_UUID_SIZE);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -21,11 +21,11 @@ namespace ult {
ModuleImmutableDataFixture::MockImmutableMemoryManager::MockImmutableMemoryManager(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MockMemoryManager(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment)) {}
ModuleImmutableDataFixture::MockImmutableData::MockImmutableData(uint32_t perHwThreadPrivateMemorySize) : MockImmutableData(perHwThreadPrivateMemorySize, 0, 0) {}
ModuleImmutableDataFixture::MockImmutableData::MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSize, uint32_t perThreaddPrivateScratchSize) {
ModuleImmutableDataFixture::MockImmutableData::MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSlot0Size, uint32_t perThreadScratchSlot1Size) {
mockKernelDescriptor = new NEO::KernelDescriptor;
mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize;
mockKernelDescriptor->kernelAttributes.perThreadScratchSize[0] = perThreadScratchSize;
mockKernelDescriptor->kernelAttributes.perThreadScratchSize[1] = perThreaddPrivateScratchSize;
mockKernelDescriptor->kernelAttributes.perThreadScratchSize[0] = perThreadScratchSlot0Size;
mockKernelDescriptor->kernelAttributes.perThreadScratchSize[1] = perThreadScratchSlot1Size;
kernelDescriptor = mockKernelDescriptor;
mockKernelInfo = new NEO::KernelInfo;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -38,7 +38,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
using KernelImmutableData::kernelDescriptor;
using KernelImmutableData::kernelInfo;
MockImmutableData(uint32_t perHwThreadPrivateMemorySize);
MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSize, uint32_t perThreaddPrivateScratchSize);
MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSlot0Size, uint32_t perThreadScratchSlot1Size);
void setDevice(L0::Device *inDevice) {
device = inDevice;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -2528,7 +2528,7 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset);
auto scratchSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchSurfaceStateBuffer);
auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation();
auto scratchAllocation = scratchSpaceController->getScratchSpaceSlot0Allocation();
EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress());
}
@@ -2582,7 +2582,7 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset);
auto scratchSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchSurfaceStateBuffer);
auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation();
auto scratchAllocation = scratchSpaceController->getScratchSpaceSlot0Allocation();
EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress());
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -611,23 +611,23 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize());
EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize(0u));
auto ultCsr = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSize);
EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSlot0Size);
commandList->cmdQImmediate = nullptr;
}
HWTEST2_F(CmdlistAppendLaunchKernelTests,
givenImmediateCommandListUsesFlushTaskWhenDispatchingKernelWithSpillAndPrivateScratchSpaceThenExpectCsrHasCorrectValuesSet, IsAtLeastXeHpCore) {
constexpr uint32_t scratchPerThreadSize = 0x200;
constexpr uint32_t privateScratchPerThreadSize = 0x100;
constexpr uint32_t scratch0PerThreadSize = 0x200;
constexpr uint32_t scratch1PerThreadSize = 0x100;
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;
kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratchPerThreadSize;
kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = privateScratchPerThreadSize;
kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratch0PerThreadSize;
kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = scratch1PerThreadSize;
createModuleFromMockBinary(0u, false, mockKernelImmData.get());
auto kernel = std::make_unique<MockKernel>(module.get());
@@ -660,12 +660,12 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize());
EXPECT_EQ(privateScratchPerThreadSize, commandList->getCommandListPerThreadPrivateScratchSize());
EXPECT_EQ(scratch0PerThreadSize, commandList->getCommandListPerThreadScratchSize(0u));
EXPECT_EQ(scratch1PerThreadSize, commandList->getCommandListPerThreadScratchSize(1u));
auto ultCsr = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSize);
EXPECT_EQ(privateScratchPerThreadSize, ultCsr->requiredPrivateScratchSize);
EXPECT_EQ(scratch0PerThreadSize, ultCsr->requiredScratchSlot0Size);
EXPECT_EQ(scratch1PerThreadSize, ultCsr->requiredScratchSlot1Size);
commandList->cmdQImmediate = nullptr;
}
@@ -674,14 +674,14 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
DebugManagerStateRestore restorer;
NEO::debugManager.flags.EventWaitOnHost.set(1);
constexpr uint32_t scratchPerThreadSize = 0x200;
constexpr uint32_t privateScratchPerThreadSize = 0x100;
constexpr uint32_t scratch0PerThreadSize = 0x200;
constexpr uint32_t scratch1PerThreadSize = 0x100;
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;
kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratchPerThreadSize;
kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = privateScratchPerThreadSize;
kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratch0PerThreadSize;
kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = scratch1PerThreadSize;
createModuleFromMockBinary(0u, false, mockKernelImmData.get());
auto kernel = std::make_unique<MockKernel>(module.get());

View File

@@ -1182,8 +1182,8 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests, givenKernelWithScratchAndPrivateWhenAp
result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(commandList->getCommandListPerThreadPrivateScratchSize(), static_cast<uint32_t>(0x100));
EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(), static_cast<uint32_t>(0x200));
EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(1u), static_cast<uint32_t>(0x100));
EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(0u), static_cast<uint32_t>(0x200));
}
HWTEST2_F(CmdlistAppendLaunchKernelTests, givenGlobalBindlessAllocatorAndKernelWithPrivateScratchWhenAppendLaunchKernelThenCmdContainerHasBindfulSSHAllocated, IsAtLeastXeHpCore) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -2143,7 +2143,7 @@ HWTEST2_F(ImmediateFlushTaskCsrSharedHeapCmdListTest,
EXPECT_EQ(0u, frontEndCmd->getScratchSpaceBuffer());
EXPECT_EQ(nullptr, csrImmediate.getScratchSpaceController()->getScratchSpaceAllocation());
EXPECT_EQ(nullptr, csrImmediate.getScratchSpaceController()->getScratchSpaceSlot0Allocation());
mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x100;
@@ -2164,7 +2164,7 @@ HWTEST2_F(ImmediateFlushTaskCsrSharedHeapCmdListTest,
constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE);
EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer());
auto scratchAllocation = csrImmediate.getScratchSpaceController()->getScratchSpaceAllocation();
auto scratchAllocation = csrImmediate.getScratchSpaceController()->getScratchSpaceSlot0Allocation();
ASSERT_NE(nullptr, scratchAllocation);
EXPECT_TRUE(csrImmediate.isMadeResident(scratchAllocation));

View File

@@ -1089,12 +1089,12 @@ class MockCommandQueue : public L0::CommandQueueHw<gfxCoreFamily> {
void handleScratchSpace(NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize,
uint32_t perThreadPrivateScratchSize) override {
uint32_t perThreadScratchSpaceSlot0Size,
uint32_t perThreadScratchSpaceSlot1Size) override {
this->mockHeapContainer = heapContainer;
}
void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties) override {
void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties) override {
return;
}
};
@@ -1108,7 +1108,7 @@ HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenConta
commandQueue->initialize(false, false, false);
auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
commandList->setCommandListPerThreadScratchSize(100u);
commandList->setCommandListPerThreadScratchSize(0u, 100u);
auto commandListHandle = commandList->toHandle();
commandList->close();
@@ -1207,7 +1207,7 @@ HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndScratchW
commandQueue->initialize(false, false, false);
auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
commandList->setCommandListPerThreadScratchSize(100u);
commandList->setCommandListPerThreadScratchSize(0u, 100u);
auto commandListHandle = commandList->toHandle();
commandList->close();
@@ -1234,7 +1234,7 @@ HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndPrivateS
commandQueue->initialize(false, false, false);
auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
commandList->setCommandListPerThreadPrivateScratchSize(100u);
commandList->setCommandListPerThreadScratchSize(1u, 100u);
auto commandListHandle = commandList->toHandle();
commandList->close();
@@ -1265,7 +1265,7 @@ HWTEST2_F(ExecuteCommandListTests, givenBindlessHelperWhenCommandListIsExecutedO
commandQueue->initialize(false, false, false);
auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
commandList->setCommandListPerThreadScratchSize(100u);
commandList->setCommandListPerThreadScratchSize(0u, 100u);
auto commandListHandle = commandList->toHandle();
commandList->close();
@@ -1418,10 +1418,10 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen
returnValue);
auto commandList0 = new CommandListCoreFamily<gfxCoreFamily>();
commandList0->initialize(device, NEO::EngineGroupType::compute, 0u);
commandList0->setCommandListPerThreadScratchSize(0u);
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
auto commandList1 = new CommandListCoreFamily<gfxCoreFamily>();
commandList1->initialize(device, NEO::EngineGroupType::compute, 0u);
commandList1->setCommandListPerThreadScratchSize(0u);
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
auto commandListHandle0 = commandList0->toHandle();
commandList0->close();
auto commandListHandle1 = commandList1->toHandle();
@@ -1458,9 +1458,9 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen
false,
returnValue));
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
commandList0->setCommandListPerThreadScratchSize(0u);
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
commandList1->setCommandListPerThreadScratchSize(0u);
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
auto commandListHandle0 = commandList0->toHandle();
commandList0->close();
auto commandListHandle1 = commandList1->toHandle();
@@ -1503,17 +1503,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
returnValue));
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
commandList0->setCommandListPerThreadScratchSize(512u);
commandList1->setCommandListPerThreadScratchSize(0u);
commandList0->setCommandListPerThreadScratchSize(0u, 512u);
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
auto commandListHandle0 = commandList0->toHandle();
commandList0->close();
auto commandListHandle1 = commandList1->toHandle();
commandList1->close();
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
@@ -1528,10 +1528,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
ASSERT_EQ(1u, gsbaStates.size());
commandList0->reset();
commandList0->setCommandListPerThreadScratchSize(0u);
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
commandList0->close();
commandList1->reset();
commandList1->setCommandListPerThreadScratchSize(0u);
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
commandList1->close();
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
@@ -1544,9 +1544,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
returnValue));
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
usedSpaceAfter = commandQueue1->commandStream.getUsed();
@@ -1581,17 +1581,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
returnValue));
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
commandList0->setCommandListPerThreadScratchSize(0u);
commandList1->setCommandListPerThreadScratchSize(512u);
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
commandList1->setCommandListPerThreadScratchSize(0u, 512u);
auto commandListHandle0 = commandList0->toHandle();
commandList0->close();
auto commandListHandle1 = commandList1->toHandle();
commandList1->close();
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
@@ -1606,10 +1606,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
ASSERT_EQ(2u, gsbaStates.size());
commandList0->reset();
commandList0->setCommandListPerThreadScratchSize(512u);
commandList0->setCommandListPerThreadScratchSize(0u, 512u);
commandList0->close();
commandList1->reset();
commandList1->setCommandListPerThreadScratchSize(0u);
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
commandList1->close();
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
@@ -1622,9 +1622,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
returnValue));
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
usedSpaceAfter = commandQueue1->commandStream.getUsed();
@@ -1659,17 +1659,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
returnValue));
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
commandList0->setCommandListPerThreadScratchSize(512u);
commandList1->setCommandListPerThreadScratchSize(512u);
commandList0->setCommandListPerThreadScratchSize(0u, 512u);
commandList1->setCommandListPerThreadScratchSize(0u, 512u);
auto commandListHandle0 = commandList0->toHandle();
commandList0->close();
auto commandListHandle1 = commandList1->toHandle();
commandList1->close();
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
@@ -1684,10 +1684,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
ASSERT_EQ(1u, gsbaStates.size());
commandList0->reset();
commandList0->setCommandListPerThreadScratchSize(1024u);
commandList0->setCommandListPerThreadScratchSize(0u, 1024u);
commandList0->close();
commandList1->reset();
commandList1->setCommandListPerThreadScratchSize(1024u);
commandList1->setCommandListPerThreadScratchSize(0u, 1024u);
commandList1->close();
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
@@ -1700,9 +1700,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
returnValue));
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
usedSpaceAfter = commandQueue1->commandStream.getUsed();
@@ -1737,17 +1737,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
returnValue));
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
commandList0->setCommandListPerThreadScratchSize(0u);
commandList1->setCommandListPerThreadScratchSize(512u);
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
commandList1->setCommandListPerThreadScratchSize(0u, 512u);
auto commandListHandle0 = commandList0->toHandle();
commandList0->close();
auto commandListHandle1 = commandList1->toHandle();
commandList1->close();
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
@@ -1762,10 +1762,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
ASSERT_EQ(2u, gsbaStates.size());
commandList0->reset();
commandList0->setCommandListPerThreadScratchSize(1024u);
commandList0->setCommandListPerThreadScratchSize(0u, 1024u);
commandList0->close();
commandList1->reset();
commandList1->setCommandListPerThreadScratchSize(2048u);
commandList1->setCommandListPerThreadScratchSize(0u, 2048u);
commandList1->close();
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
@@ -1777,9 +1777,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
false,
returnValue));
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
usedSpaceAfter = commandQueue1->commandStream.getUsed();
@@ -1813,17 +1813,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
returnValue));
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
commandList0->setCommandListPerThreadPrivateScratchSize(0u);
commandList1->setCommandListPerThreadPrivateScratchSize(512u);
commandList0->setCommandListPerThreadScratchSize(1u, 0u);
commandList1->setCommandListPerThreadScratchSize(1u, 512u);
auto commandListHandle0 = commandList0->toHandle();
commandList0->close();
auto commandListHandle1 = commandList1->toHandle();
commandList1->close();
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize());
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize());
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
@@ -1836,10 +1836,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
ASSERT_EQ(2u, mediaVfeStates.size());
commandList0->reset();
commandList0->setCommandListPerThreadPrivateScratchSize(1024u);
commandList0->setCommandListPerThreadScratchSize(1u, 1024u);
commandList0->close();
commandList1->reset();
commandList1->setCommandListPerThreadPrivateScratchSize(2048u);
commandList1->setCommandListPerThreadScratchSize(1u, 2048u);
commandList1->close();
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
@@ -1851,9 +1851,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
false,
returnValue));
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize());
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize());
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
usedSpaceAfter = commandQueue1->commandStream.getUsed();
@@ -1885,7 +1885,7 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledWhenExecutingCmdLi
false,
returnValue));
auto commandList = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
commandList->setCommandListPerThreadPrivateScratchSize(0u);
commandList->setCommandListPerThreadScratchSize(1u, 0u);
auto commandListHandle = commandList->toHandle();
commandList->close();
@@ -1930,7 +1930,7 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledAndDebugFlagSetWhe
false,
returnValue));
auto commandList = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
commandList->setCommandListPerThreadPrivateScratchSize(0u);
commandList->setCommandListPerThreadScratchSize(1u, 0u);
auto commandListHandle = commandList->toHandle();
commandList->close();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -840,8 +840,8 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP
void programHeaps(HeapContainer &heapContainer,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
@@ -850,7 +850,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP
programHeapsCalled = true;
}
NEO::GraphicsAllocation *getScratchSpaceAllocation() override {
NEO::GraphicsAllocation *getScratchSpaceSlot0Allocation() override {
return scratchAllocation;
}
@@ -895,7 +895,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP
HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHeapContainerIsZeroSizeThenNoFunctionIsCalled, Platforms) {
class MockScratchSpaceControllerXeHPAndLater : public NEO::ScratchSpaceControllerXeHPAndLater {
public:
using NEO::ScratchSpaceControllerXeHPAndLater::scratchAllocation;
using NEO::ScratchSpaceControllerXeHPAndLater::scratchSlot0Allocation;
bool programHeapsCalled = false;
MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex,
NEO::ExecutionEnvironment &environment,
@@ -903,8 +903,8 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe
void programHeaps(HeapContainer &heapContainer,
uint32_t scratchSlot,
uint32_t requiredPerThreadScratchSize,
uint32_t requiredPerThreadPrivateScratchSize,
uint32_t requiredPerThreadScratchSizeSlot0,
uint32_t requiredPerThreadScratchSizeSlot1,
TaskCountType currentTaskCount,
OsContext &osContext,
bool &stateBaseAddressDirty,
@@ -937,11 +937,11 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe
NEO::GraphicsAllocation graphicsAllocation(1u, NEO::AllocationType::buffer, nullptr, 0u, 0u, 0u, MemoryPool::system4KBPages, 0u);
auto scratch = static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get());
scratch->scratchAllocation = &graphicsAllocation;
scratch->scratchSlot0Allocation = &graphicsAllocation;
commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x1000, 0u);
EXPECT_FALSE(scratch->programHeapsCalled);
scratch->scratchAllocation = nullptr;
scratch->scratchSlot0Allocation = nullptr;
}
HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorrectlyPatched, IsAtLeastXeHpCore) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -369,15 +369,15 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsT
false,
returnValue));
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(512u);
CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(0u, 512u);
CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(0u, 1024u);
ASSERT_NE(nullptr, commandQueue);
auto usedSpaceBefore = commandQueue->commandStream.getUsed();
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1024u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(1024u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
@@ -395,15 +395,15 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsT
CommandList::fromHandle(commandLists[0])->reset();
CommandList::fromHandle(commandLists[1])->reset();
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(2048u);
CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(0u, 2048u);
CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(0u, 1024u);
ASSERT_NE(nullptr, commandQueue);
usedSpaceBefore = commandQueue->commandStream.getUsed();
result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
usedSpaceAfter = commandQueue->commandStream.getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
* Copyright (C) 2022-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -138,7 +138,7 @@ HWTEST2_F(L0DebuggerPerContextAddressSpaceTest, givenDebuggingEnabledAndRequired
ze_command_list_handle_t commandLists[] = {
CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)->toHandle()};
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(4096);
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(0u, 4096);
CommandList::fromHandle(commandLists[0])->close();
uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);

View File

@@ -1560,8 +1560,8 @@ TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
auto expectedPrivateSize = 0x200u;
auto &kernelDescriptor = const_cast<KernelDescriptor &>(kernel->getKernelDescriptor());
kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = expectedSpillSize;
kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = expectedPrivateSize;
kernelDescriptor.kernelAttributes.spillFillScratchMemorySize = expectedSpillSize;
kernelDescriptor.kernelAttributes.privateScratchMemorySize = expectedPrivateSize;
ze_result_t res = kernel->getProperties(&kernelProperties);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
@@ -1580,7 +1580,7 @@ TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
EXPECT_EQ(maxNumSubgroups, kernelProperties.maxNumSubgroups);
EXPECT_EQ(sizeof(float) * 16U, kernelProperties.localMemSize);
EXPECT_EQ(expectedPrivateSize, kernelProperties.privateMemSize);
EXPECT_EQ(device->getGfxCoreHelper().getKernelPrivateMemSize(kernelDescriptor), kernelProperties.privateMemSize);
EXPECT_EQ(expectedSpillSize, kernelProperties.spillMemSize);
uint8_t zeroKid[ZE_MAX_KERNEL_UUID_SIZE];
@@ -1603,8 +1603,8 @@ HWTEST2_F(KernelPropertiesTests, givenKernelWithPrivateScratchMemoryThenProperPr
auto expectedPrivateSize = 0x200u;
auto &kernelDescriptor = const_cast<KernelDescriptor &>(kernel->getKernelDescriptor());
kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = expectedSpillSize;
kernelDescriptor.kernelAttributes.perThreadScratchSize[1] = expectedPrivateSize;
kernelDescriptor.kernelAttributes.spillFillScratchMemorySize = expectedSpillSize;
kernelDescriptor.kernelAttributes.privateScratchMemorySize = expectedPrivateSize;
kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0xDEAD;
ze_result_t res = kernel->getProperties(&kernelProperties);