mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-07 04:48:08 +08:00
refactor: create new members for storing spill and private memory in scratch
Rename private scratch space to scratch space slot 1, as it can be generic.

Related-To: NEO-9944
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
45534fe388
commit
87eb5f554a
@@ -184,20 +184,13 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
|
||||
inline ze_command_list_handle_t toHandle() { return this; }
|
||||
|
||||
uint32_t getCommandListPerThreadScratchSize() const {
|
||||
return commandListPerThreadScratchSize;
|
||||
uint32_t getCommandListPerThreadScratchSize(uint32_t slotId) const {
|
||||
return commandListPerThreadScratchSize[slotId];
|
||||
}
|
||||
|
||||
void setCommandListPerThreadScratchSize(uint32_t size) {
|
||||
commandListPerThreadScratchSize = size;
|
||||
}
|
||||
|
||||
uint32_t getCommandListPerThreadPrivateScratchSize() const {
|
||||
return commandListPerThreadPrivateScratchSize;
|
||||
}
|
||||
|
||||
void setCommandListPerThreadPrivateScratchSize(uint32_t size) {
|
||||
commandListPerThreadPrivateScratchSize = size;
|
||||
void setCommandListPerThreadScratchSize(uint32_t slotId, uint32_t size) {
|
||||
UNRECOVERABLE_IF(slotId > 1);
|
||||
commandListPerThreadScratchSize[slotId] = size;
|
||||
}
|
||||
|
||||
uint32_t getCommandListSLMEnable() const {
|
||||
@@ -390,8 +383,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
NEO::HeapAddressModel cmdListHeapAddressModel = NEO::HeapAddressModel::privateHeaps;
|
||||
|
||||
CommandListType cmdListType = CommandListType::typeRegular;
|
||||
uint32_t commandListPerThreadScratchSize = 0u;
|
||||
uint32_t commandListPerThreadPrivateScratchSize = 0u;
|
||||
uint32_t commandListPerThreadScratchSize[2]{};
|
||||
uint32_t partitionCount = 1;
|
||||
uint32_t defaultMocsIndex = 0;
|
||||
|
||||
|
||||
@@ -117,8 +117,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||
unifiedMemoryControls.indirectSharedAllocationsAllowed = false;
|
||||
unifiedMemoryControls.indirectDeviceAllocationsAllowed = false;
|
||||
commandListPreemptionMode = device->getDevicePreemptionMode();
|
||||
commandListPerThreadScratchSize = 0u;
|
||||
commandListPerThreadPrivateScratchSize = 0u;
|
||||
commandListPerThreadScratchSize[0] = 0u;
|
||||
commandListPerThreadScratchSize[1] = 0u;
|
||||
requiredStreamState.resetState();
|
||||
finalStreamState.resetState();
|
||||
containsAnyKernel = false;
|
||||
|
||||
@@ -198,7 +198,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
|
||||
}
|
||||
}
|
||||
|
||||
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
|
||||
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u));
|
||||
}
|
||||
|
||||
NEO::ImmediateDispatchFlags dispatchFlags{
|
||||
@@ -259,7 +259,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
|
||||
|
||||
if (kernelOperation) {
|
||||
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
|
||||
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(), this->getCommandListPerThreadPrivateScratchSize());
|
||||
this->csr->setRequiredScratchSizes(this->getCommandListPerThreadScratchSize(0u), this->getCommandListPerThreadScratchSize(1u));
|
||||
|
||||
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) {
|
||||
ssh = this->csr->getGlobalStatelessHeap();
|
||||
|
||||
@@ -109,9 +109,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
}
|
||||
|
||||
appendEventForProfiling(event, true, false);
|
||||
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
|
||||
auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(0u),
|
||||
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
|
||||
this->setCommandListPerThreadScratchSize(perThreadScratchSize);
|
||||
this->setCommandListPerThreadScratchSize(0u, perThreadScratchSize);
|
||||
|
||||
auto slmEnable = (kernel->getImmutableData()->getDescriptor().kernelAttributes.slmInlineSize > 0);
|
||||
this->setCommandListSLMEnable(slmEnable);
|
||||
|
||||
@@ -123,10 +123,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
", Group count: ", threadGroupDimensions.groupCountX, ", ", threadGroupDimensions.groupCountY, ", ", threadGroupDimensions.groupCountZ,
|
||||
", SIMD: ", kernelInfo->getMaxSimdSize());
|
||||
|
||||
commandListPerThreadScratchSize = std::max<uint32_t>(commandListPerThreadScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[0]);
|
||||
commandListPerThreadPrivateScratchSize = std::max<uint32_t>(commandListPerThreadPrivateScratchSize, kernelDescriptor.kernelAttributes.perThreadScratchSize[1]);
|
||||
bool needScratchSpace = false;
|
||||
for (uint32_t slotId = 0u; slotId < 2; slotId++) {
|
||||
commandListPerThreadScratchSize[slotId] = std::max<uint32_t>(commandListPerThreadScratchSize[slotId], kernelDescriptor.kernelAttributes.perThreadScratchSize[slotId]);
|
||||
if (commandListPerThreadScratchSize[slotId] > 0) {
|
||||
needScratchSpace = true;
|
||||
}
|
||||
}
|
||||
|
||||
if ((this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) && (commandListPerThreadScratchSize != 0 || commandListPerThreadPrivateScratchSize != 0)) {
|
||||
if ((this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) && needScratchSpace) {
|
||||
commandContainer.prepareBindfulSsh();
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -34,7 +34,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
|
||||
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream, bool cachedMOCSAllowed, NEO::StreamProperties *streamProperties);
|
||||
size_t estimateStateBaseAddressCmdSize();
|
||||
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties);
|
||||
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties);
|
||||
|
||||
MOCKABLE_VIRTUAL size_t estimateFrontEndCmdSizeForMultipleCommandLists(bool &isFrontEndStateDirty, int32_t engineInstanced, CommandList *commandList,
|
||||
NEO::StreamProperties &csrState,
|
||||
@@ -51,8 +51,8 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSize,
|
||||
uint32_t perThreadPrivateScratchSize);
|
||||
uint32_t perThreadScratchSpaceSlot0Size,
|
||||
uint32_t perThreadScratchSpaceSlot1Size);
|
||||
|
||||
bool getPreemptionCmdProgramming() override;
|
||||
void patchCommands(CommandList &commandList, uint64_t scratchAddress);
|
||||
@@ -84,8 +84,8 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
|
||||
NEO::PreemptionMode preemptionMode{};
|
||||
NEO::PreemptionMode statePreemption{};
|
||||
uint32_t perThreadScratchSpaceSize = 0;
|
||||
uint32_t perThreadPrivateScratchSize = 0;
|
||||
uint32_t perThreadScratchSpaceSlot0Size = 0;
|
||||
uint32_t perThreadScratchSpaceSlot1Size = 0;
|
||||
int32_t engineInstanced = -1;
|
||||
UnifiedMemoryControls unifiedMemoryControls{};
|
||||
|
||||
|
||||
@@ -333,14 +333,14 @@ void CommandQueueHw<gfxCoreFamily>::programFrontEndAndClearDirtyFlag(
|
||||
}
|
||||
auto scratchSpaceController = this->csr->getScratchSpaceController();
|
||||
programFrontEnd(scratchSpaceController->getScratchPatchAddress(),
|
||||
scratchSpaceController->getPerThreadScratchSpaceSize(),
|
||||
scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(),
|
||||
cmdStream,
|
||||
csrState);
|
||||
ctx.frontEndStateDirty = false;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &cmdStream, NEO::StreamProperties &streamProperties) {
|
||||
void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &cmdStream, NEO::StreamProperties &streamProperties) {
|
||||
UNRECOVERABLE_IF(csr == nullptr);
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
auto &gfxCoreHelper = device->getGfxCoreHelper();
|
||||
@@ -349,7 +349,7 @@ void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, uin
|
||||
auto pVfeState = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(&cmdStream, hwInfo, engineGroupType);
|
||||
NEO::PreambleHelper<GfxFamily>::programVfeState(pVfeState,
|
||||
device->getNEODevice()->getRootDeviceEnvironment(),
|
||||
perThreadScratchSpaceSize,
|
||||
perThreadScratchSpaceSlot0Size,
|
||||
scratchAddress,
|
||||
device->getMaxNumHwThreads(),
|
||||
streamProperties);
|
||||
@@ -543,11 +543,11 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
|
||||
auto &commandContainer = commandList->getCmdContainer();
|
||||
|
||||
if (!isCopyOnlyCommandQueue) {
|
||||
ctx.perThreadScratchSpaceSize = std::max(ctx.perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
|
||||
ctx.perThreadPrivateScratchSize = std::max(ctx.perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
|
||||
ctx.perThreadScratchSpaceSlot0Size = std::max(ctx.perThreadScratchSpaceSlot0Size, commandList->getCommandListPerThreadScratchSize(0u));
|
||||
ctx.perThreadScratchSpaceSlot1Size = std::max(ctx.perThreadScratchSpaceSlot1Size, commandList->getCommandListPerThreadScratchSize(1u));
|
||||
|
||||
if (commandList->getCmdListHeapAddressModel() == NEO::HeapAddressModel::privateHeaps) {
|
||||
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
|
||||
if (commandList->getCommandListPerThreadScratchSize(0u) != 0 || commandList->getCommandListPerThreadScratchSize(1u) != 0) {
|
||||
if (commandContainer.getIndirectHeap(NEO::HeapType::surfaceState) != nullptr) {
|
||||
heapContainer.push_back(commandContainer.getIndirectHeap(NEO::HeapType::surfaceState)->getGraphicsAllocation());
|
||||
}
|
||||
@@ -668,7 +668,7 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpaceAndUpdateGSBAStateDirtyFla
|
||||
handleScratchSpace(this->heapContainer,
|
||||
scratchController,
|
||||
ctx.gsbaStateDirty, ctx.frontEndStateDirty,
|
||||
ctx.perThreadScratchSpaceSize, ctx.perThreadPrivateScratchSize);
|
||||
ctx.perThreadScratchSpaceSlot0Size, ctx.perThreadScratchSpaceSlot1Size);
|
||||
ctx.gsbaStateDirty |= this->csr->getGSBAStateDirty();
|
||||
ctx.scratchGsba = scratchController->calculateNewGSH();
|
||||
|
||||
@@ -1012,7 +1012,7 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStartSecondaryBa
|
||||
auto scratchSpaceController = this->csr->getScratchSpaceController();
|
||||
ctx.cmdListBeginState.frontEndState.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(returnPoints[returnPointIdx].configSnapshot.frontEndState);
|
||||
programFrontEnd(scratchSpaceController->getScratchPatchAddress(),
|
||||
scratchSpaceController->getPerThreadScratchSpaceSize(),
|
||||
scratchSpaceController->getPerThreadScratchSpaceSizeSlot0(),
|
||||
commandStream,
|
||||
ctx.cmdListBeginState);
|
||||
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&commandStream,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -122,12 +122,12 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) {
|
||||
uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) {
|
||||
|
||||
if (perThreadScratchSpaceSize > 0) {
|
||||
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(),
|
||||
if (perThreadScratchSpaceSlot0Size > 0) {
|
||||
scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSlot0Size, 0u, csr->peekTaskCount(),
|
||||
csr->getOsContext(), gsbaState, frontEndState);
|
||||
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
|
||||
auto scratchAllocation = scratchController->getScratchSpaceSlot0Allocation();
|
||||
csr->makeResident(*scratchAllocation);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -137,28 +137,28 @@ template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHeaps,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSize, uint32_t perThreadPrivateScratchSize) {
|
||||
if (perThreadScratchSpaceSize > 0 || perThreadPrivateScratchSize > 0) {
|
||||
uint32_t perThreadScratchSpaceSlot0Size, uint32_t perThreadScratchSpaceSlot1Size) {
|
||||
if (perThreadScratchSpaceSlot0Size > 0 || perThreadScratchSpaceSlot1Size > 0) {
|
||||
if (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) {
|
||||
auto globalStatelessHeapAllocation = csr->getGlobalStatelessHeapAllocation();
|
||||
scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
|
||||
scratchController->setRequiredScratchSpace(globalStatelessHeapAllocation->getUnderlyingBuffer(), 0, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(),
|
||||
csr->getOsContext(), gsbaState, frontEndState);
|
||||
}
|
||||
if (sshHeaps.size() > 0) {
|
||||
uint32_t offsetIndex = maxPtssIndex * csr->getOsContext().getEngineType() + 1u;
|
||||
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSize, perThreadPrivateScratchSize, csr->peekTaskCount(),
|
||||
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size, csr->peekTaskCount(),
|
||||
csr->getOsContext(), gsbaState, frontEndState);
|
||||
}
|
||||
|
||||
auto scratchAllocation = scratchController->getScratchSpaceAllocation();
|
||||
if (scratchAllocation != nullptr) {
|
||||
csr->makeResident(*scratchAllocation);
|
||||
auto scratch0Allocation = scratchController->getScratchSpaceSlot0Allocation();
|
||||
if (scratch0Allocation != nullptr) {
|
||||
csr->makeResident(*scratch0Allocation);
|
||||
}
|
||||
|
||||
auto privateScratchAllocation = scratchController->getPrivateScratchSpaceAllocation();
|
||||
auto scratch1Allocation = scratchController->getScratchSpaceSlot1Allocation();
|
||||
|
||||
if (privateScratchAllocation != nullptr) {
|
||||
csr->makeResident(*privateScratchAllocation);
|
||||
if (scratch1Allocation != nullptr) {
|
||||
csr->makeResident(*scratch1Allocation);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -899,7 +899,7 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
|
||||
pKernelProperties->maxSubgroupSize = kernelDescriptor.kernelAttributes.simdSize;
|
||||
pKernelProperties->localMemSize = kernelDescriptor.kernelAttributes.slmInlineSize;
|
||||
pKernelProperties->privateMemSize = gfxCoreHelper.getKernelPrivateMemSize(kernelDescriptor);
|
||||
pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
|
||||
pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.spillFillScratchMemorySize;
|
||||
memset(pKernelProperties->uuid.kid, 0, ZE_MAX_KERNEL_UUID_SIZE);
|
||||
memset(pKernelProperties->uuid.mid, 0, ZE_MAX_MODULE_UUID_SIZE);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -21,11 +21,11 @@ namespace ult {
|
||||
ModuleImmutableDataFixture::MockImmutableMemoryManager::MockImmutableMemoryManager(NEO::ExecutionEnvironment &executionEnvironment) : NEO::MockMemoryManager(const_cast<NEO::ExecutionEnvironment &>(executionEnvironment)) {}
|
||||
|
||||
ModuleImmutableDataFixture::MockImmutableData::MockImmutableData(uint32_t perHwThreadPrivateMemorySize) : MockImmutableData(perHwThreadPrivateMemorySize, 0, 0) {}
|
||||
ModuleImmutableDataFixture::MockImmutableData::MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSize, uint32_t perThreaddPrivateScratchSize) {
|
||||
ModuleImmutableDataFixture::MockImmutableData::MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSlot0Size, uint32_t perThreadScratchSlot1Size) {
|
||||
mockKernelDescriptor = new NEO::KernelDescriptor;
|
||||
mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize;
|
||||
mockKernelDescriptor->kernelAttributes.perThreadScratchSize[0] = perThreadScratchSize;
|
||||
mockKernelDescriptor->kernelAttributes.perThreadScratchSize[1] = perThreaddPrivateScratchSize;
|
||||
mockKernelDescriptor->kernelAttributes.perThreadScratchSize[0] = perThreadScratchSlot0Size;
|
||||
mockKernelDescriptor->kernelAttributes.perThreadScratchSize[1] = perThreadScratchSlot1Size;
|
||||
kernelDescriptor = mockKernelDescriptor;
|
||||
|
||||
mockKernelInfo = new NEO::KernelInfo;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -38,7 +38,7 @@ struct ModuleImmutableDataFixture : public DeviceFixture {
|
||||
using KernelImmutableData::kernelDescriptor;
|
||||
using KernelImmutableData::kernelInfo;
|
||||
MockImmutableData(uint32_t perHwThreadPrivateMemorySize);
|
||||
MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSize, uint32_t perThreaddPrivateScratchSize);
|
||||
MockImmutableData(uint32_t perHwThreadPrivateMemorySize, uint32_t perThreadScratchSlot0Size, uint32_t perThreadScratchSlot1Size);
|
||||
void setDevice(L0::Device *inDevice) {
|
||||
device = inDevice;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -2528,7 +2528,7 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
|
||||
void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset);
|
||||
auto scratchSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchSurfaceStateBuffer);
|
||||
|
||||
auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation();
|
||||
auto scratchAllocation = scratchSpaceController->getScratchSpaceSlot0Allocation();
|
||||
EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
@@ -2582,7 +2582,7 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest,
|
||||
void *scratchSurfaceStateBuffer = ptrOffset(surfaceStateHeapAlloc->getUnderlyingBuffer(), expectedScratchOffset);
|
||||
auto scratchSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(scratchSurfaceStateBuffer);
|
||||
|
||||
auto scratchAllocation = scratchSpaceController->getScratchSpaceAllocation();
|
||||
auto scratchAllocation = scratchSpaceController->getScratchSpaceSlot0Allocation();
|
||||
EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress());
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -611,23 +611,23 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize());
|
||||
EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize(0u));
|
||||
|
||||
auto ultCsr = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSize);
|
||||
EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSlot0Size);
|
||||
commandList->cmdQImmediate = nullptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(CmdlistAppendLaunchKernelTests,
|
||||
givenImmediateCommandListUsesFlushTaskWhenDispatchingKernelWithSpillAndPrivateScratchSpaceThenExpectCsrHasCorrectValuesSet, IsAtLeastXeHpCore) {
|
||||
constexpr uint32_t scratchPerThreadSize = 0x200;
|
||||
constexpr uint32_t privateScratchPerThreadSize = 0x100;
|
||||
constexpr uint32_t scratch0PerThreadSize = 0x200;
|
||||
constexpr uint32_t scratch1PerThreadSize = 0x100;
|
||||
|
||||
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
|
||||
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
|
||||
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;
|
||||
kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratchPerThreadSize;
|
||||
kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = privateScratchPerThreadSize;
|
||||
kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratch0PerThreadSize;
|
||||
kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = scratch1PerThreadSize;
|
||||
createModuleFromMockBinary(0u, false, mockKernelImmData.get());
|
||||
|
||||
auto kernel = std::make_unique<MockKernel>(module.get());
|
||||
@@ -660,12 +660,12 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(scratchPerThreadSize, commandList->getCommandListPerThreadScratchSize());
|
||||
EXPECT_EQ(privateScratchPerThreadSize, commandList->getCommandListPerThreadPrivateScratchSize());
|
||||
EXPECT_EQ(scratch0PerThreadSize, commandList->getCommandListPerThreadScratchSize(0u));
|
||||
EXPECT_EQ(scratch1PerThreadSize, commandList->getCommandListPerThreadScratchSize(1u));
|
||||
|
||||
auto ultCsr = reinterpret_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
|
||||
EXPECT_EQ(scratchPerThreadSize, ultCsr->requiredScratchSize);
|
||||
EXPECT_EQ(privateScratchPerThreadSize, ultCsr->requiredPrivateScratchSize);
|
||||
EXPECT_EQ(scratch0PerThreadSize, ultCsr->requiredScratchSlot0Size);
|
||||
EXPECT_EQ(scratch1PerThreadSize, ultCsr->requiredScratchSlot1Size);
|
||||
commandList->cmdQImmediate = nullptr;
|
||||
}
|
||||
|
||||
@@ -674,14 +674,14 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
|
||||
DebugManagerStateRestore restorer;
|
||||
NEO::debugManager.flags.EventWaitOnHost.set(1);
|
||||
|
||||
constexpr uint32_t scratchPerThreadSize = 0x200;
|
||||
constexpr uint32_t privateScratchPerThreadSize = 0x100;
|
||||
constexpr uint32_t scratch0PerThreadSize = 0x200;
|
||||
constexpr uint32_t scratch1PerThreadSize = 0x100;
|
||||
|
||||
std::unique_ptr<MockImmutableData> mockKernelImmData = std::make_unique<MockImmutableData>(0u);
|
||||
auto kernelDescriptor = mockKernelImmData->kernelDescriptor;
|
||||
kernelDescriptor->kernelAttributes.flags.requiresImplicitArgs = false;
|
||||
kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratchPerThreadSize;
|
||||
kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = privateScratchPerThreadSize;
|
||||
kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = scratch0PerThreadSize;
|
||||
kernelDescriptor->kernelAttributes.perThreadScratchSize[1] = scratch1PerThreadSize;
|
||||
createModuleFromMockBinary(0u, false, mockKernelImmData.get());
|
||||
|
||||
auto kernel = std::make_unique<MockKernel>(module.get());
|
||||
|
||||
@@ -1182,8 +1182,8 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests, givenKernelWithScratchAndPrivateWhenAp
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(commandList->getCommandListPerThreadPrivateScratchSize(), static_cast<uint32_t>(0x100));
|
||||
EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(), static_cast<uint32_t>(0x200));
|
||||
EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(1u), static_cast<uint32_t>(0x100));
|
||||
EXPECT_EQ(commandList->getCommandListPerThreadScratchSize(0u), static_cast<uint32_t>(0x200));
|
||||
}
|
||||
|
||||
HWTEST2_F(CmdlistAppendLaunchKernelTests, givenGlobalBindlessAllocatorAndKernelWithPrivateScratchWhenAppendLaunchKernelThenCmdContainerHasBindfulSSHAllocated, IsAtLeastXeHpCore) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -2143,7 +2143,7 @@ HWTEST2_F(ImmediateFlushTaskCsrSharedHeapCmdListTest,
|
||||
|
||||
EXPECT_EQ(0u, frontEndCmd->getScratchSpaceBuffer());
|
||||
|
||||
EXPECT_EQ(nullptr, csrImmediate.getScratchSpaceController()->getScratchSpaceAllocation());
|
||||
EXPECT_EQ(nullptr, csrImmediate.getScratchSpaceController()->getScratchSpaceSlot0Allocation());
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x100;
|
||||
|
||||
@@ -2164,7 +2164,7 @@ HWTEST2_F(ImmediateFlushTaskCsrSharedHeapCmdListTest,
|
||||
constexpr size_t expectedScratchOffset = 2 * sizeof(RENDER_SURFACE_STATE);
|
||||
EXPECT_EQ(expectedScratchOffset, frontEndCmd->getScratchSpaceBuffer());
|
||||
|
||||
auto scratchAllocation = csrImmediate.getScratchSpaceController()->getScratchSpaceAllocation();
|
||||
auto scratchAllocation = csrImmediate.getScratchSpaceController()->getScratchSpaceSlot0Allocation();
|
||||
ASSERT_NE(nullptr, scratchAllocation);
|
||||
|
||||
EXPECT_TRUE(csrImmediate.isMadeResident(scratchAllocation));
|
||||
|
||||
@@ -1089,12 +1089,12 @@ class MockCommandQueue : public L0::CommandQueueHw<gfxCoreFamily> {
|
||||
void handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
NEO::ScratchSpaceController *scratchController,
|
||||
bool &gsbaState, bool &frontEndState,
|
||||
uint32_t perThreadScratchSpaceSize,
|
||||
uint32_t perThreadPrivateScratchSize) override {
|
||||
uint32_t perThreadScratchSpaceSlot0Size,
|
||||
uint32_t perThreadScratchSpaceSlot1Size) override {
|
||||
this->mockHeapContainer = heapContainer;
|
||||
}
|
||||
|
||||
void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties) override {
|
||||
void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSlot0Size, NEO::LinearStream &commandStream, NEO::StreamProperties &streamProperties) override {
|
||||
return;
|
||||
}
|
||||
};
|
||||
@@ -1108,7 +1108,7 @@ HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenConta
|
||||
commandQueue->initialize(false, false, false);
|
||||
auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
commandList->setCommandListPerThreadScratchSize(100u);
|
||||
commandList->setCommandListPerThreadScratchSize(0u, 100u);
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
commandList->close();
|
||||
|
||||
@@ -1207,7 +1207,7 @@ HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndScratchW
|
||||
commandQueue->initialize(false, false, false);
|
||||
auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
commandList->setCommandListPerThreadScratchSize(100u);
|
||||
commandList->setCommandListPerThreadScratchSize(0u, 100u);
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
commandList->close();
|
||||
|
||||
@@ -1234,7 +1234,7 @@ HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndPrivateS
|
||||
commandQueue->initialize(false, false, false);
|
||||
auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
commandList->setCommandListPerThreadPrivateScratchSize(100u);
|
||||
commandList->setCommandListPerThreadScratchSize(1u, 100u);
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
commandList->close();
|
||||
|
||||
@@ -1265,7 +1265,7 @@ HWTEST2_F(ExecuteCommandListTests, givenBindlessHelperWhenCommandListIsExecutedO
|
||||
commandQueue->initialize(false, false, false);
|
||||
auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
commandList->setCommandListPerThreadScratchSize(100u);
|
||||
commandList->setCommandListPerThreadScratchSize(0u, 100u);
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
commandList->close();
|
||||
|
||||
@@ -1418,10 +1418,10 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen
|
||||
returnValue);
|
||||
auto commandList0 = new CommandListCoreFamily<gfxCoreFamily>();
|
||||
commandList0->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
auto commandList1 = new CommandListCoreFamily<gfxCoreFamily>();
|
||||
commandList1->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
auto commandListHandle0 = commandList0->toHandle();
|
||||
commandList0->close();
|
||||
auto commandListHandle1 = commandList1->toHandle();
|
||||
@@ -1458,9 +1458,9 @@ HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThen
|
||||
false,
|
||||
returnValue));
|
||||
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
commandList0->setCommandListPerThreadScratchSize(0u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
commandList1->setCommandListPerThreadScratchSize(0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
auto commandListHandle0 = commandList0->toHandle();
|
||||
commandList0->close();
|
||||
auto commandListHandle1 = commandList1->toHandle();
|
||||
@@ -1503,17 +1503,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
returnValue));
|
||||
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
commandList0->setCommandListPerThreadScratchSize(512u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 512u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
auto commandListHandle0 = commandList0->toHandle();
|
||||
commandList0->close();
|
||||
auto commandListHandle1 = commandList1->toHandle();
|
||||
commandList1->close();
|
||||
|
||||
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
|
||||
|
||||
@@ -1528,10 +1528,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
ASSERT_EQ(1u, gsbaStates.size());
|
||||
|
||||
commandList0->reset();
|
||||
commandList0->setCommandListPerThreadScratchSize(0u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
commandList0->close();
|
||||
commandList1->reset();
|
||||
commandList1->setCommandListPerThreadScratchSize(0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
commandList1->close();
|
||||
|
||||
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
|
||||
@@ -1544,9 +1544,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
returnValue));
|
||||
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
usedSpaceAfter = commandQueue1->commandStream.getUsed();
|
||||
|
||||
@@ -1581,17 +1581,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
returnValue));
|
||||
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
commandList0->setCommandListPerThreadScratchSize(0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(512u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 512u);
|
||||
auto commandListHandle0 = commandList0->toHandle();
|
||||
commandList0->close();
|
||||
auto commandListHandle1 = commandList1->toHandle();
|
||||
commandList1->close();
|
||||
|
||||
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
|
||||
|
||||
@@ -1606,10 +1606,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
ASSERT_EQ(2u, gsbaStates.size());
|
||||
|
||||
commandList0->reset();
|
||||
commandList0->setCommandListPerThreadScratchSize(512u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 512u);
|
||||
commandList0->close();
|
||||
commandList1->reset();
|
||||
commandList1->setCommandListPerThreadScratchSize(0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
commandList1->close();
|
||||
|
||||
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
|
||||
@@ -1622,9 +1622,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
returnValue));
|
||||
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
usedSpaceAfter = commandQueue1->commandStream.getUsed();
|
||||
|
||||
@@ -1659,17 +1659,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
returnValue));
|
||||
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
commandList0->setCommandListPerThreadScratchSize(512u);
|
||||
commandList1->setCommandListPerThreadScratchSize(512u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 512u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 512u);
|
||||
auto commandListHandle0 = commandList0->toHandle();
|
||||
commandList0->close();
|
||||
auto commandListHandle1 = commandList1->toHandle();
|
||||
commandList1->close();
|
||||
|
||||
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
|
||||
|
||||
@@ -1684,10 +1684,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
ASSERT_EQ(1u, gsbaStates.size());
|
||||
|
||||
commandList0->reset();
|
||||
commandList0->setCommandListPerThreadScratchSize(1024u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 1024u);
|
||||
commandList0->close();
|
||||
commandList1->reset();
|
||||
commandList1->setCommandListPerThreadScratchSize(1024u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 1024u);
|
||||
commandList1->close();
|
||||
|
||||
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
|
||||
@@ -1700,9 +1700,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
returnValue));
|
||||
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
usedSpaceAfter = commandQueue1->commandStream.getUsed();
|
||||
|
||||
@@ -1737,17 +1737,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
returnValue));
|
||||
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
commandList0->setCommandListPerThreadScratchSize(0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(512u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 512u);
|
||||
auto commandListHandle0 = commandList0->toHandle();
|
||||
commandList0->close();
|
||||
auto commandListHandle1 = commandList1->toHandle();
|
||||
commandList1->close();
|
||||
|
||||
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
|
||||
|
||||
@@ -1762,10 +1762,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
ASSERT_EQ(2u, gsbaStates.size());
|
||||
|
||||
commandList0->reset();
|
||||
commandList0->setCommandListPerThreadScratchSize(1024u);
|
||||
commandList0->setCommandListPerThreadScratchSize(0u, 1024u);
|
||||
commandList0->close();
|
||||
commandList1->reset();
|
||||
commandList1->setCommandListPerThreadScratchSize(2048u);
|
||||
commandList1->setCommandListPerThreadScratchSize(0u, 2048u);
|
||||
commandList1->close();
|
||||
|
||||
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
|
||||
@@ -1777,9 +1777,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
false,
|
||||
returnValue));
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
usedSpaceAfter = commandQueue1->commandStream.getUsed();
|
||||
|
||||
@@ -1813,17 +1813,17 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
returnValue));
|
||||
auto commandList0 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
auto commandList1 = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
commandList0->setCommandListPerThreadPrivateScratchSize(0u);
|
||||
commandList1->setCommandListPerThreadPrivateScratchSize(512u);
|
||||
commandList0->setCommandListPerThreadScratchSize(1u, 0u);
|
||||
commandList1->setCommandListPerThreadScratchSize(1u, 512u);
|
||||
auto commandListHandle0 = commandList0->toHandle();
|
||||
commandList0->close();
|
||||
auto commandListHandle1 = commandList1->toHandle();
|
||||
commandList1->close();
|
||||
|
||||
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize());
|
||||
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize());
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
|
||||
|
||||
@@ -1836,10 +1836,10 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
ASSERT_EQ(2u, mediaVfeStates.size());
|
||||
|
||||
commandList0->reset();
|
||||
commandList0->setCommandListPerThreadPrivateScratchSize(1024u);
|
||||
commandList0->setCommandListPerThreadScratchSize(1u, 1024u);
|
||||
commandList0->close();
|
||||
commandList1->reset();
|
||||
commandList1->setCommandListPerThreadPrivateScratchSize(2048u);
|
||||
commandList1->setCommandListPerThreadScratchSize(1u, 2048u);
|
||||
commandList1->close();
|
||||
|
||||
auto commandQueue1 = whiteboxCast(CommandQueue::create(productFamily,
|
||||
@@ -1851,9 +1851,9 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
false,
|
||||
returnValue));
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false);
|
||||
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize());
|
||||
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false);
|
||||
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadPrivateScratchSize());
|
||||
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
|
||||
usedSpaceAfter = commandQueue1->commandStream.getUsed();
|
||||
|
||||
@@ -1885,7 +1885,7 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledWhenExecutingCmdLi
|
||||
false,
|
||||
returnValue));
|
||||
auto commandList = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
commandList->setCommandListPerThreadPrivateScratchSize(0u);
|
||||
commandList->setCommandListPerThreadScratchSize(1u, 0u);
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
commandList->close();
|
||||
|
||||
@@ -1930,7 +1930,7 @@ HWTEST_F(ExecuteCommandListTests, givenDirectSubmissionEnabledAndDebugFlagSetWhe
|
||||
false,
|
||||
returnValue));
|
||||
auto commandList = std::unique_ptr<CommandList>(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)));
|
||||
commandList->setCommandListPerThreadPrivateScratchSize(0u);
|
||||
commandList->setCommandListPerThreadScratchSize(1u, 0u);
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
commandList->close();
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -840,8 +840,8 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP
|
||||
|
||||
void programHeaps(HeapContainer &heapContainer,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
@@ -850,7 +850,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP
|
||||
programHeapsCalled = true;
|
||||
}
|
||||
|
||||
NEO::GraphicsAllocation *getScratchSpaceAllocation() override {
|
||||
NEO::GraphicsAllocation *getScratchSpaceSlot0Allocation() override {
|
||||
return scratchAllocation;
|
||||
}
|
||||
|
||||
@@ -895,7 +895,7 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceThenP
|
||||
HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHeapContainerIsZeroSizeThenNoFunctionIsCalled, Platforms) {
|
||||
class MockScratchSpaceControllerXeHPAndLater : public NEO::ScratchSpaceControllerXeHPAndLater {
|
||||
public:
|
||||
using NEO::ScratchSpaceControllerXeHPAndLater::scratchAllocation;
|
||||
using NEO::ScratchSpaceControllerXeHPAndLater::scratchSlot0Allocation;
|
||||
bool programHeapsCalled = false;
|
||||
MockScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex,
|
||||
NEO::ExecutionEnvironment &environment,
|
||||
@@ -903,8 +903,8 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe
|
||||
|
||||
void programHeaps(HeapContainer &heapContainer,
|
||||
uint32_t scratchSlot,
|
||||
uint32_t requiredPerThreadScratchSize,
|
||||
uint32_t requiredPerThreadPrivateScratchSize,
|
||||
uint32_t requiredPerThreadScratchSizeSlot0,
|
||||
uint32_t requiredPerThreadScratchSizeSlot1,
|
||||
TaskCountType currentTaskCount,
|
||||
OsContext &osContext,
|
||||
bool &stateBaseAddressDirty,
|
||||
@@ -937,11 +937,11 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandQueueWhenHandleScratchSpaceAndHe
|
||||
NEO::GraphicsAllocation graphicsAllocation(1u, NEO::AllocationType::buffer, nullptr, 0u, 0u, 0u, MemoryPool::system4KBPages, 0u);
|
||||
|
||||
auto scratch = static_cast<MockScratchSpaceControllerXeHPAndLater *>(scratchController.get());
|
||||
scratch->scratchAllocation = &graphicsAllocation;
|
||||
scratch->scratchSlot0Allocation = &graphicsAllocation;
|
||||
commandQueueHw->handleScratchSpace(heapContainer, scratchController.get(), gsbaStateDirty, frontEndStateDirty, 0x1000, 0u);
|
||||
|
||||
EXPECT_FALSE(scratch->programHeapsCalled);
|
||||
scratch->scratchAllocation = nullptr;
|
||||
scratch->scratchSlot0Allocation = nullptr;
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorrectlyPatched, IsAtLeastXeHpCore) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -369,15 +369,15 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsT
|
||||
false,
|
||||
returnValue));
|
||||
|
||||
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(512u);
|
||||
CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);
|
||||
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(0u, 512u);
|
||||
CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(0u, 1024u);
|
||||
|
||||
ASSERT_NE(nullptr, commandQueue);
|
||||
auto usedSpaceBefore = commandQueue->commandStream.getUsed();
|
||||
|
||||
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(1024u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(1024u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
@@ -395,15 +395,15 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsT
|
||||
|
||||
CommandList::fromHandle(commandLists[0])->reset();
|
||||
CommandList::fromHandle(commandLists[1])->reset();
|
||||
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(2048u);
|
||||
CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);
|
||||
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(0u, 2048u);
|
||||
CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(0u, 1024u);
|
||||
|
||||
ASSERT_NE(nullptr, commandQueue);
|
||||
usedSpaceBefore = commandQueue->commandStream.getUsed();
|
||||
|
||||
result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
|
||||
EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSizeSlot0());
|
||||
|
||||
usedSpaceAfter = commandQueue->commandStream.getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -138,7 +138,7 @@ HWTEST2_F(L0DebuggerPerContextAddressSpaceTest, givenDebuggingEnabledAndRequired
|
||||
|
||||
ze_command_list_handle_t commandLists[] = {
|
||||
CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false)->toHandle()};
|
||||
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(4096);
|
||||
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(0u, 4096);
|
||||
CommandList::fromHandle(commandLists[0])->close();
|
||||
|
||||
uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
|
||||
|
||||
@@ -1560,8 +1560,8 @@ TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
|
||||
auto expectedPrivateSize = 0x200u;
|
||||
|
||||
auto &kernelDescriptor = const_cast<KernelDescriptor &>(kernel->getKernelDescriptor());
|
||||
kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = expectedSpillSize;
|
||||
kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = expectedPrivateSize;
|
||||
kernelDescriptor.kernelAttributes.spillFillScratchMemorySize = expectedSpillSize;
|
||||
kernelDescriptor.kernelAttributes.privateScratchMemorySize = expectedPrivateSize;
|
||||
|
||||
ze_result_t res = kernel->getProperties(&kernelProperties);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||
@@ -1580,7 +1580,7 @@ TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
|
||||
EXPECT_EQ(maxNumSubgroups, kernelProperties.maxNumSubgroups);
|
||||
|
||||
EXPECT_EQ(sizeof(float) * 16U, kernelProperties.localMemSize);
|
||||
EXPECT_EQ(expectedPrivateSize, kernelProperties.privateMemSize);
|
||||
EXPECT_EQ(device->getGfxCoreHelper().getKernelPrivateMemSize(kernelDescriptor), kernelProperties.privateMemSize);
|
||||
EXPECT_EQ(expectedSpillSize, kernelProperties.spillMemSize);
|
||||
|
||||
uint8_t zeroKid[ZE_MAX_KERNEL_UUID_SIZE];
|
||||
@@ -1603,8 +1603,8 @@ HWTEST2_F(KernelPropertiesTests, givenKernelWithPrivateScratchMemoryThenProperPr
|
||||
auto expectedPrivateSize = 0x200u;
|
||||
|
||||
auto &kernelDescriptor = const_cast<KernelDescriptor &>(kernel->getKernelDescriptor());
|
||||
kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = expectedSpillSize;
|
||||
kernelDescriptor.kernelAttributes.perThreadScratchSize[1] = expectedPrivateSize;
|
||||
kernelDescriptor.kernelAttributes.spillFillScratchMemorySize = expectedSpillSize;
|
||||
kernelDescriptor.kernelAttributes.privateScratchMemorySize = expectedPrivateSize;
|
||||
kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0xDEAD;
|
||||
|
||||
ze_result_t res = kernel->getProperties(&kernelProperties);
|
||||
|
||||
Reference in New Issue
Block a user