Fix dispatch of MEDIA_VFE_STATE so that it uses the updated scratch space pointer and size

Minor fix to reset scratch space size and indirect params during cmdList reset

Signed-off-by: Vinod Tipparaju <vinod.tipparaju@intel.com>
This commit is contained in:
Vinod Tipparaju
2021-01-15 20:18:38 +05:30
committed by Compute-Runtime-Automation
parent 4e737adb05
commit 494a59c7df
15 changed files with 452 additions and 34 deletions

View File

@@ -147,6 +147,10 @@ struct CommandList : _ze_command_list_handle_t {
return commandListPerThreadScratchSize; return commandListPerThreadScratchSize;
} }
// Overwrites the per-thread scratch size recorded for this command list with `size`.
void setCommandListPerThreadScratchSize(uint32_t size) { commandListPerThreadScratchSize = size; }
NEO::PreemptionMode getCommandListPreemptionMode() const { NEO::PreemptionMode getCommandListPreemptionMode() const {
return commandListPreemptionMode; return commandListPreemptionMode;
} }

View File

@@ -69,6 +69,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
removeHostPtrAllocations(); removeHostPtrAllocations();
commandContainer.reset(); commandContainer.reset();
containsStatelessUncachedResource = false; containsStatelessUncachedResource = false;
indirectAllocationsAllowed = false;
unifiedMemoryControls.indirectHostAllocationsAllowed = false;
unifiedMemoryControls.indirectSharedAllocationsAllowed = false;
commandListPreemptionMode = device->getDevicePreemptionMode();
commandListPerThreadScratchSize = 0u;
if (!isCopyOnly()) { if (!isCopyOnly()) {
if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) { if (!NEO::ApiSpecificConfig::getBindlessConfiguration()) {

View File

@@ -57,8 +57,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
UNRECOVERABLE_IF(kernel == nullptr); UNRECOVERABLE_IF(kernel == nullptr);
appendEventForProfiling(hEvent, true); appendEventForProfiling(hEvent, true);
const auto functionImmutableData = kernel->getImmutableData(); const auto functionImmutableData = kernel->getImmutableData();
commandListPerThreadScratchSize = std::max<std::uint32_t>(commandListPerThreadScratchSize, auto perThreadScratchSize = std::max<std::uint32_t>(this->getCommandListPerThreadScratchSize(),
kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]); kernel->getImmutableData()->getDescriptor().kernelAttributes.perThreadScratchSize[0]);
this->setCommandListPerThreadScratchSize(perThreadScratchSize);
auto kernelPreemptionMode = obtainFunctionPreemptionMode(kernel); auto kernelPreemptionMode = obtainFunctionPreemptionMode(kernel);
commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode); commandListPreemptionMode = std::min(commandListPreemptionMode, kernelPreemptionMode);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2019-2020 Intel Corporation * Copyright (C) 2019-2021 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -54,7 +54,6 @@ struct CommandQueue : _ze_command_queue_handle_t {
} }
protected: protected:
std::atomic<uint32_t> commandQueuePerThreadScratchSize;
NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial; NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial;
bool commandQueueDebugCmdsProgrammed = false; bool commandQueueDebugCmdsProgrammed = false;
bool isCopyOnlyCommandQueue = false; bool isCopyOnlyCommandQueue = false;

View File

@@ -37,7 +37,7 @@ struct CommandQueueHw : public CommandQueueImp {
void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream); void programStateBaseAddress(uint64_t gsba, bool useLocalMemoryForIndirectHeap, NEO::LinearStream &commandStream);
size_t estimateStateBaseAddressCmdSize(); size_t estimateStateBaseAddressCmdSize();
MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, NEO::LinearStream &commandStream); MOCKABLE_VIRTUAL void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream);
size_t estimateFrontEndCmdSize(); size_t estimateFrontEndCmdSize();
size_t estimatePipelineSelect(); size_t estimatePipelineSelect();
@@ -46,7 +46,8 @@ struct CommandQueueHw : public CommandQueueImp {
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::ResidencyContainer &residency, MOCKABLE_VIRTUAL void handleScratchSpace(NEO::ResidencyContainer &residency,
NEO::HeapContainer &heapContainer, NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController, NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState); bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize);
}; };
} // namespace L0 } // namespace L0

View File

@@ -17,7 +17,6 @@
#include "shared/source/device/device.h" #include "shared/source/device/device.h"
#include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/interlocked_max.h"
#include "shared/source/helpers/preamble.h" #include "shared/source/helpers/preamble.h"
#include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/residency_container.h" #include "shared/source/memory_manager/residency_container.h"
@@ -118,6 +117,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
device->activateMetricGroups(); device->activateMetricGroups();
size_t totalCmdBuffers = 0; size_t totalCmdBuffers = 0;
uint32_t perThreadScratchSpaceSize = 0;
for (auto i = 0u; i < numCommandLists; i++) { for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]); auto commandList = CommandList::fromHandle(phCommandLists[i]);
@@ -140,7 +140,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
statePreemption = commandListPreemption; statePreemption = commandListPreemption;
} }
interlockedMax(commandQueuePerThreadScratchSize, commandList->getCommandListPerThreadScratchSize()); if (perThreadScratchSpaceSize < commandList->getCommandListPerThreadScratchSize()) {
perThreadScratchSpaceSize = commandList->getCommandListPerThreadScratchSize();
}
if (commandList->getCommandListPerThreadScratchSize() != 0) { if (commandList->getCommandListPerThreadScratchSize() != 0) {
if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) { if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation()); heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
@@ -176,10 +179,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
handleScratchSpace(residencyContainer, handleScratchSpace(residencyContainer,
heapContainer, heapContainer,
scratchSpaceController, scratchSpaceController,
gsbaStateDirty, frontEndStateDirty); gsbaStateDirty, frontEndStateDirty,
perThreadScratchSpaceSize);
gsbaStateDirty |= !gsbaInit; gsbaStateDirty |= !gsbaInit;
frontEndStateDirty |= !frontEndInit; frontEndStateDirty |= csr->getMediaVFEStateDirty();
if (!isCopyOnlyCommandQueue) { if (!isCopyOnlyCommandQueue) {
if (!gpgpuEnabled) { if (!gpgpuEnabled) {
@@ -225,7 +229,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
} }
if (frontEndStateDirty) { if (frontEndStateDirty) {
programFrontEnd(scratchSpaceController->getScratchPatchAddress(), child); programFrontEnd(scratchSpaceController->getScratchPatchAddress(), scratchSpaceController->getPerThreadScratchSpaceSize(), child);
} }
if (gsbaStateDirty) { if (gsbaStateDirty) {
auto indirectHeap = CommandList::fromHandle(phCommandLists[0])->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT); auto indirectHeap = CommandList::fromHandle(phCommandLists[0])->commandContainer.getIndirectHeap(NEO::HeapType::INDIRECT_OBJECT);
@@ -372,18 +376,18 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, NEO::LinearStream &commandStream) { void CommandQueueHw<gfxCoreFamily>::programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily; using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
UNRECOVERABLE_IF(csr == nullptr); UNRECOVERABLE_IF(csr == nullptr);
NEO::PreambleHelper<GfxFamily>::programVFEState(&commandStream, NEO::PreambleHelper<GfxFamily>::programVFEState(&commandStream,
device->getHwInfo(), device->getHwInfo(),
commandQueuePerThreadScratchSize, perThreadScratchSpaceSize,
scratchAddress, scratchAddress,
device->getMaxNumHwThreads(), device->getMaxNumHwThreads(),
csr->getOsContext().getEngineType(), csr->getOsContext().getEngineType(),
NEO::AdditionalKernelExecInfo::NotApplicable, NEO::AdditionalKernelExecInfo::NotApplicable,
NEO::KernelExecutionType::NotApplicable); NEO::KernelExecutionType::NotApplicable);
frontEndInit = true; csr->setMediaVFEStateDirty(false);
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -110,10 +110,11 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::ResidencyContainer &residency, void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::ResidencyContainer &residency,
NEO::HeapContainer &heapContainer, NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController, NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState) { bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize) {
if (commandQueuePerThreadScratchSize > 0) { if (perThreadScratchSpaceSize > 0) {
scratchController->setRequiredScratchSpace(nullptr, 0u, commandQueuePerThreadScratchSize, 0u, csr->peekTaskCount(), scratchController->setRequiredScratchSpace(nullptr, 0u, perThreadScratchSpaceSize, 0u, csr->peekTaskCount(),
csr->getOsContext(), gsbaState, frontEndState); csr->getOsContext(), gsbaState, frontEndState);
auto scratchAllocation = scratchController->getScratchSpaceAllocation(); auto scratchAllocation = scratchController->getScratchSpaceAllocation();
residency.push_back(scratchAllocation); residency.push_back(scratchAllocation);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2019-2020 Intel Corporation * Copyright (C) 2019-2021 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -62,7 +62,6 @@ struct CommandQueueImp : public CommandQueue {
CommandQueueImp() = delete; CommandQueueImp() = delete;
CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc)
: device(device), csr(csr), desc(*desc) { : device(device), csr(csr), desc(*desc) {
std::atomic_init(&commandQueuePerThreadScratchSize, 0u);
} }
ze_result_t destroy() override; ze_result_t destroy() override;
@@ -95,7 +94,6 @@ struct CommandQueueImp : public CommandQueue {
std::atomic<uint32_t> taskCount{0}; std::atomic<uint32_t> taskCount{0};
std::vector<Kernel *> printfFunctionContainer; std::vector<Kernel *> printfFunctionContainer;
bool gsbaInit = false; bool gsbaInit = false;
bool frontEndInit = false;
bool gpgpuEnabled = false; bool gpgpuEnabled = false;
CommandBufferManager buffers; CommandBufferManager buffers;
NEO::ResidencyContainer residencyContainer; NEO::ResidencyContainer residencyContainer;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2019-2020 Intel Corporation * Copyright (C) 2019-2021 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -25,7 +25,6 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass::device; using BaseClass::device;
using BaseClass::printfFunctionContainer; using BaseClass::printfFunctionContainer;
using BaseClass::synchronizeByPollingForTaskCount; using BaseClass::synchronizeByPollingForTaskCount;
using CommandQueue::commandQueuePerThreadScratchSize;
using CommandQueue::internalUsage; using CommandQueue::internalUsage;
WhiteBox(Device *device, NEO::CommandStreamReceiver *csr, WhiteBox(Device *device, NEO::CommandStreamReceiver *csr,
@@ -78,7 +77,6 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>; using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
using BaseClass::commandStream; using BaseClass::commandStream;
using BaseClass::printfFunctionContainer; using BaseClass::printfFunctionContainer;
using L0::CommandQueue::commandQueuePerThreadScratchSize;
using L0::CommandQueue::internalUsage; using L0::CommandQueue::internalUsage;
MockCommandQueueHw(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw<gfxCoreFamily>(device, csr, desc) { MockCommandQueueHw(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw<gfxCoreFamily>(device, csr, desc) {

View File

@@ -735,11 +735,12 @@ class MockCommandQueue : public L0::CommandQueueHw<gfxCoreFamily> {
void handleScratchSpace(NEO::ResidencyContainer &residency, void handleScratchSpace(NEO::ResidencyContainer &residency,
NEO::HeapContainer &heapContainer, NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController, NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState) override { bool &gsbaState, bool &frontEndState,
uint32_t perThreadScratchSpaceSize) override {
this->mockHeapContainer = heapContainer; this->mockHeapContainer = heapContainer;
} }
void programFrontEnd(uint64_t scratchAddress, NEO::LinearStream &commandStream) override { void programFrontEnd(uint64_t scratchAddress, uint32_t perThreadScratchSpaceSize, NEO::LinearStream &commandStream) override {
return; return;
} }
}; };
@@ -755,7 +756,7 @@ HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndScratchW
commandQueue->initialize(false, false); commandQueue->initialize(false, false);
auto commandList = new CommandListCoreFamily<gfxCoreFamily>(); auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
commandList->initialize(device, NEO::EngineGroupType::Compute); commandList->initialize(device, NEO::EngineGroupType::Compute);
commandList->commandListPerThreadScratchSize = 100u; commandList->setCommandListPerThreadScratchSize(100u);
auto commandListHandle = commandList->toHandle(); auto commandListHandle = commandList->toHandle();
void *alloc = alignedMalloc(0x100, 0x100); void *alloc = alignedMalloc(0x100, 0x100);
@@ -784,7 +785,7 @@ HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithWhenBindlessEn
commandQueue->initialize(false, false); commandQueue->initialize(false, false);
auto commandList = new CommandListCoreFamily<gfxCoreFamily>(); auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
commandList->initialize(device, NEO::EngineGroupType::Compute); commandList->initialize(device, NEO::EngineGroupType::Compute);
commandList->commandListPerThreadScratchSize = 100u; commandList->setCommandListPerThreadScratchSize(100u);
auto commandListHandle = commandList->toHandle(); auto commandListHandle = commandList->toHandle();
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false); commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
@@ -803,7 +804,7 @@ HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenConta
commandQueue->initialize(false, false); commandQueue->initialize(false, false);
auto commandList = new CommandListCoreFamily<gfxCoreFamily>(); auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
commandList->initialize(device, NEO::EngineGroupType::Compute); commandList->initialize(device, NEO::EngineGroupType::Compute);
commandList->commandListPerThreadScratchSize = 100u; commandList->setCommandListPerThreadScratchSize(100u);
auto commandListHandle = commandList->toHandle(); auto commandListHandle = commandList->toHandle();
void *alloc = alignedMalloc(0x100, 0x100); void *alloc = alignedMalloc(0x100, 0x100);
@@ -823,6 +824,344 @@ HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenConta
alignedFree(alloc); alignedFree(alloc);
} }
// Executes two command lists back to back, each with a per-thread scratch size of zero, and
// checks the CSR's MEDIA_VFE_STATE dirty flag: it is expected to be set before the first
// submission and to read as cleared after the first and after the second submission.
HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThenMVSDirtyFlagIsSetOnlyOnce, CommandQueueExecuteTestSupport) {
    ze_command_queue_desc_t queueDesc = {};
    NEO::CommandStreamReceiver *receiver;
    device->getCsrForOrdinalAndIndex(&receiver, 0u, 0u);

    ze_result_t createStatus;
    auto queue = CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, createStatus);

    auto firstList = new CommandListCoreFamily<gfxCoreFamily>();
    firstList->initialize(device, NEO::EngineGroupType::Compute);
    firstList->setCommandListPerThreadScratchSize(0u);

    auto secondList = new CommandListCoreFamily<gfxCoreFamily>();
    secondList->initialize(device, NEO::EngineGroupType::Compute);
    secondList->setCommandListPerThreadScratchSize(0u);

    auto firstHandle = firstList->toHandle();
    auto secondHandle = secondList->toHandle();

    // Nothing has been submitted yet, so the flag is expected to still be set.
    EXPECT_EQ(true, receiver->getMediaVFEStateDirty());

    queue->executeCommandLists(1, &firstHandle, nullptr, false);
    EXPECT_EQ(false, receiver->getMediaVFEStateDirty());

    // The second submission must not leave the flag set again.
    queue->executeCommandLists(1, &secondHandle, nullptr, false);
    EXPECT_EQ(false, receiver->getMediaVFEStateDirty());

    queue->destroy();
    firstList->destroy();
    secondList->destroy();
}
// Product filter for the MEDIA_VFE_STATE programming tests: IGFX_SKYLAKE through IGFX_TIGERLAKE_LP.
using CommandQueueExecuteSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;

// Submits two command lists (both with zero per-thread scratch size) one after another on the
// same queue, then parses the queue's command stream and expects exactly one MEDIA_VFE_STATE.
HWTEST2_F(ExecuteCommandListTests, givenCommandQueueHavingTwoB2BCommandListsThenMVSIsProgrammedOnlyOnce, CommandQueueExecuteSupport) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    ze_command_queue_desc_t queueDesc = {};
    NEO::CommandStreamReceiver *receiver;
    device->getCsrForOrdinalAndIndex(&receiver, 0u, 0u);

    ze_result_t status;
    auto queue = whitebox_cast(CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, status));

    std::unique_ptr<CommandList> firstList(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    firstList->setCommandListPerThreadScratchSize(0u);
    std::unique_ptr<CommandList> secondList(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    secondList->setCommandListPerThreadScratchSize(0u);

    auto firstHandle = firstList->toHandle();
    auto secondHandle = secondList->toHandle();

    ASSERT_NE(nullptr, queue->commandStream);
    queue->executeCommandLists(1, &firstHandle, nullptr, false);
    queue->executeCommandLists(1, &secondHandle, nullptr, false);

    // Parse everything the queue has written so far, starting at the stream base.
    auto streamBytesUsed = queue->commandStream->getUsed();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(queue->commandStream->getCpuBase(), 0), streamBytesUsed));

    auto vfeStateCommands = findAll<MEDIA_VFE_STATE *>(parsedCommands.begin(), parsedCommands.end());
    // Both submissions together are expected to contain a single MEDIA_VFE_STATE.
    ASSERT_EQ(1u, vfeStateCommands.size());

    queue->destroy();
}
// Two queues sharing one CSR. On the first queue the first list requests 512 bytes of per-thread
// scratch and the second requests none: the controller is expected to report 512 after each
// submission and the first queue's stream to hold one MEDIA_VFE_STATE. After resetting both lists
// to zero scratch and running them on a second queue, the controller is expected to still report
// 512 and the second queue's stream to hold no MEDIA_VFE_STATE.
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsWithPTSSsetForFirstCmdListThenMVSIsProgrammedOnlyOnce, CommandQueueExecuteSupport) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    ze_command_queue_desc_t queueDesc = {};
    NEO::CommandStreamReceiver *receiver;
    device->getCsrForOrdinalAndIndex(&receiver, 0u, 0u);

    ze_result_t status;
    auto firstQueue = whitebox_cast(CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, status));

    std::unique_ptr<CommandList> listA(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    std::unique_ptr<CommandList> listB(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    listA->setCommandListPerThreadScratchSize(512u);
    listB->setCommandListPerThreadScratchSize(0u);
    auto handleA = listA->toHandle();
    auto handleB = listB->toHandle();

    // First queue: 512 followed by 0.
    firstQueue->executeCommandLists(1, &handleA, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
    firstQueue->executeCommandLists(1, &handleB, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto firstQueueBytesUsed = firstQueue->commandStream->getUsed();
    GenCmdList firstQueueCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        firstQueueCommands, ptrOffset(firstQueue->commandStream->getCpuBase(), 0), firstQueueBytesUsed));
    auto firstQueueVfeStates = findAll<MEDIA_VFE_STATE *>(firstQueueCommands.begin(), firstQueueCommands.end());
    // Exactly one MEDIA_VFE_STATE is expected in the first queue's stream.
    ASSERT_EQ(1u, firstQueueVfeStates.size());

    // Second queue on the same CSR: both lists reset and set to zero scratch.
    listA->reset();
    listA->setCommandListPerThreadScratchSize(0u);
    listB->reset();
    listB->setCommandListPerThreadScratchSize(0u);

    auto secondQueue = whitebox_cast(CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, status));

    secondQueue->executeCommandLists(1, &handleA, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
    secondQueue->executeCommandLists(1, &handleB, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto secondQueueBytesUsed = secondQueue->commandStream->getUsed();
    GenCmdList secondQueueCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        secondQueueCommands, ptrOffset(secondQueue->commandStream->getCpuBase(), 0), secondQueueBytesUsed));
    auto secondQueueVfeStates = findAll<MEDIA_VFE_STATE *>(secondQueueCommands.begin(), secondQueueCommands.end());
    // No MEDIA_VFE_STATE is expected in the second queue's stream.
    ASSERT_EQ(0u, secondQueueVfeStates.size());

    firstQueue->destroy();
    secondQueue->destroy();
}
// Two queues sharing one CSR. On the first queue the first list requests no per-thread scratch
// and the second requests 512: the controller is expected to report 0 and then 512, and the first
// queue's stream to hold two MEDIA_VFE_STATE commands. The lists are then reset to 512 / 0 and run
// on a second queue, where the controller is expected to stay at 512 and the second queue's
// stream to hold no MEDIA_VFE_STATE.
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSsetForSecondCmdListThenMVSIsProgrammedTwice, CommandQueueExecuteSupport) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    ze_command_queue_desc_t queueDesc = {};
    NEO::CommandStreamReceiver *receiver;
    device->getCsrForOrdinalAndIndex(&receiver, 0u, 0u);

    ze_result_t status;
    auto firstQueue = whitebox_cast(CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, status));

    std::unique_ptr<CommandList> listA(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    std::unique_ptr<CommandList> listB(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    listA->setCommandListPerThreadScratchSize(0u);
    listB->setCommandListPerThreadScratchSize(512u);
    auto handleA = listA->toHandle();
    auto handleB = listB->toHandle();

    // First queue: 0 followed by 512.
    firstQueue->executeCommandLists(1, &handleA, nullptr, false);
    EXPECT_EQ(0u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
    firstQueue->executeCommandLists(1, &handleB, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto firstQueueBytesUsed = firstQueue->commandStream->getUsed();
    GenCmdList firstQueueCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        firstQueueCommands, ptrOffset(firstQueue->commandStream->getCpuBase(), 0), firstQueueBytesUsed));
    auto firstQueueVfeStates = findAll<MEDIA_VFE_STATE *>(firstQueueCommands.begin(), firstQueueCommands.end());
    // Two MEDIA_VFE_STATE commands are expected in the first queue's stream.
    ASSERT_EQ(2u, firstQueueVfeStates.size());

    // Second queue on the same CSR: 512 followed by 0.
    listA->reset();
    listA->setCommandListPerThreadScratchSize(512u);
    listB->reset();
    listB->setCommandListPerThreadScratchSize(0u);

    auto secondQueue = whitebox_cast(CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, status));

    secondQueue->executeCommandLists(1, &handleA, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
    secondQueue->executeCommandLists(1, &handleB, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto secondQueueBytesUsed = secondQueue->commandStream->getUsed();
    GenCmdList secondQueueCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        secondQueueCommands, ptrOffset(secondQueue->commandStream->getCpuBase(), 0), secondQueueBytesUsed));
    auto secondQueueVfeStates = findAll<MEDIA_VFE_STATE *>(secondQueueCommands.begin(), secondQueueCommands.end());
    // No MEDIA_VFE_STATE is expected in the second queue's stream.
    ASSERT_EQ(0u, secondQueueVfeStates.size());

    firstQueue->destroy();
    secondQueue->destroy();
}
// Two queues sharing one CSR, with the requested per-thread scratch size growing between them.
// First queue: both lists request 512, the controller is expected to report 512 and the stream
// to hold one MEDIA_VFE_STATE. Second queue: both lists request 1024, the controller is expected
// to report 1024 and the second queue's stream to hold one MEDIA_VFE_STATE as well.
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSGrowingThenMVSIsProgrammedTwice, CommandQueueExecuteSupport) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    ze_command_queue_desc_t queueDesc = {};
    NEO::CommandStreamReceiver *receiver;
    device->getCsrForOrdinalAndIndex(&receiver, 0u, 0u);

    ze_result_t status;
    auto firstQueue = whitebox_cast(CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, status));

    std::unique_ptr<CommandList> listA(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    std::unique_ptr<CommandList> listB(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    listA->setCommandListPerThreadScratchSize(512u);
    listB->setCommandListPerThreadScratchSize(512u);
    auto handleA = listA->toHandle();
    auto handleB = listB->toHandle();

    // First queue: 512 for both lists.
    firstQueue->executeCommandLists(1, &handleA, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
    firstQueue->executeCommandLists(1, &handleB, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto firstQueueBytesUsed = firstQueue->commandStream->getUsed();
    GenCmdList firstQueueCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        firstQueueCommands, ptrOffset(firstQueue->commandStream->getCpuBase(), 0), firstQueueBytesUsed));
    auto firstQueueVfeStates = findAll<MEDIA_VFE_STATE *>(firstQueueCommands.begin(), firstQueueCommands.end());
    // Exactly one MEDIA_VFE_STATE is expected in the first queue's stream.
    ASSERT_EQ(1u, firstQueueVfeStates.size());

    // Second queue on the same CSR: 1024 for both lists.
    listA->reset();
    listA->setCommandListPerThreadScratchSize(1024u);
    listB->reset();
    listB->setCommandListPerThreadScratchSize(1024u);

    auto secondQueue = whitebox_cast(CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, status));

    secondQueue->executeCommandLists(1, &handleA, nullptr, false);
    EXPECT_EQ(1024u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
    secondQueue->executeCommandLists(1, &handleB, nullptr, false);
    EXPECT_EQ(1024u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto secondQueueBytesUsed = secondQueue->commandStream->getUsed();
    GenCmdList secondQueueCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        secondQueueCommands, ptrOffset(secondQueue->commandStream->getCpuBase(), 0), secondQueueBytesUsed));
    auto secondQueueVfeStates = findAll<MEDIA_VFE_STATE *>(secondQueueCommands.begin(), secondQueueCommands.end());
    // Exactly one MEDIA_VFE_STATE is expected in the second queue's stream.
    ASSERT_EQ(1u, secondQueueVfeStates.size());

    firstQueue->destroy();
    secondQueue->destroy();
}
// Two queues sharing one CSR, with a different per-thread scratch size on every submission.
// First queue: 0 then 512, the controller is expected to report 0 then 512 and the stream to hold
// two MEDIA_VFE_STATE commands. Second queue: 1024 then 2048, the controller is expected to report
// 1024 then 2048 and the second queue's stream to hold two MEDIA_VFE_STATE commands.
HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandListsAndWithPTSSUniquePerCmdListThenMVSIsProgrammedOncePerSubmission, CommandQueueExecuteSupport) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    ze_command_queue_desc_t queueDesc = {};
    NEO::CommandStreamReceiver *receiver;
    device->getCsrForOrdinalAndIndex(&receiver, 0u, 0u);

    ze_result_t status;
    auto firstQueue = whitebox_cast(CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, status));

    std::unique_ptr<CommandList> listA(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    std::unique_ptr<CommandList> listB(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, status)));
    listA->setCommandListPerThreadScratchSize(0u);
    listB->setCommandListPerThreadScratchSize(512u);
    auto handleA = listA->toHandle();
    auto handleB = listB->toHandle();

    // First queue: 0 followed by 512.
    firstQueue->executeCommandLists(1, &handleA, nullptr, false);
    EXPECT_EQ(0u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
    firstQueue->executeCommandLists(1, &handleB, nullptr, false);
    EXPECT_EQ(512u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto firstQueueBytesUsed = firstQueue->commandStream->getUsed();
    GenCmdList firstQueueCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        firstQueueCommands, ptrOffset(firstQueue->commandStream->getCpuBase(), 0), firstQueueBytesUsed));
    auto firstQueueVfeStates = findAll<MEDIA_VFE_STATE *>(firstQueueCommands.begin(), firstQueueCommands.end());
    // Two MEDIA_VFE_STATE commands are expected in the first queue's stream.
    ASSERT_EQ(2u, firstQueueVfeStates.size());

    // Second queue on the same CSR: 1024 followed by 2048.
    listA->reset();
    listA->setCommandListPerThreadScratchSize(1024u);
    listB->reset();
    listB->setCommandListPerThreadScratchSize(2048u);

    auto secondQueue = whitebox_cast(CommandQueue::create(productFamily, device, receiver, &queueDesc, false, false, status));

    secondQueue->executeCommandLists(1, &handleA, nullptr, false);
    EXPECT_EQ(1024u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());
    secondQueue->executeCommandLists(1, &handleB, nullptr, false);
    EXPECT_EQ(2048u, receiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto secondQueueBytesUsed = secondQueue->commandStream->getUsed();
    GenCmdList secondQueueCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        secondQueueCommands, ptrOffset(secondQueue->commandStream->getCpuBase(), 0), secondQueueBytesUsed));
    auto secondQueueVfeStates = findAll<MEDIA_VFE_STATE *>(secondQueueCommands.begin(), secondQueueCommands.end());
    // Two MEDIA_VFE_STATE commands are expected in the second queue's stream.
    ASSERT_EQ(2u, secondQueueVfeStates.size());

    firstQueue->destroy();
    secondQueue->destroy();
}
using CommandQueueSynchronizeTest = Test<ContextFixture>; using CommandQueueSynchronizeTest = Test<ContextFixture>;
HWTEST_F(CommandQueueSynchronizeTest, givenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) { HWTEST_F(CommandQueueSynchronizeTest, givenCallToSynchronizeThenCorrectEnableTimeoutAndTimeoutValuesAreUsed) {

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2020 Intel Corporation * Copyright (C) 2020-2021 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -195,5 +195,68 @@ HWTEST_F(CommandQueueExecuteCommandLists, whenExecutingCommandListsThenEndingPip
commandQueue->destroy(); commandQueue->destroy();
} }
// Product filter for the MEDIA_VFE_STATE programming test: IGFX_SKYLAKE through IGFX_TIGERLAKE_LP.
using CommandQueueExecuteSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;

// Submits the fixture's command lists in a single executeCommandLists call, twice.
// First call: lists 0 and 1 request 512 and 1024 bytes of per-thread scratch, the controller is
// expected to report 1024 and the stream to hold one MEDIA_VFE_STATE.
// Second call: after a reset the lists request 2048 and 1024, the controller is expected to
// report 2048 and the stream, parsed again from its base, to hold two MEDIA_VFE_STATE commands.
HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandQueueHaving2CommandListsThenMVSIsProgrammedWithMaxPTSS, CommandQueueExecuteSupport) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;
    using PARSE = typename FamilyType::PARSE;

    ze_command_queue_desc_t queueDesc = {};
    ze_result_t status;
    auto queue = whitebox_cast(CommandQueue::create(productFamily,
                                                    device,
                                                    neoDevice->getDefaultEngine().commandStreamReceiver,
                                                    &queueDesc,
                                                    false,
                                                    false,
                                                    status));

    // First submission: 512 and 1024.
    CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(512u);
    CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);

    ASSERT_NE(nullptr, queue->commandStream);
    auto bytesBeforeFirstSubmit = queue->commandStream->getUsed();

    auto firstSubmitResult = queue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, firstSubmitResult);
    EXPECT_EQ(1024u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto bytesAfterFirstSubmit = queue->commandStream->getUsed();
    ASSERT_GT(bytesAfterFirstSubmit, bytesBeforeFirstSubmit);

    GenCmdList commandsAfterFirstSubmit;
    ASSERT_TRUE(PARSE::parseCommandBuffer(commandsAfterFirstSubmit,
                                          ptrOffset(queue->commandStream->getCpuBase(), 0),
                                          bytesAfterFirstSubmit));
    auto vfeStatesAfterFirstSubmit = findAll<MEDIA_VFE_STATE *>(commandsAfterFirstSubmit.begin(), commandsAfterFirstSubmit.end());
    // Exactly one MEDIA_VFE_STATE is expected so far.
    ASSERT_EQ(1u, vfeStatesAfterFirstSubmit.size());

    // Second submission: reset both lists, then 2048 and 1024.
    CommandList::fromHandle(commandLists[0])->reset();
    CommandList::fromHandle(commandLists[1])->reset();
    CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(2048u);
    CommandList::fromHandle(commandLists[1])->setCommandListPerThreadScratchSize(1024u);

    ASSERT_NE(nullptr, queue->commandStream);
    auto bytesBeforeSecondSubmit = queue->commandStream->getUsed();

    auto secondSubmitResult = queue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, secondSubmitResult);
    EXPECT_EQ(2048u, neoDevice->getDefaultEngine().commandStreamReceiver->getScratchSpaceController()->getPerThreadScratchSpaceSize());

    auto bytesAfterSecondSubmit = queue->commandStream->getUsed();
    ASSERT_GT(bytesAfterSecondSubmit, bytesBeforeSecondSubmit);

    // The stream is parsed from its base again, so the count below covers both submissions.
    GenCmdList commandsAfterSecondSubmit;
    ASSERT_TRUE(PARSE::parseCommandBuffer(commandsAfterSecondSubmit,
                                          ptrOffset(queue->commandStream->getCpuBase(), 0),
                                          bytesAfterSecondSubmit));
    auto vfeStatesAfterSecondSubmit = findAll<MEDIA_VFE_STATE *>(commandsAfterSecondSubmit.begin(), commandsAfterSecondSubmit.end());
    // Two MEDIA_VFE_STATE commands are expected in total.
    ASSERT_EQ(2u, vfeStatesAfterSecondSubmit.size());

    queue->destroy();
}
} // namespace ult } // namespace ult
} // namespace L0 } // namespace L0

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2020 Intel Corporation * Copyright (C) 2020-2021 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -178,12 +178,12 @@ HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledAndRequiredGsbaWhenCommandListIsE
GTEST_SKIP(); GTEST_SKIP();
} }
commandQueue->commandQueuePerThreadScratchSize = 4096;
auto usedSpaceBefore = commandQueue->commandStream->getUsed(); auto usedSpaceBefore = commandQueue->commandStream->getUsed();
ze_command_list_handle_t commandLists[] = { ze_command_list_handle_t commandLists[] = {
CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue)->toHandle()}; CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue)->toHandle()};
CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(4096);
uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2018-2020 Intel Corporation * Copyright (C) 2018-2021 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -131,6 +131,7 @@ class CommandStreamReceiver {
void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; } void overrideDispatchPolicy(DispatchMode overrideValue) { this->dispatchMode = overrideValue; }
void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; } void setMediaVFEStateDirty(bool dirty) { mediaVfeStateDirty = dirty; }
// Returns the current value of mediaVfeStateDirty, the flag written by setMediaVFEStateDirty().
bool getMediaVFEStateDirty() {
    return mediaVfeStateDirty;
}
void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize); void setRequiredScratchSizes(uint32_t newRequiredScratchSize, uint32_t newRequiredPrivateScratchSize);
GraphicsAllocation *getScratchAllocation(); GraphicsAllocation *getScratchAllocation();

View File

@@ -49,6 +49,9 @@ class ScratchSpaceController {
virtual uint64_t calculateNewGSH() = 0; virtual uint64_t calculateNewGSH() = 0;
virtual uint64_t getScratchPatchAddress() = 0; virtual uint64_t getScratchPatchAddress() = 0;
// Returns scratchSizeBytes divided by computeUnitsUsedForScratch, narrowed to 32 bits.
// NOTE(review): assumes computeUnitsUsedForScratch is non-zero — confirm it is always
// initialized before this is called.
inline uint32_t getPerThreadScratchSpaceSize() {
    const auto bytesPerUnit = scratchSizeBytes / computeUnitsUsedForScratch;
    return static_cast<uint32_t>(bytesPerUnit);
}
virtual void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) = 0; virtual void reserveHeap(IndirectHeap::Type heapType, IndirectHeap *&indirectHeap) = 0;
virtual void programHeaps(HeapContainer &heapContainer, virtual void programHeaps(HeapContainer &heapContainer,

View File

@@ -31,7 +31,7 @@ void ScratchSpaceControllerBase::setRequiredScratchSpace(void *sshBaseAddress,
bool &stateBaseAddressDirty, bool &stateBaseAddressDirty,
bool &vfeStateDirty) { bool &vfeStateDirty) {
size_t requiredScratchSizeInBytes = requiredPerThreadScratchSize * computeUnitsUsedForScratch; size_t requiredScratchSizeInBytes = requiredPerThreadScratchSize * computeUnitsUsedForScratch;
if (requiredScratchSizeInBytes && (!scratchAllocation || scratchSizeBytes < requiredScratchSizeInBytes)) { if (requiredScratchSizeInBytes && (scratchSizeBytes < requiredScratchSizeInBytes)) {
if (scratchAllocation) { if (scratchAllocation) {
scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId()); scratchAllocation->updateTaskCount(currentTaskCount, osContext.getContextId());
csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION); csrAllocationStorage.storeAllocation(std::unique_ptr<GraphicsAllocation>(scratchAllocation), TEMPORARY_ALLOCATION);