Optimize ExecuteCommandList.

- Reuse the vector instead of creating a new one on every function call.

Change-Id: I584bfbd7757e7b8851ae4970b740adf20659bcb1
Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek 2020-10-16 15:18:09 +02:00 committed by sys_ocldev
parent beeaae0e26
commit 1eefd3743f
3 changed files with 43 additions and 7 deletions

View File

@ -109,12 +109,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
bool directSubmissionEnabled = csr->isDirectSubmissionEnabled();
NEO::ResidencyContainer residencyContainer;
L0::Fence *fence = nullptr;
NEO::HeapContainer heapContainer;
heapContainer.reserve(numCommandLists);
device->activateMetricGroups();
size_t totalCmdBuffers = 0;
@ -359,6 +355,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
this->synchronize(std::numeric_limits<uint64_t>::max());
}
this->residencyContainer.clear();
this->heapContainer.clear();
return ZE_RESULT_SUCCESS;
}

View File

@ -10,6 +10,7 @@
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/command_stream/submissions_aggregator.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
@ -97,6 +98,8 @@ struct CommandQueueImp : public CommandQueue {
bool frontEndInit = false;
bool gpgpuEnabled = false;
CommandBufferManager buffers;
NEO::ResidencyContainer residencyContainer;
NEO::HeapContainer heapContainer;
};
} // namespace L0

View File

@ -358,14 +358,19 @@ template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandQueue : public L0::CommandQueueHw<gfxCoreFamily> {
public:
using L0::CommandQueueHw<gfxCoreFamily>::CommandQueueHw;
MockCommandQueue(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) : L0::CommandQueueHw<gfxCoreFamily>(device, csr, desc) {}
using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
NEO::HeapContainer heapContainer;
using BaseClass::heapContainer;
using BaseClass::residencyContainer;
NEO::HeapContainer mockHeapContainer;
// Mock override used by the tests: copies the heap container passed in by the
// caller into this mock so a test can inspect it after the call returns.
// The residency, scratchController, gsbaState and frontEndState arguments are
// not used by this override.
void handleScratchSpace(NEO::ResidencyContainer &residency,
NEO::HeapContainer &heapContainer,
NEO::ScratchSpaceController *scratchController,
bool &gsbaState, bool &frontEndState) override {
this->heapContainer = heapContainer;
this->mockHeapContainer = heapContainer;
}
void programFrontEnd(uint64_t scratchAddress, NEO::LinearStream &commandStream) override {
@ -396,7 +401,36 @@ HWTEST2_F(CommandQueueDestroy, givenCommandQueueAndCommandListWithSshAndScratchW
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(commandQueue->heapContainer.size(), 3u);
EXPECT_EQ(commandQueue->mockHeapContainer.size(), 3u);
commandQueue->destroy();
commandList->destroy();
alignedFree(alloc);
}
using ExecuteCommandListTests = Test<ContextFixture>;
HWTEST2_F(ExecuteCommandListTests, givenExecuteCommandListWhenItReturnsThenContainersAreEmpty, CommandQueueExecuteTestSupport) {
ze_command_queue_desc_t desc = {};
NEO::CommandStreamReceiver *csr;
device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
auto commandQueue = new MockCommandQueue<gfxCoreFamily>(device, csr, &desc);
commandQueue->initialize(false);
auto commandList = new CommandListCoreFamily<gfxCoreFamily>();
commandList->initialize(device, NEO::EngineGroupType::Compute);
commandList->commandListPerThreadScratchSize = 100u;
auto commandListHandle = commandList->toHandle();
void *alloc = alignedMalloc(0x100, 0x100);
NEO::GraphicsAllocation graphicsAllocation1(0, NEO::GraphicsAllocation::AllocationType::BUFFER, alloc, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u);
NEO::GraphicsAllocation graphicsAllocation2(0, NEO::GraphicsAllocation::AllocationType::BUFFER, alloc, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u);
commandList->commandContainer.sshAllocations.push_back(&graphicsAllocation1);
commandList->commandContainer.sshAllocations.push_back(&graphicsAllocation2);
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(0u, commandQueue->residencyContainer.size());
EXPECT_EQ(0u, commandQueue->heapContainer.size());
commandQueue->destroy();
commandList->destroy();
alignedFree(alloc);