Correct linear stream size estimation - dispatch task count post sync

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
Related-To: NEO-7156
This commit is contained in:
Kamil Kopryk
2022-09-07 05:53:23 +00:00
committed by Compute-Runtime-Automation
parent 38fd01ef41
commit 3223a0bace
3 changed files with 53 additions and 6 deletions

View File

@@ -110,8 +110,8 @@ struct CommandQueueHw : public CommandQueueImp {
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists,
ze_fence_handle_t hFence);
inline bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
inline size_t estimateLinearStreamSizeInitial(CommandListExecutionContext &ctx,
MOCKABLE_VIRTUAL bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
inline size_t estimateLinearStreamSizeInitial(const CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists);
inline void setFrontEndStateProperties(CommandListExecutionContext &ctx);
@@ -119,7 +119,7 @@ struct CommandQueueHw : public CommandQueueImp {
inline size_t estimateLinearStreamSizeComplementary(CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists);
inline ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize);
MOCKABLE_VIRTUAL ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize);
inline void allocateGlobalFenceAndMakeItResident();
inline void allocateWorkPartitionAndMakeItResident();
inline void allocateTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &commandStream);

View File

@@ -120,14 +120,16 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
size_t linearStreamSizeEstimate = this->estimateLinearStreamSizeInitial(ctx, phCommandLists, numCommandLists);
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
this->handleScratchSpaceAndUpdateGSBAStateDirtyFlag(ctx);
this->setFrontEndStateProperties(ctx);
linearStreamSizeEstimate += this->estimateLinearStreamSizeComplementary(ctx, phCommandLists, numCommandLists);
linearStreamSizeEstimate += this->computePreemptionSize(ctx, phCommandLists, numCommandLists);
linearStreamSizeEstimate += this->computeDebuggerCmdsSize(ctx);
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false);
if (ctx.isDispatchTaskCountPostSyncRequired) {
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false);
}
NEO::LinearStream child(nullptr);
if (const auto ret = this->makeAlignedChildStreamAndSetGpuBase(child, linearStreamSizeEstimate); ret != ZE_RESULT_SUCCESS) {
@@ -546,7 +548,7 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(
CommandListExecutionContext &ctx,
const CommandListExecutionContext &ctx,
ze_command_list_handle_t *phCommandLists,
uint32_t numCommandLists) {

View File

@@ -377,6 +377,51 @@ HWTEST2_F(CommandQueueCreate, givenGpuHangInReservingLinearStreamWhenExecutingCo
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
}
// Test double used to observe the linear stream size requested by the command queue.
// It captures the size handed to makeAlignedChildStreamAndSetGpuBase and lets the test
// decide what isDispatchTaskCountPostSyncRequired reports.
template <GFXCORE_FAMILY gfxCoreFamily>
struct MockCommandQueueHwEstimateSizeTest : public MockCommandQueueHw<gfxCoreFamily> {
    // Size passed to the most recent makeAlignedChildStreamAndSetGpuBase call (0 until the first call).
    size_t requiredSizeCalled = 0u;
    // Value returned by the isDispatchTaskCountPostSyncRequired override; set by the test.
    bool dispatchTaskCountPostSyncRequired = false;

    MockCommandQueueHwEstimateSizeTest(L0::Device *device,
                                       NEO::CommandStreamReceiver *csr,
                                       const ze_command_queue_desc_t *desc)
        : MockCommandQueueHw<gfxCoreFamily>(device, csr, desc) {
    }

    // Ignores both arguments and reports the value configured by the test.
    bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const override {
        return this->dispatchTaskCountPostSyncRequired;
    }

    // Stores the requested size and always fails with ZE_RESULT_ERROR_DEVICE_LOST; the child
    // stream is left untouched. NOTE(review): presumably the failure makes the caller bail out
    // right after size estimation - confirm against executeCommandListsRegular.
    ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize) override {
        this->requiredSizeCalled = requiredSize;
        return ZE_RESULT_ERROR_DEVICE_LOST;
    }
};
// Checks that enabling the dispatch-task-count post sync increases the linear stream size
// requested by executeCommandLists by exactly the size of one barrier with post-sync operation.
HWTEST2_F(CommandQueueCreate, GivenDispatchTaskCountPostSyncRequiredWhenExecuteCommandListsThenEstimatedSizeIsCorrect, IsAtLeastSkl) {
    // The command list is created and asserted before the queue is allocated: ASSERT_NE returns
    // from the test body on failure, which would skip commandQueue->destroy() and leak the
    // raw-allocated queue if the queue already existed at that point.
    ze_result_t returnValue;
    auto commandList = std::unique_ptr<CommandList>(whiteboxCast(
        CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
    ASSERT_NE(nullptr, commandList);
    ze_command_list_handle_t cmdListHandle = commandList->toHandle();

    const ze_command_queue_desc_t desc = {};
    auto commandQueue = new MockCommandQueueHwEstimateSizeTest<gfxCoreFamily>(device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc);
    commandQueue->initialize(false, false);

    // The mock records the requested stream size on each call; the return value of
    // executeCommandLists is intentionally not inspected here.
    commandQueue->dispatchTaskCountPostSyncRequired = false;
    commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
    auto estimatedSizeWithoutBarrier = commandQueue->requiredSizeCalled;

    commandQueue->dispatchTaskCountPostSyncRequired = true;
    commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
    auto estimatedSizeWithBarrier = commandQueue->requiredSizeCalled;

    auto sizeForBarrier = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getHwInfo(), false);
    // Guard against a vacuous comparison: the barrier must contribute a non-zero size.
    EXPECT_GT(sizeForBarrier, 0u);
    EXPECT_EQ(estimatedSizeWithBarrier, estimatedSizeWithoutBarrier + sizeForBarrier);

    commandQueue->destroy();
}
HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitAndRegularCmdListWhenDispatchTaskCountWriteThenPipeControlFlushed) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;