mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-25 05:24:02 +08:00
Correct linear stream size estimation - dispatch task count post sync
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com> Related-To: NEO-7156
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
38fd01ef41
commit
3223a0bace
@@ -110,8 +110,8 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
ze_command_list_handle_t *phCommandLists,
|
||||
uint32_t numCommandLists,
|
||||
ze_fence_handle_t hFence);
|
||||
inline bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
|
||||
inline size_t estimateLinearStreamSizeInitial(CommandListExecutionContext &ctx,
|
||||
MOCKABLE_VIRTUAL bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
|
||||
inline size_t estimateLinearStreamSizeInitial(const CommandListExecutionContext &ctx,
|
||||
ze_command_list_handle_t *phCommandLists,
|
||||
uint32_t numCommandLists);
|
||||
inline void setFrontEndStateProperties(CommandListExecutionContext &ctx);
|
||||
@@ -119,7 +119,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
inline size_t estimateLinearStreamSizeComplementary(CommandListExecutionContext &ctx,
|
||||
ze_command_list_handle_t *phCommandLists,
|
||||
uint32_t numCommandLists);
|
||||
inline ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize);
|
||||
MOCKABLE_VIRTUAL ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize);
|
||||
inline void allocateGlobalFenceAndMakeItResident();
|
||||
inline void allocateWorkPartitionAndMakeItResident();
|
||||
inline void allocateTagsManagerHeapsAndMakeThemResidentIfSWTagsEnabled(NEO::LinearStream &commandStream);
|
||||
|
||||
@@ -120,14 +120,16 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||
size_t linearStreamSizeEstimate = this->estimateLinearStreamSizeInitial(ctx, phCommandLists, numCommandLists);
|
||||
|
||||
this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);
|
||||
|
||||
this->handleScratchSpaceAndUpdateGSBAStateDirtyFlag(ctx);
|
||||
this->setFrontEndStateProperties(ctx);
|
||||
|
||||
linearStreamSizeEstimate += this->estimateLinearStreamSizeComplementary(ctx, phCommandLists, numCommandLists);
|
||||
linearStreamSizeEstimate += this->computePreemptionSize(ctx, phCommandLists, numCommandLists);
|
||||
linearStreamSizeEstimate += this->computeDebuggerCmdsSize(ctx);
|
||||
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false);
|
||||
|
||||
if (ctx.isDispatchTaskCountPostSyncRequired) {
|
||||
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false);
|
||||
}
|
||||
|
||||
NEO::LinearStream child(nullptr);
|
||||
if (const auto ret = this->makeAlignedChildStreamAndSetGpuBase(child, linearStreamSizeEstimate); ret != ZE_RESULT_SUCCESS) {
|
||||
@@ -546,7 +548,7 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(
|
||||
CommandListExecutionContext &ctx,
|
||||
const CommandListExecutionContext &ctx,
|
||||
ze_command_list_handle_t *phCommandLists,
|
||||
uint32_t numCommandLists) {
|
||||
|
||||
|
||||
@@ -377,6 +377,51 @@ HWTEST2_F(CommandQueueCreate, givenGpuHangInReservingLinearStreamWhenExecutingCo
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
struct MockCommandQueueHwEstimateSizeTest : public MockCommandQueueHw<gfxCoreFamily> {
|
||||
|
||||
MockCommandQueueHwEstimateSizeTest(L0::Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc)
|
||||
: MockCommandQueueHw<gfxCoreFamily>(device, csr, desc) {}
|
||||
|
||||
ze_result_t makeAlignedChildStreamAndSetGpuBase(NEO::LinearStream &child, size_t requiredSize) override {
|
||||
requiredSizeCalled = requiredSize;
|
||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
}
|
||||
|
||||
bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const override {
|
||||
return dispatchTaskCountPostSyncRequired;
|
||||
}
|
||||
bool dispatchTaskCountPostSyncRequired = false;
|
||||
size_t requiredSizeCalled = 0u;
|
||||
};
|
||||
|
||||
HWTEST2_F(CommandQueueCreate, GivenDispatchTaskCountPostSyncRequiredWhenExecuteCommandListsThenEstimatedSizeIsCorrect, IsAtLeastSkl) {
|
||||
const ze_command_queue_desc_t desc = {};
|
||||
auto commandQueue = new MockCommandQueueHwEstimateSizeTest<gfxCoreFamily>(device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc);
|
||||
commandQueue->initialize(false, false);
|
||||
|
||||
ze_result_t returnValue;
|
||||
auto commandList = std::unique_ptr<CommandList>(whiteboxCast(
|
||||
CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
||||
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
|
||||
commandQueue->dispatchTaskCountPostSyncRequired = false;
|
||||
commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
|
||||
auto estimatedSizeWithoutBarrier = commandQueue->requiredSizeCalled;
|
||||
|
||||
commandQueue->dispatchTaskCountPostSyncRequired = true;
|
||||
commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
|
||||
auto estimatedSizeWithtBarrier = commandQueue->requiredSizeCalled;
|
||||
|
||||
auto sizeForBarrier = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getHwInfo(), false);
|
||||
EXPECT_GT(sizeForBarrier, 0u);
|
||||
|
||||
EXPECT_EQ(estimatedSizeWithtBarrier, estimatedSizeWithoutBarrier + sizeForBarrier);
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitAndRegularCmdListWhenDispatchTaskCountWriteThenPipeControlFlushed) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
|
||||
|
||||
Reference in New Issue
Block a user