// Review notes for the patch below: a git diff touching three Level Zero files. In this
// view the diff is whitespace-collapsed onto a few long lines.
//
// 1) level_zero/core/source/cmdlist/cmdlist_hw.inl, appendWaitOnEvents hunk
//    - eventStateClear is now initialised from Event::State::STATE_CLEARED instead of a
//      static_cast of -1.
//    - dcFlushRequired is accumulated with `!!event->waitScope` instead of a ternary.
//    - New: estimatedBufferSize is summed up and passed to increaseCommandStreamSpace()
//      ahead of the existing `if (dcFlushRequired)` block. The sum consists of:
//        * only when dcFlushRequired is set: getMiFlushDwCmdSizeForDataWrite() if
//          isCopyOnly() is true, otherwise getSizeForSinglePipeControl();
//        * getSizeMiSemaphoreWait() added once per packet reported by
//          event->getPacketsInUse(), for each of the numEvents handles in phEvent.
//    - NOTE(review): the inner per-packet loop declares its own `i`, shadowing the outer
//      event index `i`. The inner body does not read the index, but a distinct name
//      would be clearer.
//    - NOTE(review): the inner loop adds the same call result packetsToWait times;
//      presumably that value does not change between calls, in which case a single
//      multiplication would be equivalent - confirm before changing.
//
// 2) level_zero/core/source/event/event.h hunks
//    - Adds one #include line plus a blank line; the header name is not visible in this
//      view (presumably <limits>, given the numeric_limits use below - confirm).
//    - STATE_CLEARED is now initialised with std::numeric_limits::max() instead of a
//      static_cast of -1; STATE_INITIAL still aliases STATE_CLEARED.
//
// 3) level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp
//    - Adds HWTEST_F givenCommandBufferIsEmptyWhenAppendingWaitOnEventThenAllocateNewCommandBuffer.
//    - Setup: takes getAvailableSpace() minus sizeof(MI_BATCH_BUFFER_END) from the command
//      stream via getSpace(), creates an event from a ze_event_desc_t whose last field is
//      ZE_EVENT_SCOPE_FLAG_DEVICE, and calls appendWaitOnEvents(1, &hEventHandle).
//    - Expectations: the call returns ZE_RESULT_SUCCESS; getUsed() afterwards equals
//      sizeof(MI_SEMAPHORE_WAIT), plus sizeof(PIPE_CONTROL) when isDcFlushAllowed();
//      the stream's graphics allocation differs from the one captured before the call.
//    - Parsed commands: a pipe control is found only when isDcFlushAllowed(), and then it
//      must have command streamer stall enabled and dc flush enabled. Exactly one
//      semaphore wait is expected, with COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, a data dword
//      equal to std::numeric_limits::max(), WAIT_MODE_POLLING_MODE, and an address that
//      matches event->getGpuAddress(device) under the gpuAddressSpace mask.
//
// NOTE(review): several expressions appear without template arguments or header names
// (e.g. `static_cast(-1)`, `std::unique_ptr(Event::create(...))`, `find(cmdList.begin(), ...)`,
// bare `#include`). This looks like angle-bracket content lost during extraction rather
// than part of the change - verify against the original patch before applying.
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 27dc4d0b06..1b1934b6b3 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1869,16 +1869,33 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu } uint64_t gpuAddr = 0; - constexpr uint32_t eventStateClear = static_cast(-1); + constexpr uint32_t eventStateClear = Event::State::STATE_CLEARED; bool dcFlushRequired = false; if (NEO::MemorySynchronizationCommands::isDcFlushAllowed()) { for (uint32_t i = 0; i < numEvents; i++) { auto event = Event::fromHandle(phEvent[i]); - dcFlushRequired |= (!event->waitScope) ? false : true; + dcFlushRequired |= !!event->waitScope; } } + size_t estimatedBufferSize = 0; + if (dcFlushRequired) { + if (isCopyOnly()) { + estimatedBufferSize += NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); + } else { + estimatedBufferSize += NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl(); + } + } + for (uint32_t i = 0; i < numEvents; i++) { + auto event = Event::fromHandle(phEvent[i]); + uint32_t packetsToWait = event->getPacketsInUse(); + for (uint32_t i = 0u; i < packetsToWait; i++) { + estimatedBufferSize += NEO::EncodeSempahore::getSizeMiSemaphoreWait(); + } + } + increaseCommandStreamSpace(estimatedBufferSize); + if (dcFlushRequired) { if (isCopyOnly()) { NEO::MiFlushArgs args; diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 83ec6a6153..3c1d5ea4f9 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -15,6 +15,8 @@ #include "level_zero/core/source/driver/driver_handle.h" #include +#include + struct _ze_event_handle_t {}; struct _ze_event_pool_handle_t {}; @@ -40,7 +42,7 @@ struct Event : _ze_event_handle_t { virtual ze_result_t queryTimestampsExp(Device *device, uint32_t *pCount, ze_kernel_timestamp_result_t *pTimestamps) = 0; enum State : 
uint32_t { STATE_SIGNALED = 0u, - STATE_CLEARED = static_cast(-1), + STATE_CLEARED = std::numeric_limits::max(), STATE_INITIAL = STATE_CLEARED }; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index 33e269288b..e8166f3cb7 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -289,5 +289,79 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenCommandListWhenAppendWriteGlobalTim ASSERT_TRUE(postSyncFound); } +HWTEST_F(CommandListAppendWaitOnEvent, givenCommandBufferIsEmptyWhenAppendingWaitOnEventThenAllocateNewCommandBuffer) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + + auto consumeSpace = commandList->commandContainer.getCommandStream()->getAvailableSpace(); + consumeSpace -= sizeof(MI_BATCH_BUFFER_END); + commandList->commandContainer.getCommandStream()->getSpace(consumeSpace); + + size_t expectedConsumedSpace = sizeof(MI_SEMAPHORE_WAIT); + if (MemorySynchronizationCommands::isDcFlushAllowed()) { + expectedConsumedSpace += sizeof(PIPE_CONTROL); + } + + const ze_event_desc_t eventDesc = { + ZE_STRUCTURE_TYPE_EVENT_DESC, + nullptr, + 0, + 0, + ZE_EVENT_SCOPE_FLAG_DEVICE}; + + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + ze_event_handle_t hEventHandle = event->toHandle(); + + auto oldCommandBuffer = commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); + auto result = commandList->appendWaitOnEvents(1, &hEventHandle); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + auto newCommandBuffer = 
commandList->commandContainer.getCommandStream()->getGraphicsAllocation(); + + EXPECT_EQ(expectedConsumedSpace, usedSpaceAfter); + EXPECT_NE(oldCommandBuffer, newCommandBuffer); + + auto gpuAddress = event->getGpuAddress(device); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + commandList->commandContainer.getCommandStream()->getCpuBase(), + usedSpaceAfter)); + + auto itorPC = find(cmdList.begin(), cmdList.end()); + if (MemorySynchronizationCommands::isDcFlushAllowed()) { + ASSERT_NE(cmdList.end(), itorPC); + { + auto cmd = genCmdCast(*itorPC); + ASSERT_NE(cmd, nullptr); + + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(), cmd->getDcFlushEnable()); + } + } else { + EXPECT_EQ(cmdList.end(), itorPC); + } + + auto itorSW = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, itorSW.size()); + uint32_t semaphoreWaitsFound = 0; + for (auto it : itorSW) { + auto cmd = genCmdCast(*it); + auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; + + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, + cmd->getCompareOperation()); + EXPECT_EQ(cmd->getSemaphoreDataDword(), std::numeric_limits::max()); + EXPECT_EQ(gpuAddress & addressSpace, cmd->getSemaphoreGraphicsAddress() & addressSpace); + EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, cmd->getWaitMode()); + + semaphoreWaitsFound++; + gpuAddress += event->getSinglePacketSize(); + } + EXPECT_EQ(1u, semaphoreWaitsFound); +} + } // namespace ult } // namespace L0