diff --git a/level_zero/core/source/cmdlist/cmdlist.cpp b/level_zero/core/source/cmdlist/cmdlist.cpp index fb3d126b06..97e02cc3d1 100644 --- a/level_zero/core/source/cmdlist/cmdlist.cpp +++ b/level_zero/core/source/cmdlist/cmdlist.cpp @@ -188,4 +188,11 @@ bool CommandList::setupTimestampEventForMultiTile(Event *signalEvent) { return false; } +void CommandList::synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t *waitEventList) { + for (uint32_t i = 0; i < numWaitEvents; i++) { + Event *event = Event::fromHandle(waitEventList[i]); + event->hostSynchronize(std::numeric_limits::max()); + } +} + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index acc74a7c12..96236cbee9 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -291,6 +291,10 @@ struct CommandList : _ze_command_list_handle_t { return prefetchContext; } + bool eventWaitlistSyncRequired() const { + return this->isTbxMode && !this->isSyncModeQueue; + } + ze_context_handle_t hContext = nullptr; std::vector printfKernelContainer; CommandQueue *cmdQImmediate = nullptr; @@ -318,6 +322,7 @@ struct CommandList : _ze_command_list_handle_t { bool getDcFlushRequired(bool externalCondition) const { return externalCondition ? dcFlushSupport : false; } + void synchronizeEventList(uint32_t numWaitEvents, ze_event_handle_t *waitEventList); std::map hostPtrMap; std::vector ownedPrivateAllocations; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index c7447700fd..e2ab5e3c99 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -139,6 +139,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily dependenciesPresent{false}; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 79fb9d96ec..af37a648d3 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -250,13 +250,13 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernel( if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); } - if (waitForEventsFromHost()) { - for (uint32_t i = 0; i < numWaitEvents; i++) { - auto event = Event::fromHandle(phWaitEvents[i]); - event->hostSynchronize(std::numeric_limits::max()); + bool hostWait = waitForEventsFromHost(); + if (hostWait || this->eventWaitlistSyncRequired()) { + this->synchronizeEventList(numWaitEvents, phWaitEvents); + if (hostWait) { + numWaitEvents = 0u; + phWaitEvents = nullptr; } - numWaitEvents = 0u; - phWaitEvents = nullptr; } auto ret = CommandListCoreFamily::appendLaunchKernel(kernelHandle, threadGroupDimensions, hSignalEvent, numWaitEvents, phWaitEvents, @@ -271,6 +271,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernelInd if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer, hSignalEvent, numWaitEvents, phWaitEvents); @@ -286,6 +287,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier( if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); @@ -304,6 +306,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } ze_result_t ret; @@ -343,6 +346,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegio if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } ze_result_t ret; @@ -379,6 +383,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryFill(void if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents); @@ -446,6 +451,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(ui } if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phWaitEvents); this->dependenciesPresent = true; @@ -459,6 +465,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendWriteGlobalTime if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents); @@ -495,6 +502,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyRegion if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents); @@ -512,6 +520,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMe if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent, numWaitEvents, phWaitEvents); @@ -530,6 +539,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemo if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent, numWaitEvents, phWaitEvents); @@ -546,6 +556,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryRangesBar ze_event_handle_t *phWaitEvents) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, phWaitEvents); } auto ret = CommandListCoreFamily::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents); return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent); @@ -559,6 +570,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchCooperati ze_event_handle_t *waitEventHandles) { if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(); + checkWaitEventsState(numWaitEvents, waitEventHandles); } auto ret = CommandListCoreFamily::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles); return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent); @@ -670,4 +682,11 @@ void *CommandListCoreFamilyImmediate::obtainLockedPtrFromDevice(v return ptrOffset(alloc->getLockedPtr(), offset); } +template +void CommandListCoreFamilyImmediate::checkWaitEventsState(uint32_t numWaitEvents, ze_event_handle_t *waitEventList) { + if (this->eventWaitlistSyncRequired()) { + this->synchronizeEventList(numWaitEvents, waitEventList); + } +} + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 175ce08081..f4ba214018 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -158,7 +158,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device } commandList->cmdQImmediate = commandQueue; - commandList->isTbxMode = (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX) || (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB); + commandList->isTbxMode = csr->isTbxMode(); commandList->commandListPreemptionMode = device->getDevicePreemptionMode(); commandList->isBcsSplitNeeded = deviceImp->bcsSplit.setupDevice(productFamily, internalUsage, desc, csr); diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index dbaef0fbc1..e815d3f14b 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -17,8 +17,7 @@ template Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) { auto neoDevice = device->getNEODevice(); auto csr = neoDevice->getDefaultEngine().commandStreamReceiver; - bool downloadAllocationRequired = (csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX || - csr->getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB); + bool downloadAllocationRequired = csr->isTbxMode(); auto event = new EventImp(eventPool, desc->index, device, downloadAllocationRequired); UNRECOVERABLE_IF(event == nullptr); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index ccec8bd6bc..60378602f7 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -10,6 +10,7 @@ #include "shared/source/os_interface/hw_info_config.h" #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" +#include "level_zero/core/test/unit_tests/mocks/mock_event.h" #include "gtest/gtest.h" @@ -21,17 +22,17 @@ void CommandListFixture::setUp() { ze_result_t returnValue; commandList.reset(whiteboxCast(CommandList::create(device->getHwInfo().platform.eProductFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue))); - ze_event_pool_desc_t eventPoolDesc = {}; + ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 2; - ze_event_desc_t eventDesc = {}; + ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; eventDesc.index = 0; eventDesc.wait = 0; eventDesc.signal = 0; - eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); } void CommandListFixture::tearDown() { @@ -62,22 +63,20 @@ void MultiTileCommandListFixtureInit::setUpParams(bool createImmediate, bool cre } ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); - ze_event_pool_desc_t eventPoolDesc = {}; + ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; eventPoolDesc.count = 2; - ze_event_desc_t eventDesc = {}; + ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; eventDesc.index = 0; eventDesc.wait = 0; eventDesc.signal = 0; - eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); - event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); } -void ModuleMutableCommandListFixture::setUp(uint32_t revision) { - ModuleImmutableDataFixture::setUp(); - +void ModuleMutableCommandListFixture::setUpImpl(uint32_t revision) { if (revision != 0) { auto revId = NEO::HwInfoConfig::get(device->getHwInfo().platform.eProductFamily)->getHwRevIdFromStepping(revision, device->getHwInfo()); neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId = revId; @@ -85,7 +84,7 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) { ze_result_t returnValue; - ze_command_queue_desc_t queueDesc{}; + ze_command_queue_desc_t queueDesc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; queueDesc.ordinal = 0u; queueDesc.index = 0u; queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; @@ -111,6 +110,12 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) { createKernel(kernel.get()); } +void ModuleMutableCommandListFixture::setUp(uint32_t revision) { + ModuleImmutableDataFixture::setUp(); + + ModuleMutableCommandListFixture::setUpImpl(revision); +} + void ModuleMutableCommandListFixture::tearDown() { commandQueue->destroy(); commandList.reset(nullptr); @@ -178,5 +183,21 @@ void CommandListEventUsedPacketSignalFixture::setUp() { CommandListFixture::setUp(); } +void TbxImmediateCommandListFixture::setEvent() { + auto mockEvent = static_cast(event.get()); + + size_t offset = 0; + if (event->isUsingContextEndOffset()) { + offset = event->getContextEndOffset(); + } + void *completionAddress = ptrOffset(mockEvent->hostAddress, offset); + size_t packets = event->getPacketsInUse(); + EventFieldType signaledValue = Event::STATE_SIGNALED; + for (size_t i = 0; i < packets; i++) { + memcpy(completionAddress, &signaledValue, sizeof(EventFieldType)); + completionAddress = ptrOffset(completionAddress, event->getSinglePacketSize()); + } +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 1dc2e47cd6..d551b4b6c2 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -72,6 +72,7 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture { } void setUp(uint32_t revision); void tearDown(); + void setUpImpl(uint32_t revision); std::unique_ptr mockKernelImmData; std::unique_ptr commandList; @@ -199,5 +200,29 @@ struct CommandListEventUsedPacketSignalFixture : public CommandListFixture { DebugManagerStateRestore restorer; }; +struct TbxImmediateCommandListFixture : public ModuleMutableCommandListFixture { + using EventFieldType = uint32_t; + + template + void setUpT(); + + template + void tearDownT() { + event.reset(nullptr); + eventPool.reset(nullptr); + + ModuleMutableCommandListFixture::tearDown(); + } + + void setEvent(); + + void setUp() {} + void tearDown() {} + + DebugManagerStateRestore restorer; + std::unique_ptr eventPool; + std::unique_ptr event; +}; + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl index e061f24b06..9075d670a4 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl @@ -1103,5 +1103,30 @@ void CmdListLargeGrfFixture::testBody() { } } +template +void TbxImmediateCommandListFixture::setUpT() { + NEO::DebugManager.flags.EnableFlushTaskSubmission.set(1); + + ModuleImmutableDataFixture::setUp(); + + neoDevice->getUltCommandStreamReceiver().commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX; + ModuleMutableCommandListFixture::setUpImpl(0u); + + ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 2; + + ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; + eventDesc.index = 0; + eventDesc.wait = 0; + eventDesc.signal = 0; + + ze_result_t returnValue; + eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + setEvent(); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/fixtures/event_fixture.h b/level_zero/core/test/unit_tests/fixtures/event_fixture.h index 4fb3f18ac0..85a005bed7 100644 --- a/level_zero/core/test/unit_tests/fixtures/event_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/event_fixture.h @@ -55,8 +55,8 @@ struct EventFixture : public DeviceFixture { std::unique_ptr eventPool = nullptr; std::unique_ptr> event; - ze_event_pool_desc_t eventPoolDesc; - ze_event_desc_t eventDesc = {}; + ze_event_pool_desc_t eventPoolDesc = {ZE_STRUCTURE_TYPE_EVENT_POOL_DESC}; + ze_event_desc_t eventDesc = {ZE_STRUCTURE_TYPE_EVENT_DESC}; }; template diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index c1fbb4e262..69069cea90 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -507,8 +507,10 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm using BaseClass::applyMemoryRangesBarrier; using BaseClass::dcFlushSupport; using BaseClass::dependenciesPresent; + using BaseClass::eventWaitlistSyncRequired; using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::isSyncModeQueue; + using BaseClass::isTbxMode; ze_result_t executeCommandListImmediate(bool performMigration) override { ++executeCommandListImmediateCalledCount; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index fe3e9b7eae..7d393ad627 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -874,5 +874,25 @@ HWTEST_F(CommandListCreate, givenFlushTaskFlagEnabledAndAsyncCmdQueueWithCopyOnl EXPECT_EQ(false, commandList->isFlushTaskSubmissionEnabled); } +HWTEST2_F(CommandListCreate, givenAllValuesTbxAndSyncModeFlagsWhenCheckingWaitlistEventSyncRequiredThenExpectTrueOnlyForTbxTrueAndAsyncMode, IsAtLeastSkl) { + MockCommandListImmediateHw cmdList; + + cmdList.isSyncModeQueue = true; + cmdList.isTbxMode = false; + EXPECT_FALSE(cmdList.eventWaitlistSyncRequired()); + + cmdList.isSyncModeQueue = true; + cmdList.isTbxMode = true; + EXPECT_FALSE(cmdList.eventWaitlistSyncRequired()); + + cmdList.isSyncModeQueue = false; + cmdList.isTbxMode = false; + EXPECT_FALSE(cmdList.eventWaitlistSyncRequired()); + + cmdList.isSyncModeQueue = false; + cmdList.isTbxMode = true; + EXPECT_TRUE(cmdList.eventWaitlistSyncRequired()); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index 900e21b1a5..bc31036f53 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -13,7 +13,8 @@ #include "shared/test/common/test_macros/hw_test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" -#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h" +#include "level_zero/core/source/image/image_hw.h" +#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" @@ -571,5 +572,201 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCommandListWhenAppendWaitO EXPECT_FALSE(cmdList.dependenciesPresent); } +using TbxImmediateCommandListTest = Test; + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendLaunchKernelThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + auto eventHandle = event->toHandle(); + ze_group_count_t group = {1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + commandListImmediate->appendLaunchKernel(kernel->toHandle(), &group, nullptr, 1, &eventHandle, launchParams); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendLaunchKernelIndirectThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + auto eventHandle = event->toHandle(); + ze_group_count_t group = {1, 1, 1}; + commandListImmediate->appendLaunchKernelIndirect(kernel->toHandle(), &group, nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendBarrierThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + auto eventHandle = event->toHandle(); + commandListImmediate->appendBarrier(nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendMemoryCopyThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + auto eventHandle = event->toHandle(); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + commandListImmediate->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendMemoryCopyRegionThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + auto eventHandle = event->toHandle(); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + ze_copy_region_t dstRegion = {}; + ze_copy_region_t srcRegion = {}; + commandListImmediate->appendMemoryCopyRegion(dstPtr, &dstRegion, 0, 0, srcPtr, &srcRegion, 0, 0, nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendMemoryFillThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + auto eventHandle = event->toHandle(); + void *dstBuffer = nullptr; + ze_host_mem_alloc_desc_t hostDesc = {}; + context->allocHostMem(&hostDesc, 4096, 4096u, &dstBuffer); + + int one = 1; + commandListImmediate->appendMemoryFill(dstBuffer, reinterpret_cast(&one), sizeof(one), 4096, + nullptr, 1u, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); + + context->freeMem(dstBuffer); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendWaitOnEventsThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + auto eventHandle = event->toHandle(); + commandListImmediate->appendWaitOnEvents(1u, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendWriteGlobalTimestampThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + auto eventHandle = event->toHandle(); + uint64_t *dstptr = reinterpret_cast(0x12345678555500); + commandListImmediate->appendWriteGlobalTimestamp(dstptr, nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendImageCopyRegionThenExpectDownloadAllocations) { + if (!neoDevice->getDeviceInfo().imageSupport) { + GTEST_SKIP(); + } + auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion); + auto mockBuiltinKernel = static_cast *>(kernel); + mockBuiltinKernel->setArgRedescribedImageCallBase = false; + + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + ze_image_desc_t desc = {ZE_STRUCTURE_TYPE_IMAGE_DESC}; + L0::Image *imagePtr; + + auto result = Image::create(neoDevice->getHardwareInfo().platform.eProductFamily, device, &desc, &imagePtr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + std::unique_ptr imageDst(imagePtr); + + result = Image::create(neoDevice->getHardwareInfo().platform.eProductFamily, device, &desc, &imagePtr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + std::unique_ptr imageSrc(imagePtr); + + ze_image_region_t srcRegion = {4, 4, 4, 2, 2, 2}; + ze_image_region_t dstRegion = {4, 4, 4, 2, 2, 2}; + + auto eventHandle = event->toHandle(); + commandListImmediate->appendImageCopyRegion(imageDst->toHandle(), imageSrc->toHandle(), &dstRegion, &srcRegion, nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendImageCopyFromMemoryThenExpectDownloadAllocations) { + if (!neoDevice->getDeviceInfo().imageSupport) { + GTEST_SKIP(); + } + + auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyBufferToImage3dBytes); + auto mockBuiltinKernel = static_cast *>(kernel); + mockBuiltinKernel->setArgRedescribedImageCallBase = false; + + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + void *ptr = reinterpret_cast(0x1234); + + ze_image_desc_t desc = {ZE_STRUCTURE_TYPE_IMAGE_DESC}; + L0::Image *imagePtr; + auto result = Image::create(neoDevice->getHardwareInfo().platform.eProductFamily, device, &desc, &imagePtr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + std::unique_ptr image(imagePtr); + + auto eventHandle = event->toHandle(); + commandListImmediate->appendImageCopyFromMemory(imagePtr->toHandle(), ptr, nullptr, nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendImageCopyToMemoryThenExpectDownloadAllocations) { + if (!neoDevice->getDeviceInfo().imageSupport) { + GTEST_SKIP(); + } + + auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImage3dToBufferBytes); + auto mockBuiltinKernel = static_cast *>(kernel); + mockBuiltinKernel->setArgRedescribedImageCallBase = false; + + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + void *ptr = reinterpret_cast(0x1234); + + ze_image_desc_t desc = {ZE_STRUCTURE_TYPE_IMAGE_DESC}; + L0::Image *imagePtr; + auto result = Image::create(neoDevice->getHardwareInfo().platform.eProductFamily, device, &desc, &imagePtr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + std::unique_ptr image(imagePtr); + + auto eventHandle = event->toHandle(); + commandListImmediate->appendImageCopyToMemory(ptr, imagePtr->toHandle(), nullptr, nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendMemoryRangesBarrierThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + uint32_t numRanges = 1; + const size_t rangeSizes = 1; + const char *ranges[rangeSizes]; + const void **rangesMemory = reinterpret_cast(&ranges[0]); + + auto eventHandle = event->toHandle(); + commandListImmediate->appendMemoryRangesBarrier(numRanges, &rangeSizes, rangesMemory, nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + +HWTEST_TEMPLATED_F(TbxImmediateCommandListTest, givenTbxModeOnFlushTaskImmediateAsyncCommandListWhenAppendLaunchCooperativeKernelThenExpectDownloadAllocations) { + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + + ze_group_count_t groupCount{1, 1, 1}; + auto eventHandle = event->toHandle(); + commandListImmediate->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 1, &eventHandle); + + EXPECT_TRUE(ultCsr.downloadAllocationsCalled); +} + } // namespace ult } // namespace L0 diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 02050c3358..7176448336 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -560,7 +560,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { bool flushBatchedSubmissions() override { return true; } - CommandStreamReceiverType getType() override { + CommandStreamReceiverType getType() const override { return CommandStreamReceiverType::CSR_HW; } diff --git a/shared/source/command_stream/aub_command_stream_receiver_hw.h b/shared/source/command_stream/aub_command_stream_receiver_hw.h index 6f426c75d9..2429bcfc4c 100644 --- a/shared/source/command_stream/aub_command_stream_receiver_hw.h +++ b/shared/source/command_stream/aub_command_stream_receiver_hw.h @@ -99,7 +99,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw &&cmdBuffer); bool initializeTagAllocation(); @@ -385,6 +385,10 @@ class CommandStreamReceiver { return externalCondition ? dcFlushSupport : false; } + bool isTbxMode() const { + return (getType() == NEO::CommandStreamReceiverType::CSR_TBX || getType() == NEO::CommandStreamReceiverType::CSR_TBX_WITH_AUB); + } + protected: void cleanupResources(); void printDeviceIndex(); diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index ffb0d1df72..6be8aa1dd4 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -92,7 +92,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void resetKmdNotifyHelper(KmdNotifyHelper *newHelper); - CommandStreamReceiverType getType() override { + CommandStreamReceiverType getType() const override { return CommandStreamReceiverType::CSR_HW; } diff --git a/shared/source/command_stream/command_stream_receiver_with_aub_dump.h b/shared/source/command_stream/command_stream_receiver_with_aub_dump.h index e0d9b8e59f..0b2655b58d 100644 --- a/shared/source/command_stream/command_stream_receiver_with_aub_dump.h +++ b/shared/source/command_stream/command_stream_receiver_with_aub_dump.h @@ -33,7 +33,7 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR { AubSubCaptureStatus checkAndActivateAubSubCapture(const std::string &kernelName) override; void setupContext(OsContext &osContext) override; - CommandStreamReceiverType getType() override { + CommandStreamReceiverType getType() const override { if (BaseCSR::getType() == CommandStreamReceiverType::CSR_TBX) { return CommandStreamReceiverType::CSR_TBX_WITH_AUB; } diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.h b/shared/source/command_stream/tbx_command_stream_receiver_hw.h index bcfdac3ca1..a16ec1a9d2 100644 --- a/shared/source/command_stream/tbx_command_stream_receiver_hw.h +++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.h @@ -95,7 +95,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw allocationsForDownload = {}; - CommandStreamReceiverType getType() override { + CommandStreamReceiverType getType() const override { return CommandStreamReceiverType::CSR_TBX; } diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 66a854ccde..4a449d60ff 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -354,7 +354,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ BaseClass::ensureCommandBufferAllocation(commandStream, minimumRequiredSize, additionalAllocationSize); } - CommandStreamReceiverType getType() override { + CommandStreamReceiverType getType() const override { return commandStreamReceiverType; } diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index e99c469e55..4af635d576 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -121,7 +121,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; - CommandStreamReceiverType getType() override { + CommandStreamReceiverType getType() const override { return commandStreamReceiverType; } diff --git a/shared/test/common/mocks/mock_csr.h b/shared/test/common/mocks/mock_csr.h index 1c17805c55..08f9d6ae5b 100644 --- a/shared/test/common/mocks/mock_csr.h +++ b/shared/test/common/mocks/mock_csr.h @@ -86,7 +86,7 @@ class MockCsrAub : public MockCsrBase { uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : MockCsrBase(execStamp, executionEnvironment, rootDeviceIndex, deviceBitfield) {} - CommandStreamReceiverType getType() override { + CommandStreamReceiverType getType() const override { return CommandStreamReceiverType::CSR_AUB; } }; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 81aeb77359..fc812006a8 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -2435,4 +2435,23 @@ HWTEST_F(CommandStreamReceiverHwTest, whenFlushTagUpdateThenSetStallingCmdsFlag) EXPECT_EQ(SubmissionStatus::SUCCESS, ultCsr.flushTagUpdate()); EXPECT_TRUE(ultCsr.latestFlushedBatchBuffer.hasStallingCmds); -} \ No newline at end of file +} + +HWTEST_F(CommandStreamReceiverHwTest, givenVariousCsrModeWhenGettingTbxModeThenExpectOnlyWhenModeIsTbxOrTbxWithAub) { + auto &ultCsr = pDevice->getUltCommandStreamReceiver(); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::CSR_HW; + EXPECT_FALSE(ultCsr.isTbxMode()); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::CSR_HW_WITH_AUB; + EXPECT_FALSE(ultCsr.isTbxMode()); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::CSR_AUB; + EXPECT_FALSE(ultCsr.isTbxMode()); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX; + EXPECT_TRUE(ultCsr.isTbxMode()); + + ultCsr.commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX_WITH_AUB; + EXPECT_TRUE(ultCsr.isTbxMode()); +}