diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 419f94642b..752d1f7d1a 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -198,6 +198,8 @@ struct CommandList : _ze_command_list_handle_t { return hostPtrMap; }; + virtual ze_result_t setSyncModeQueue(bool syncMode) = 0; + protected: std::map hostPtrMap; uint32_t commandListPerThreadScratchSize = 0u; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 8356dc16e3..637be15bf0 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -144,6 +144,10 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t executeCommandListImmediate(bool performMigration) override; size_t getReserveSshSize(); + ze_result_t setSyncModeQueue(bool syncMode) override { + return ZE_RESULT_SUCCESS; + } + protected: MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, void *srcPtr, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 1374f0aa6f..7762b72a90 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -89,6 +89,14 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 054fe8d06d..9e999f035a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,8 +8,10 @@ #pragma once #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" +#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" namespace L0 { + template ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernel( ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, @@ -40,12 +42,33 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier( ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - - auto ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); - if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + bool isTimestampEvent = false; + for (uint32_t i = 0; i < numWaitEvents; i++) { + auto event = Event::fromHandle(phWaitEvents[i]); + isTimestampEvent |= (event->isTimestampEvent) ? true : false; + } + if (hSignalEvent) { + auto signalEvent = Event::fromHandle(hSignalEvent); + isTimestampEvent |= signalEvent->isTimestampEvent; + } + if (isSyncModeQueue || isTimestampEvent) { + auto ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); + if (ret == ZE_RESULT_SUCCESS) { + executeCommandListImmediate(true); + } + return ret; + } else { + auto ret = appendWaitOnEvents(numWaitEvents, phWaitEvents); + if (!hSignalEvent) { + NEO::PipeControlArgs args; + auto cmdQueueImp = static_cast(this->cmdQImmediate); + NEO::CommandStreamReceiver *csr = cmdQueueImp->getCsr(); + csr->flushNonKernelTask(nullptr, 0, 0, args, false, false, false); + } else { + ret = appendSignalEvent(hSignalEvent); + } + return ret; } - return ret; } template @@ -102,19 +125,42 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryFill(void template ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_event_handle_t hEvent) { - auto ret = CommandListCoreFamily::appendSignalEvent(hEvent); - if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + auto event = Event::fromHandle(hEvent); + if (isSyncModeQueue || event->isTimestampEvent) { + auto ret = CommandListCoreFamily::appendSignalEvent(hEvent); + if (ret == ZE_RESULT_SUCCESS) { + executeCommandListImmediate(true); + } + return ret; + } else { + NEO::PipeControlArgs args; + args.dcFlushEnable = (!event->signalScope) ? false : true; + auto cmdQueueImp = static_cast(this->cmdQImmediate); + NEO::CommandStreamReceiver *csr = cmdQueueImp->getCsr(); + csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false); + event->updateTaskCountEnabled = true; + return ZE_RESULT_SUCCESS; } - return ret; } + template ze_result_t CommandListCoreFamilyImmediate::appendEventReset(ze_event_handle_t hEvent) { - auto ret = CommandListCoreFamily::appendEventReset(hEvent); - if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + auto event = Event::fromHandle(hEvent); + if (isSyncModeQueue || event->isTimestampEvent) { + auto ret = CommandListCoreFamily::appendEventReset(hEvent); + if (ret == ZE_RESULT_SUCCESS) { + executeCommandListImmediate(true); + } + return ret; + } else { + NEO::PipeControlArgs args; + args.dcFlushEnable = (!event->signalScope) ? false : true; + auto cmdQueueImp = static_cast(this->cmdQImmediate); + NEO::CommandStreamReceiver *csr = cmdQueueImp->getCsr(); + csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false); + event->updateTaskCountEnabled = true; + return ZE_RESULT_SUCCESS; } - return ret; } template @@ -128,11 +174,37 @@ ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(N template ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) { - auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phEvent); - if (ret == ZE_RESULT_SUCCESS) { - executeCommandListImmediate(true); + bool isTimestampEvent = false; + for (uint32_t i = 0; i < numEvents; i++) { + auto event = Event::fromHandle(phEvent[i]); + isTimestampEvent |= (event->isTimestampEvent) ? true : false; + } + if (isSyncModeQueue || isTimestampEvent) { + auto ret = CommandListCoreFamily::appendWaitOnEvents(numEvents, phEvent); + if (ret == ZE_RESULT_SUCCESS) { + executeCommandListImmediate(true); + } + return ret; + } else { + bool dcFlushRequired = false; + for (uint32_t i = 0; i < numEvents; i++) { + auto event = Event::fromHandle(phEvent[i]); + dcFlushRequired |= (!event->waitScope) ? false : true; + } + + auto cmdQueueImp = static_cast(this->cmdQImmediate); + NEO::CommandStreamReceiver *csr = cmdQueueImp->getCsr(); + NEO::PipeControlArgs args; + args.dcFlushEnable = dcFlushRequired; + for (uint32_t i = 0; i < numEvents; i++) { + auto event = Event::fromHandle(phEvent[i]); + bool isStartOfDispatch = (i == 0); + bool isEndOfDispatch = (i == numEvents - 1); + csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, true, isStartOfDispatch, isEndOfDispatch); + event->updateTaskCountEnabled = true; + } + return ZE_RESULT_SUCCESS; } - return ret; } template diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 3b0a04c231..aea6f66595 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -64,8 +64,11 @@ CommandList *CommandList::create(uint32_t productFamily, Device *device, NEO::En if (returnValue != ZE_RESULT_SUCCESS) { commandList->destroy(); commandList = nullptr; + } else { + commandList->setSyncModeQueue(false); } } + return commandList; } @@ -91,7 +94,6 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device commandList = nullptr; return commandList; } - NEO::CommandStreamReceiver *csr = nullptr; auto deviceImp = static_cast(device); if (internalUsage) { @@ -112,6 +114,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device commandList->cmdQImmediate = commandQueue; commandList->cmdListType = CommandListType::TYPE_IMMEDIATE; commandList->commandListPreemptionMode = device->getDevicePreemptionMode(); + commandList->setSyncModeQueue(desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS); return commandList; } diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 889e518e86..e9ce50e9b3 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -268,6 +268,8 @@ ze_result_t EventImp::queryStatusKernelTimestamp() { ze_result_t EventImp::queryStatus() { uint64_t *hostAddr = static_cast(hostAddress); uint32_t queryVal = Event::STATE_CLEARED; + ze_result_t retVal; + if (metricStreamer != nullptr) { *hostAddr = metricStreamer->getNotificationState(); } @@ -276,7 +278,18 @@ ze_result_t EventImp::queryStatus() { return queryStatusKernelTimestamp(); } memcpy_s(static_cast(&queryVal), sizeof(uint32_t), static_cast(hostAddr), sizeof(uint32_t)); - return queryVal == Event::STATE_CLEARED ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS; + retVal = (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS; + + if (retVal == ZE_RESULT_NOT_READY) { + return retVal; + } + + if (updateTaskCountEnabled) { + this->csr->flushTagUpdate(); + updateTaskCountEnabled = false; + } + + return retVal; } ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) { @@ -316,6 +329,11 @@ ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) { UNRECOVERABLE_IF(hostAddr == nullptr); memcpy_s(static_cast(hostAddr), sizeof(uint32_t), static_cast(&eventVal), sizeof(uint32_t)); + if (updateTaskCountEnabled) { + this->csr->flushTagUpdate(); + updateTaskCountEnabled = false; + } + NEO::CpuIntrinsics::clFlush(hostAddr); return ZE_RESULT_SUCCESS; @@ -328,6 +346,7 @@ ze_result_t EventImp::hostSignal() { ze_result_t EventImp::hostSynchronize(uint64_t timeout) { std::chrono::high_resolution_clock::time_point time1, time2; uint64_t timeDiff = 0; + ze_result_t ret = ZE_RESULT_NOT_READY; if (this->csr->getType() == NEO::CommandStreamReceiverType::CSR_AUB) { @@ -342,7 +361,7 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { while (true) { ret = queryStatus(); if (ret == ZE_RESULT_SUCCESS) { - return ZE_RESULT_SUCCESS; + return ret; } NEO::WaitUtils::waitFunction(nullptr, 0u); diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 837973a92b..d1ad78501c 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -62,6 +62,7 @@ struct Event : _ze_event_handle_t { ze_event_scope_flags_t signalScope = 0u; ze_event_scope_flags_t waitScope = 0u; bool isTimestampEvent = false; + bool updateTaskCountEnabled = false; std::unique_ptr[]> kernelTimestampsData = nullptr; uint64_t globalStartTS; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 5bb7777a87..88a9dfa0ce 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -317,6 +317,9 @@ struct MockCommandList : public CommandList { (L0::Device * device, NEO::EngineGroupType engineGroupType)); + ADDMETHOD_NOBASE(setSyncModeQueue, ze_result_t, ZE_RESULT_SUCCESS, + (bool syncMode)); + uint8_t *batchBuffer = nullptr; NEO::GraphicsAllocation *mockAllocation = nullptr; }; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 4c5ddb37fd..d40a3f633e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -253,6 +253,358 @@ TEST_F(CommandListCreate, whenCreatingImmediateCommandListThenItHasImmediateComm EXPECT_NE(nullptr, commandList->cmdQImmediate); } +TEST_F(CommandListCreate, whenCreatingImmediateCommandListWithSyncModeThenItHasImmediateCommandQueueCreated) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); +} + +TEST_F(CommandListCreate, whenCreatingImmediateCommandListWithASyncModeThenItHasImmediateCommandQueueCreated) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); +} + +TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendSignalEventThenUpdateTaskCountNeededFlagIsDisabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendSignalEvent(event); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); +} + +TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendBarrierThenUpdateTaskCountNeededFlagIsDisabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendBarrier(event, 0, nullptr); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); + + commandList->appendBarrier(nullptr, 0, nullptr); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); +} + +TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendResetEventThenUpdateTaskCountNeededFlagIsDisabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendEventReset(event); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); +} + +TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEventThenUpdateTaskCountNeededFlagIsEnabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = 0; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendSignalEvent(event); + EXPECT_EQ(true, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); +} + +TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThenUpdateTaskCountNeededFlagIsEnabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendBarrier(event, 0, nullptr); + EXPECT_EQ(true, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); + + commandList->appendBarrier(nullptr, 0, nullptr); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); +} + +TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndCopyEngineAndAppendBarrierThenUpdateTaskCountNeededFlagIsEnabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendBarrier(event, 0, nullptr); + EXPECT_EQ(true, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); + + commandList->appendBarrier(nullptr, 0, nullptr); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); +} + +TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventResetThenUpdateTaskCountNeededFlagIsEnabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendEventReset(event); + EXPECT_EQ(true, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); +} + +TEST_F(CommandListCreate, whenInvokingAppendMemoryCopyFromContextForImmediateCommandListWithSyncModeThenSuccessIsReturned) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + +TEST_F(CommandListCreate, whenInvokingAppendMemoryCopyFromContextForImmediateCommandListWithASyncModeThenSuccessIsReturned) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + auto result = commandList->appendMemoryCopyFromContext(dstPtr, nullptr, srcPtr, 8, nullptr, 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + TEST_F(CommandListCreate, whenInvokingAppendMemoryCopyFromContextForImmediateCommandListThenSuccessIsReturned) { const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; @@ -384,6 +736,35 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListCreate, whenBindlessModeEnabledWhenComman ASSERT_EQ(cmdList.end(), itor); } +HWCMDTEST_F(IGFX_GEN8_CORE, CommandListCreate, whenBindlessModeEnabledWhenCommandListImmediateIsCreatedThenStateBaseAddressCmdsIsNotAdded) { + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.UseBindlessMode.set(1); + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + auto &commandContainer = commandList->commandContainer; + + ASSERT_NE(nullptr, commandContainer.getCommandStream()); + auto usedSpaceBefore = commandContainer.getCommandStream()->getUsed(); + + auto result = commandList->close(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + auto itor = find(cmdList.begin(), cmdList.end()); + ASSERT_EQ(cmdList.end(), itor); +} + HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenCreatedThenStateBaseAddressCmdIsNotProgrammed) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; @@ -413,6 +794,28 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenSetBarrierThenMiFlus EXPECT_NE(cmdList.end(), itor); } +HWTEST_F(CommandListCreate, givenImmediateCommandListWithCopyOnlyWhenSetBarrierThenMiFlushCmdIsNotInsertedInTheCmdContainer) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + auto &commandContainer = commandList->commandContainer; + commandList->appendBarrier(nullptr, 0, nullptr); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + + EXPECT_EQ(cmdList.end(), itor); +} + HWTEST_F(CommandListCreate, whenCommandListIsResetThenContainsStatelessUncachedResourceIsSetToFalse) { ze_result_t returnValue; std::unique_ptr commandList(CommandList::create(productFamily, @@ -1034,6 +1437,178 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenP EXPECT_NE(cmdList.end(), itor2); } +HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendWaitEventsWithHostScopeThenPipeControlAndSemWaitAreAddedViaFlushTask) { + using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + auto &commandContainer = commandList->commandContainer; + MockEvent event, event2; + event.signalScope = 0; + event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; + event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; + ze_event_handle_t events[] = {&event, &event2}; + + auto used = commandContainer.getCommandStream()->getUsed(); + commandList->appendWaitOnEvents(2, events); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_EQ(cmdList.end(), itor); + EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); +} + +HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenPipeControlAndSemWaitAreAddedViaFlushTask) { + using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + auto &commandContainer = commandList->commandContainer; + MockEvent event, event2; + event.signalScope = 0; + event.waitScope = 0; + event2.waitScope = 0; + ze_event_handle_t events[] = {&event, &event2}; + auto event_object = L0::Event::fromHandle(events[0]); + auto event_object2 = L0::Event::fromHandle(events[1]); + + auto used = commandContainer.getCommandStream()->getUsed(); + commandList->appendWaitOnEvents(2, events); + EXPECT_EQ(true, event_object->updateTaskCountEnabled); + EXPECT_EQ(true, event_object2->updateTaskCountEnabled); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_EQ(cmdList.end(), itor); + EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); +} + +HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithHostScopeThenMiFlushAndSemWaitAreAddedViaFlushTask) { + using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + auto &commandContainer = commandList->commandContainer; + MockEvent event, event2; + event.signalScope = 0; + event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; + event2.waitScope = 0; + ze_event_handle_t events[] = {&event, &event2}; + + auto used = commandContainer.getCommandStream()->getUsed(); + commandList->appendWaitOnEvents(2, events); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_EQ(cmdList.end(), itor); + EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); +} + +HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenMiFlushAndSemWaitAreAddedViaFlushTask) { + using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + auto &commandContainer = commandList->commandContainer; + MockEvent event, event2; + event.signalScope = 0; + event.waitScope = 0; + event2.waitScope = 0; + ze_event_handle_t events[] = {&event, &event2}; + auto event_object = L0::Event::fromHandle(events[0]); + auto event_object2 = L0::Event::fromHandle(events[1]); + + auto used = commandContainer.getCommandStream()->getUsed(); + commandList->appendWaitOnEvents(2, events); + EXPECT_EQ(true, event_object->updateTaskCountEnabled); + EXPECT_EQ(true, event_object2->updateTaskCountEnabled); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_EQ(cmdList.end(), itor); + EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); +} + +HWTEST_F(CommandListCreate, givenSyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithHostScopeThenMiFlushAndSemWaitAreAdded) { + using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + auto &commandContainer = commandList->commandContainer; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 2; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + auto event2 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + ze_event_handle_t events[] = {event->toHandle(), event2->toHandle()}; + + auto used = commandContainer.getCommandStream()->getUsed(); + commandList->appendWaitOnEvents(2, events); + EXPECT_EQ(false, event->updateTaskCountEnabled); + EXPECT_EQ(false, event2->updateTaskCountEnabled); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); +} + using Platforms = IsAtLeastProduct; HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingBeforeCommandForCopyOnlyThenCommandsHaveCorrectEventOffsets, Platforms) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index 0ad9a26059..417a45c3c7 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -852,6 +852,223 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenMemoryCopyRegionWithSi commandList->cmdQImmediate = nullptr; } +TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEventWithTimestampThenUpdateTaskCountNeededFlagIsDisabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = 0; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendSignalEvent(event); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); +} + +TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThenUpdateTaskCountNeededFlagIsDisabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendBarrier(event, 0, nullptr); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); + + commandList->appendBarrier(nullptr, 0, nullptr); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); +} + +TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventResetThenUpdateTaskCountNeededFlagIsDisabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + eventPool->createEvent(&eventDesc, &event); + + std::unique_ptr event_object(L0::Event::fromHandle(event)); + ASSERT_NE(nullptr, event_object->csr); + ASSERT_EQ(static_cast(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr); + + commandList->appendEventReset(event); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + + auto result = event_object->hostSignal(); + EXPECT_EQ(false, event_object->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(event_object->queryStatus(), ZE_RESULT_SUCCESS); +} + +HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithHostScopeAndTimeStampThenTaskCountNeededFlagIsDisabled) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(1u, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + auto &commandContainer = commandList->commandContainer; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 2; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + auto event2 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + ze_event_handle_t events[] = {event->toHandle(), event2->toHandle()}; + + auto used = commandContainer.getCommandStream()->getUsed(); + commandList->appendWaitOnEvents(2, events); + EXPECT_EQ(false, event->updateTaskCountEnabled); + EXPECT_EQ(false, event2->updateTaskCountEnabled); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + EXPECT_EQ(used, commandContainer.getCommandStream()->getUsed()); +} + +TEST_F(CommandListCreate, givenQueueDescriptionwhenCreatingImmediateCommandListForCopyEnigneThenItHasImmediateCommandQueueCreated) { + auto engines = neoDevice->getEngineGroups(); + uint32_t numaAvailableEngineGroups = 0; + for (uint32_t ordinal = 0; ordinal < static_cast(NEO::EngineGroupType::MaxEngineGroups); ordinal++) { + if (engines[ordinal].size()) { + numaAvailableEngineGroups++; + } + } + for (uint32_t ordinal = 0; ordinal < numaAvailableEngineGroups; ordinal++) { + uint32_t engineGroupIndex = ordinal; + device->mapOrdinalForAvailableEngineGroup(&engineGroupIndex); + for (uint32_t index = 0; index < engines[engineGroupIndex].size(); index++) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + desc.ordinal = ordinal; + desc.index = index; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::Copy, returnValue)); + ASSERT_NE(nullptr, commandList); + + EXPECT_EQ(device, commandList->device); + EXPECT_EQ(CommandList::CommandListType::TYPE_IMMEDIATE, commandList->cmdListType); + EXPECT_NE(nullptr, commandList->cmdQImmediate); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 3; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + + ze_event_desc_t eventDesc = {}; + eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc)); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + auto event1 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + auto event2 = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + ze_event_handle_t events[] = {event1->toHandle(), event2->toHandle()}; + + commandList->appendBarrier(nullptr, 0, nullptr); + commandList->appendBarrier(event->toHandle(), 2, events); + EXPECT_EQ(true, event->updateTaskCountEnabled); + EXPECT_EQ(true, event1->updateTaskCountEnabled); + EXPECT_EQ(true, event2->updateTaskCountEnabled); + + auto result = event->hostSignal(); + EXPECT_EQ(false, event->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + result = event1->hostSignal(); + EXPECT_EQ(false, event1->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + result = event2->hostSignal(); + EXPECT_EQ(false, event2->updateTaskCountEnabled); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + } + } +} + using HostPointerManagerCommandListTest = Test; HWTEST2_F(HostPointerManagerCommandListTest, givenImportedHostPointerWhenAppendMemoryFillUsingHostPointerThenAppendFillUsingHostPointerAllocation, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp index a4296889a9..e035a33bb0 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp @@ -103,22 +103,20 @@ HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenAppendingEventResetThenEve using SklPlusMatcher = IsAtLeastProduct; HWTEST2_F(CommandListAppendEventReset, givenImmediateCmdlistWhenAppendingEventResetThenCommandsAreExecuted, SklPlusMatcher) { - Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + bool internalEngine = true; - auto commandList = std::make_unique>>(); - ASSERT_NE(nullptr, commandList); - ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute); - ASSERT_EQ(ZE_RESULT_SUCCESS, ret); - commandList->device = device; - commandList->cmdQImmediate = &cmdQueue; - commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + ze_result_t returnValue; + std::unique_ptr commandList0(CommandList::createImmediate(productFamily, + device, + &desc, + internalEngine, + NEO::EngineGroupType::RenderCompute, + returnValue)); + ASSERT_NE(nullptr, commandList0); - EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); - - auto result = commandList->appendEventReset(event->toHandle()); + auto result = commandList0->appendEventReset(event->toHandle()); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - commandList->cmdQImmediate = nullptr; } HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeControlAppendedCorrectly, SklPlusMatcher) { diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp index 3b74222fef..04b71a1f85 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp @@ -239,6 +239,48 @@ TEST_F(CommandQueueCreate, whenCommandQueueCreatedThenExpectLinearStreamInitiali commandQueue->destroy(); } +HWTEST_F(CommandQueueCreate, givenQueueInAsyncModeAndRugularCmdListWithAppendBarrierThenFlushTaskIsNotUsed) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, + device, + neoDevice->getDefaultEngine().commandStreamReceiver, + &desc, + false, + false, + returnValue)); + ASSERT_NE(nullptr, commandQueue); + + auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue))); + ASSERT_NE(nullptr, commandList); + + commandList->appendBarrier(nullptr, 0, nullptr); + + commandQueue->destroy(); +} + +HWTEST_F(CommandQueueCreate, givenQueueInSyncModeAndRugularCmdListWithAppendBarrierThenFlushTaskIsNotUsed) { + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + ze_result_t returnValue; + auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, + device, + neoDevice->getDefaultEngine().commandStreamReceiver, + &desc, + false, + false, + returnValue)); + ASSERT_NE(nullptr, commandQueue); + + auto commandList = std::unique_ptr(whitebox_cast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue))); + ASSERT_NE(nullptr, commandList); + + commandList->appendBarrier(nullptr, 0, nullptr); + + commandQueue->destroy(); +} + using CommandQueueSBASupport = IsWithinProducts; struct MockMemoryManagerCommandQueueSBA : public MemoryManagerMock { diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index f21fca9714..a4aeb3fc0c 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -455,6 +455,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { TagAllocatorBase *getTimestampPacketAllocator() override { return nullptr; } void flushTagUpdate() override{}; + void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{}; void updateTagFromWait() override{}; bool isMultiOsContextCapable() const override { return false; } diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 7d0c3ec878..7915764f5e 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -23,6 +23,7 @@ #include "shared/source/utilities/spinlock.h" #include "csr_properties_flags.h" +#include "pipe_control_args.h" #include #include @@ -211,6 +212,7 @@ class CommandStreamReceiver { virtual uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) = 0; virtual void flushTagUpdate() = 0; + virtual void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool isStartOfDispatch, bool isEndOfDispatch) = 0; virtual void updateTagFromWait() = 0; ScratchSpaceController *getScratchSpaceController() const { diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 8a5308b54a..89d01f7699 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -94,8 +94,12 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled) override; void flushTagUpdate() override; + void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvent, bool isStartOfDispatch, bool isEndOfDispatch) override; void flushMiFlushDW(); + void flushMiFlushDW(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData); void flushPipeControl(); + void flushPipeControl(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args); + void flushSemaphoreWait(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isStartOfDispatch, bool isEndOfDispatch); void flushSmallTask(LinearStream &commandStreamTask, size_t commandStreamStartTask); void flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency); @@ -168,6 +172,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { std::unique_ptr>> directSubmission; std::unique_ptr>> blitterDirectSubmission; + + size_t cmdStreamStart = 0; }; } // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index bac61eb833..35419c6398 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -32,7 +32,6 @@ #include "shared/source/utilities/tag_allocator.h" #include "command_stream_receiver_hw_ext.inl" -#include "pipe_control_args.h" #include "stream_properties.h" namespace NEO { @@ -1106,6 +1105,19 @@ inline void CommandStreamReceiverHw::flushTagUpdate() { } } +template +void CommandStreamReceiverHw::flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvent, bool isStartOfDispatch, bool isEndOfDispatch) { + if (isWaitOnEvent) { + this->flushSemaphoreWait(eventAlloc, immediateGpuAddress, immediateData, args, isStartOfDispatch, isEndOfDispatch); + } else { + if (this->osContext->getEngineType() == aub_stream::ENGINE_BCS) { + this->flushMiFlushDW(eventAlloc, immediateGpuAddress, immediateData); + } else { + this->flushPipeControl(eventAlloc, immediateGpuAddress, immediateData, args); + } + } +} + template inline void CommandStreamReceiverHw::flushMiFlushDW() { auto lock = obtainUniqueOwnership(); @@ -1116,7 +1128,23 @@ inline void CommandStreamReceiverHw::flushMiFlushDW() { EncodeMiFlushDW::programMiFlushDw(commandStream, tagAllocation->getGpuAddress(), taskCount, false, true); makeResident(*tagAllocation); - makeResident(*commandStream.getGraphicsAllocation()); + + this->flushSmallTask(commandStream, commandStreamStart); +} + +template +void CommandStreamReceiverHw::flushMiFlushDW(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData) { + auto lock = obtainUniqueOwnership(); + + auto &commandStream = getCS(EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); + auto commandStreamStart = commandStream.getUsed(); + + if (eventAlloc) { + EncodeMiFlushDW::programMiFlushDw(commandStream, immediateGpuAddress, immediateData, false, true); + makeResident(*eventAlloc); + } else { + EncodeMiFlushDW::programMiFlushDw(commandStream, 0, 0, false, false); + } this->flushSmallTask(commandStream, commandStreamStart); } @@ -1139,11 +1167,70 @@ void CommandStreamReceiverHw::flushPipeControl() { args); makeResident(*tagAllocation); - makeResident(*commandStream.getGraphicsAllocation()); this->flushSmallTask(commandStream, commandStreamStart); } +template +void CommandStreamReceiverHw::flushPipeControl(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args) { + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; + + auto lock = obtainUniqueOwnership(); + auto &commandStream = getCS(MemorySynchronizationCommands::getSizeForSinglePipeControl()); + auto commandStreamStart = commandStream.getUsed(); + + if (eventAlloc) { + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(commandStream, + PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + immediateGpuAddress, + immediateData, + peekHwInfo(), + args); + makeResident(*eventAlloc); + } else { + NEO::PipeControlArgs args; + NEO::MemorySynchronizationCommands::addPipeControl(commandStream, args); + } + + this->flushSmallTask(commandStream, commandStreamStart); +} + +template +void CommandStreamReceiverHw::flushSemaphoreWait(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isStartOfDispatch, bool isEndOfDispatch) { + auto lock = obtainUniqueOwnership(); + if (isStartOfDispatch && args.dcFlushEnable) { + if (this->osContext->getEngineType() == aub_stream::ENGINE_BCS) { + LinearStream &commandStream = getCS(EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); + cmdStreamStart = commandStream.getUsed(); + EncodeMiFlushDW::programMiFlushDw(commandStream, 0, 0, false, false); + } else { + LinearStream &commandStream = getCS(MemorySynchronizationCommands::getSizeForSinglePipeControl()); + cmdStreamStart = commandStream.getUsed(); + NEO::MemorySynchronizationCommands::addPipeControl(commandStream, args); + } + } + + using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; + using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; + + LinearStream &commandStream = getCS(NEO::EncodeSempahore::getSizeMiSemaphoreWait()); + if (isStartOfDispatch && !args.dcFlushEnable) { + cmdStreamStart = commandStream.getUsed(); + } + + NEO::EncodeSempahore::addMiSemaphoreWaitCommand(commandStream, + immediateGpuAddress, + static_cast(immediateData), + MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + + makeResident(*eventAlloc); + + if (isEndOfDispatch) { + this->flushSmallTask(commandStream, cmdStreamStart); + cmdStreamStart = 0; + } +} + template void CommandStreamReceiverHw::flushSmallTask(LinearStream &commandStreamTask, size_t commandStreamStartTask) { using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 7aafd706f8..f03e888cf9 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -47,6 +47,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { bool flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override; void flushTagUpdate() override{}; + void flushNonKernelTask(GraphicsAllocation *eventAlloc, uint64_t immediateGpuAddress, uint64_t immediateData, PipeControlArgs &args, bool isWaitOnEvents, bool startOfDispatch, bool endOfDispatch) override{}; void updateTagFromWait() override{}; bool isMultiOsContextCapable() const override { return multiOsContextCapable; }