From 3d6d4acda2703d6a8fe55cf86ed2c010701a5bde Mon Sep 17 00:00:00 2001 From: Filip Hazubski Date: Mon, 23 Aug 2021 17:42:53 +0000 Subject: [PATCH] Update isCooperativeDispatchSupported Signed-off-by: Filip Hazubski --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 5 ++- level_zero/core/source/device/device_imp.cpp | 8 ++-- .../test_cmdlist_append_launch_kernel_1.cpp | 13 ++++-- .../unit_tests/sources/device/test_device.cpp | 43 +++++++++++++------ opencl/source/api/api.cpp | 2 +- opencl/source/kernel/kernel.cpp | 3 -- .../command_queue/enqueue_kernel_1_tests.cpp | 14 +++--- .../sync_buffer_handler_tests.cpp | 14 ++++-- .../test/unit_test/mocks/mock_command_queue.h | 1 + shared/source/helpers/hw_helper.h | 7 ++- shared/source/helpers/hw_helper_base.inl | 7 ++- 11 files changed, 77 insertions(+), 40 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 594504a0e6..e60608ae09 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1874,8 +1874,9 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu template ze_result_t CommandListCoreFamily::programSyncBuffer(Kernel &kernel, NEO::Device &device, const ze_group_count_t *pThreadGroupDimensions) { - auto &hwHelper = NEO::HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily); - if (!hwHelper.isCooperativeDispatchSupported(this->engineGroupType)) { + auto &hwInfo = device.getHardwareInfo(); + auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); + if (!hwHelper.isCooperativeDispatchSupported(this->engineGroupType, hwInfo)) { return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 832baebb90..c28813e7f9 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -165,15 +165,17 @@ ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount, pCommandQueueGroupProperties[engineGroupCount].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE | ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY | ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS; - if (hwHelper.isCooperativeDispatchSupported(static_cast(i))) { + if (hwHelper.isCooperativeDispatchSupported(static_cast(i), hardwareInfo)) { pCommandQueueGroupProperties[engineGroupCount].flags |= ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS; } pCommandQueueGroupProperties[engineGroupCount].maxMemoryFillPatternSize = std::numeric_limits::max(); } if (i == static_cast(NEO::EngineGroupType::Compute)) { pCommandQueueGroupProperties[engineGroupCount].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE | - ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY | - ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS; + ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY; + if (hwHelper.isCooperativeDispatchSupported(static_cast(i), hardwareInfo)) { + pCommandQueueGroupProperties[engineGroupCount].flags |= ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS; + } pCommandQueueGroupProperties[engineGroupCount].maxMemoryFillPatternSize = std::numeric_limits::max(); } if (i == static_cast(NEO::EngineGroupType::Copy)) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index 9a060dbbc2..135aaa3b2d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -1018,12 +1018,17 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; bool isCooperative = true; auto pCommandList = std::make_unique>>(); - pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); + auto engineGroupType = NEO::EngineGroupType::Compute; + if (hwHelper.isCooperativeEngineSupported(*defaultHwInfo)) { + engineGroupType = hwHelper.getEngineGroupType(aub_stream::EngineType::ENGINE_CCS, EngineUsage::Cooperative, *defaultHwInfo); + } + pCommandList->initialize(device, engineGroupType, 0u); auto result = pCommandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); pCommandList = std::make_unique>>(); - pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + pCommandList->initialize(device, engineGroupType, 0u); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -1038,10 +1043,10 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau { VariableBackup groupCountX{&groupCount.groupCountX}; uint32_t maximalNumberOfWorkgroupsAllowed; - kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, NEO::EngineGroupType::Compute, false); + kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, engineGroupType, false); groupCountX = maximalNumberOfWorkgroupsAllowed + 1; pCommandList = std::make_unique>>(); - pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + pCommandList->initialize(device, engineGroupType, 0u); result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } diff --git a/level_zero/core/test/unit_tests/sources/device/test_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_device.cpp index 6b79bcacf3..539a180295 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_device.cpp @@ -33,6 +33,10 @@ using ::testing::Return; +namespace NEO { +extern HwHelper *hwHelperFactory[IGFX_MAX_CORE]; +} // namespace NEO + namespace L0 { namespace ult { @@ -1791,7 +1795,14 @@ TEST_F(DeviceTest, givenValidDeviceWhenCallingReleaseResourcesThenResourcesRelea EXPECT_TRUE(deviceImp->resourcesReleased); } -TEST_F(DeviceTest, givenCooperativeDispatchSupportedWhenQueryingPropertiesFlagsThenCooperativeKernelsAreSupported) { +HWTEST_F(DeviceTest, givenCooperativeDispatchSupportedWhenQueryingPropertiesFlagsThenCooperativeKernelsAreSupported) { + struct MockHwHelper : NEO::HwHelperHw { + bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const override { + return isCooperativeDispatchSupportedValue; + } + bool isCooperativeDispatchSupportedValue = true; + }; + const uint32_t rootDeviceIndex = 0u; auto hwInfo = *NEO::defaultHwInfo; hwInfo.featureTable.ftrCCSNode = true; @@ -1800,24 +1811,30 @@ TEST_F(DeviceTest, givenCooperativeDispatchSupportedWhenQueryingPropertiesFlagsT rootDeviceIndex); Mock deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); + MockHwHelper hwHelper{}; + VariableBackup hwHelperFactoryBackup{&NEO::hwHelperFactory[static_cast(hwInfo.platform.eRenderCoreFamily)]}; + hwHelperFactoryBackup = &hwHelper; + uint32_t count = 0; ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr); EXPECT_EQ(ZE_RESULT_SUCCESS, res); - std::vector properties(count); - res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); - EXPECT_EQ(ZE_RESULT_SUCCESS, res); - - auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); NEO::EngineGroupType engineGroupTypes[] = {NEO::EngineGroupType::RenderCompute, NEO::EngineGroupType::Compute}; - for (auto engineGroupType : engineGroupTypes) { - auto groupOrdinal = static_cast(engineGroupType); - if (groupOrdinal >= count) { - continue; + for (auto isCooperativeDispatchSupported : ::testing::Bool()) { + hwHelper.isCooperativeDispatchSupportedValue = isCooperativeDispatchSupported; + + std::vector properties(count); + res = deviceImp.getCommandQueueGroupProperties(&count, properties.data()); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + for (auto engineGroupType : engineGroupTypes) { + auto groupOrdinal = static_cast(engineGroupType); + if (groupOrdinal >= count) { + continue; + } + auto actualValue = NEO::isValueSet(properties[groupOrdinal].flags, ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); + EXPECT_EQ(isCooperativeDispatchSupported, actualValue); } - auto expectedValue = hwHelper.isCooperativeDispatchSupported(engineGroupType); - auto actualValue = NEO::isValueSet(properties[groupOrdinal].flags, ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS); - EXPECT_EQ(expectedValue, actualValue); } } diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index e13d9db6aa..e823ebdb00 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -5949,7 +5949,7 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue, auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(pCommandQueue->getGpgpuEngine().getEngineType(), pCommandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo); - if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) { + if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) { retVal = CL_INVALID_COMMAND_QUEUE; return retVal; } diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 8b706ed978..4af39bdca4 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1041,9 +1041,6 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local auto engineGroupType = hwHelper.getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(), commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo); - if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) { - return 0; - } const auto &kernelDescriptor = kernelInfo.kernelDescriptor; auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount; diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index 5a6ab5da58..095bdffe03 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -311,7 +311,7 @@ TEST_F(clEnqueueNDCountKernelTests, GivenQueueIncapableWhenEnqueuingNDCountKerne auto &hwHelper = HwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(pCommandQueue->getGpgpuEngine().getEngineType(), pCommandQueue->getGpgpuEngine().getEngineUsage(), *::defaultHwInfo); - if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) { + if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, *::defaultHwInfo)) { GTEST_SKIP(); } @@ -342,10 +342,10 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDCountKernel cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); - HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily); + HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(pCmdQ2->getGpgpuEngine().getEngineType(), pCmdQ2->getGpgpuEngine().getEngineUsage(), hardwareInfo); - if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) { + if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) { pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, EngineUsage::LowPriority).osContext; } @@ -390,10 +390,10 @@ TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalled cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); - HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily); + HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(pCmdQ2->getGpgpuEngine().getEngineType(), pCmdQ2->getGpgpuEngine().getEngineUsage(), hardwareInfo); - if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) { + if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) { pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, EngineUsage::LowPriority).osContext; } @@ -438,10 +438,10 @@ TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeas cl_int retVal = CL_SUCCESS; CommandQueue *pCmdQ2 = createCommandQueue(pClDevice); - HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily); + HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(pCmdQ2->getGpgpuEngine().getEngineType(), pCmdQ2->getGpgpuEngine().getEngineUsage(), hardwareInfo); - if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) { + if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) { pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, EngineUsage::LowPriority).osContext; } diff --git a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp index 98f99500c8..71b43f5dc8 100644 --- a/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp +++ b/opencl/test/unit_test/command_queue/sync_buffer_handler_tests.cpp @@ -16,6 +16,8 @@ #include "opencl/test/unit_test/mocks/mock_platform.h" #include "test.h" +#include "engine_node.h" + using namespace NEO; class MockSyncBufferHandler : public SyncBufferHandler { @@ -69,6 +71,9 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest { kernel->executionType = KernelExecutionType::Concurrent; commandQueue = reinterpret_cast(new MockCommandQueueHw(context, pClDevice, 0)); hwHelper = &HwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily); + if (hwHelper->isCooperativeEngineSupported(pClDevice->getHardwareInfo())) { + commandQueue->gpgpuEngine = &pClDevice->getEngine(aub_stream::EngineType::ENGINE_CCS, EngineUsage::Cooperative); + } } template @@ -93,7 +98,7 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest { bool isCooperativeDispatchSupported() { auto engineGroupType = hwHelper->getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(), commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo); - return hwHelper->isCooperativeDispatchSupported(engineGroupType); + return hwHelper->isCooperativeDispatchSupported(engineGroupType, pDevice->getHardwareInfo()); } const cl_uint workDim = 1; @@ -116,9 +121,10 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurr EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize); commandQueue->flush(); - EXPECT_EQ(syncBufferHandler->graphicsAllocation->getTaskCount( - pDevice->getUltCommandStreamReceiver().getOsContext().getContextId()), - pDevice->getUltCommandStreamReceiver().latestSentTaskCount); + + auto pCsr = commandQueue->getGpgpuEngine().commandStreamReceiver; + EXPECT_EQ(syncBufferHandler->graphicsAllocation->getTaskCount(pCsr->getOsContext().getContextId()), + static_cast *>(pCsr)->latestSentTaskCount); } HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) { diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index a32b46fadd..01b52a747b 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -27,6 +27,7 @@ class MockCommandQueue : public CommandQueue { using CommandQueue::gpgpuEngine; using CommandQueue::isCopyOnly; using CommandQueue::obtainNewTimestampPacketNodes; + using CommandQueue::overrideEngine; using CommandQueue::queueCapabilities; using CommandQueue::queueFamilyIndex; using CommandQueue::queueFamilySelected; diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 0afe9ebf41..0d3669e5e2 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -123,7 +123,8 @@ class HwHelper { virtual bool useOnlyGlobalTimestamps() const = 0; virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0; virtual bool packedFormatsSupported() const = 0; - virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const = 0; + virtual bool isRcsAvailable(const HardwareInfo &hwInfo) const = 0; + virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const = 0; virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType, const HardwareInfo &hwInfo, bool isEngineInstanced) const = 0; virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0; @@ -329,7 +330,9 @@ class HwHelperHw : public HwHelper { bool packedFormatsSupported() const override; - bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const override; + bool isRcsAvailable(const HardwareInfo &hwInfo) const override; + + bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const override; uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType, const HardwareInfo &hwInfo, bool isEngineInstanced) const override; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index 464949824b..9b8b2d1c75 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -568,7 +568,12 @@ bool MemorySynchronizationCommands::isPipeControlPriorToPipelineSelec } template -bool HwHelperHw::isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const { +bool HwHelperHw::isRcsAvailable(const HardwareInfo &hwInfo) const { + return true; +} + +template +bool HwHelperHw::isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const { return true; }