Update isCooperativeDispatchSupported

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2021-08-23 17:42:53 +00:00
committed by Compute-Runtime-Automation
parent 6b5a5d481e
commit 3d6d4acda2
11 changed files with 77 additions and 40 deletions

View File

@@ -1874,8 +1874,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::programSyncBuffer(Kernel &kernel, NEO::Device &device,
const ze_group_count_t *pThreadGroupDimensions) {
auto &hwHelper = NEO::HwHelper::get(device.getHardwareInfo().platform.eRenderCoreFamily);
if (!hwHelper.isCooperativeDispatchSupported(this->engineGroupType)) {
auto &hwInfo = device.getHardwareInfo();
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (!hwHelper.isCooperativeDispatchSupported(this->engineGroupType, hwInfo)) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

View File

@@ -165,15 +165,17 @@ ze_result_t DeviceImp::getCommandQueueGroupProperties(uint32_t *pCount,
pCommandQueueGroupProperties[engineGroupCount].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE |
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY |
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_METRICS;
if (hwHelper.isCooperativeDispatchSupported(static_cast<NEO::EngineGroupType>(i))) {
if (hwHelper.isCooperativeDispatchSupported(static_cast<NEO::EngineGroupType>(i), hardwareInfo)) {
pCommandQueueGroupProperties[engineGroupCount].flags |= ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS;
}
pCommandQueueGroupProperties[engineGroupCount].maxMemoryFillPatternSize = std::numeric_limits<size_t>::max();
}
if (i == static_cast<uint32_t>(NEO::EngineGroupType::Compute)) {
pCommandQueueGroupProperties[engineGroupCount].flags = ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE |
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY |
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS;
ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY;
if (hwHelper.isCooperativeDispatchSupported(static_cast<NEO::EngineGroupType>(i), hardwareInfo)) {
pCommandQueueGroupProperties[engineGroupCount].flags |= ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS;
}
pCommandQueueGroupProperties[engineGroupCount].maxMemoryFillPatternSize = std::numeric_limits<size_t>::max();
}
if (i == static_cast<uint32_t>(NEO::EngineGroupType::Copy)) {

View File

@@ -1018,12 +1018,17 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
bool isCooperative = true;
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
auto engineGroupType = NEO::EngineGroupType::Compute;
if (hwHelper.isCooperativeEngineSupported(*defaultHwInfo)) {
engineGroupType = hwHelper.getEngineGroupType(aub_stream::EngineType::ENGINE_CCS, EngineUsage::Cooperative, *defaultHwInfo);
}
pCommandList->initialize(device, engineGroupType, 0u);
auto result = pCommandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
pCommandList->initialize(device, engineGroupType, 0u);
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
@@ -1038,10 +1043,10 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
{
VariableBackup<uint32_t> groupCountX{&groupCount.groupCountX};
uint32_t maximalNumberOfWorkgroupsAllowed;
kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, NEO::EngineGroupType::Compute, false);
kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed, engineGroupType, false);
groupCountX = maximalNumberOfWorkgroupsAllowed + 1;
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
pCommandList->initialize(device, engineGroupType, 0u);
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}

View File

@@ -33,6 +33,10 @@
using ::testing::Return;
namespace NEO {
extern HwHelper *hwHelperFactory[IGFX_MAX_CORE];
} // namespace NEO
namespace L0 {
namespace ult {
@@ -1791,7 +1795,14 @@ TEST_F(DeviceTest, givenValidDeviceWhenCallingReleaseResourcesThenResourcesRelea
EXPECT_TRUE(deviceImp->resourcesReleased);
}
TEST_F(DeviceTest, givenCooperativeDispatchSupportedWhenQueryingPropertiesFlagsThenCooperativeKernelsAreSupported) {
HWTEST_F(DeviceTest, givenCooperativeDispatchSupportedWhenQueryingPropertiesFlagsThenCooperativeKernelsAreSupported) {
// Test double that lets the test choose what isCooperativeDispatchSupported reports.
struct MockHwHelper : NEO::HwHelperHw<FamilyType> {
// Ignores both arguments and returns the value currently stored in isCooperativeDispatchSupportedValue.
bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const override {
return isCooperativeDispatchSupportedValue;
}
// Result reported by the override above; starts as true.
bool isCooperativeDispatchSupportedValue = true;
};
const uint32_t rootDeviceIndex = 0u;
auto hwInfo = *NEO::defaultHwInfo;
hwInfo.featureTable.ftrCCSNode = true;
@@ -1800,24 +1811,30 @@ TEST_F(DeviceTest, givenCooperativeDispatchSupportedWhenQueryingPropertiesFlagsT
rootDeviceIndex);
Mock<L0::DeviceImp> deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment());
MockHwHelper hwHelper{};
VariableBackup<HwHelper *> hwHelperFactoryBackup{&NEO::hwHelperFactory[static_cast<size_t>(hwInfo.platform.eRenderCoreFamily)]};
hwHelperFactoryBackup = &hwHelper;
uint32_t count = 0;
ze_result_t res = deviceImp.getCommandQueueGroupProperties(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
std::vector<ze_command_queue_group_properties_t> properties(count);
res = deviceImp.getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
NEO::EngineGroupType engineGroupTypes[] = {NEO::EngineGroupType::RenderCompute, NEO::EngineGroupType::Compute};
for (auto engineGroupType : engineGroupTypes) {
auto groupOrdinal = static_cast<size_t>(engineGroupType);
if (groupOrdinal >= count) {
continue;
for (auto isCooperativeDispatchSupported : ::testing::Bool()) {
hwHelper.isCooperativeDispatchSupportedValue = isCooperativeDispatchSupported;
std::vector<ze_command_queue_group_properties_t> properties(count);
res = deviceImp.getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
for (auto engineGroupType : engineGroupTypes) {
auto groupOrdinal = static_cast<size_t>(engineGroupType);
if (groupOrdinal >= count) {
continue;
}
auto actualValue = NEO::isValueSet(properties[groupOrdinal].flags, ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
EXPECT_EQ(isCooperativeDispatchSupported, actualValue);
}
auto expectedValue = hwHelper.isCooperativeDispatchSupported(engineGroupType);
auto actualValue = NEO::isValueSet(properties[groupOrdinal].flags, ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COOPERATIVE_KERNELS);
EXPECT_EQ(expectedValue, actualValue);
}
}

View File

@@ -5949,7 +5949,7 @@ cl_int CL_API_CALL clEnqueueNDCountKernelINTEL(cl_command_queue commandQueue,
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
auto engineGroupType = hwHelper.getEngineGroupType(pCommandQueue->getGpgpuEngine().getEngineType(),
pCommandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo);
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) {
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) {
retVal = CL_INVALID_COMMAND_QUEUE;
return retVal;
}

View File

@@ -1041,9 +1041,6 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
auto engineGroupType = hwHelper.getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(),
commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo);
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) {
return 0;
}
const auto &kernelDescriptor = kernelInfo.kernelDescriptor;
auto dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;

View File

@@ -311,7 +311,7 @@ TEST_F(clEnqueueNDCountKernelTests, GivenQueueIncapableWhenEnqueuingNDCountKerne
auto &hwHelper = HwHelper::get(::defaultHwInfo->platform.eRenderCoreFamily);
auto engineGroupType = hwHelper.getEngineGroupType(pCommandQueue->getGpgpuEngine().getEngineType(),
pCommandQueue->getGpgpuEngine().getEngineUsage(), *::defaultHwInfo);
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) {
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, *::defaultHwInfo)) {
GTEST_SKIP();
}
@@ -342,10 +342,10 @@ TEST_F(EnqueueKernelTest, givenKernelWhenAllArgsAreSetThenClEnqueueNDCountKernel
cl_int retVal = CL_SUCCESS;
CommandQueue *pCmdQ2 = createCommandQueue(pClDevice);
HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
auto engineGroupType = hwHelper.getEngineGroupType(pCmdQ2->getGpgpuEngine().getEngineType(),
pCmdQ2->getGpgpuEngine().getEngineUsage(), hardwareInfo);
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) {
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) {
pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, EngineUsage::LowPriority).osContext;
}
@@ -390,10 +390,10 @@ TEST_F(EnqueueKernelTest, givenKernelWhenNotAllArgsAreSetButSetKernelArgIsCalled
cl_int retVal = CL_SUCCESS;
CommandQueue *pCmdQ2 = createCommandQueue(pClDevice);
HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
auto engineGroupType = hwHelper.getEngineGroupType(pCmdQ2->getGpgpuEngine().getEngineType(),
pCmdQ2->getGpgpuEngine().getEngineUsage(), hardwareInfo);
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) {
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) {
pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, EngineUsage::LowPriority).osContext;
}
@@ -438,10 +438,10 @@ TEST_F(EnqueueKernelTest, givenKernelWhenSetKernelArgIsCalledForEachArgButAtLeas
cl_int retVal = CL_SUCCESS;
CommandQueue *pCmdQ2 = createCommandQueue(pClDevice);
HwHelper &hwHelper = HwHelper::get(pClDevice->getDevice().getHardwareInfo().platform.eRenderCoreFamily);
HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
auto engineGroupType = hwHelper.getEngineGroupType(pCmdQ2->getGpgpuEngine().getEngineType(),
pCmdQ2->getGpgpuEngine().getEngineUsage(), hardwareInfo);
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType)) {
if (!hwHelper.isCooperativeDispatchSupported(engineGroupType, hardwareInfo)) {
pCmdQ2->getGpgpuEngine().osContext = pCmdQ2->getDevice().getEngine(aub_stream::ENGINE_CCS, EngineUsage::LowPriority).osContext;
}

View File

@@ -16,6 +16,8 @@
#include "opencl/test/unit_test/mocks/mock_platform.h"
#include "test.h"
#include "engine_node.h"
using namespace NEO;
class MockSyncBufferHandler : public SyncBufferHandler {
@@ -69,6 +71,9 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
kernel->executionType = KernelExecutionType::Concurrent;
commandQueue = reinterpret_cast<MockCommandQueue *>(new MockCommandQueueHw<FamilyType>(context, pClDevice, 0));
hwHelper = &HwHelper::get(pClDevice->getHardwareInfo().platform.eRenderCoreFamily);
if (hwHelper->isCooperativeEngineSupported(pClDevice->getHardwareInfo())) {
commandQueue->gpgpuEngine = &pClDevice->getEngine(aub_stream::EngineType::ENGINE_CCS, EngineUsage::Cooperative);
}
}
template <typename FamilyType>
@@ -93,7 +98,7 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
bool isCooperativeDispatchSupported() {
auto engineGroupType = hwHelper->getEngineGroupType(commandQueue->getGpgpuEngine().getEngineType(),
commandQueue->getGpgpuEngine().getEngineUsage(), hardwareInfo);
return hwHelper->isCooperativeDispatchSupported(engineGroupType);
return hwHelper->isCooperativeDispatchSupported(engineGroupType, pDevice->getHardwareInfo());
}
const cl_uint workDim = 1;
@@ -116,9 +121,10 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurr
EXPECT_EQ(workItemsCount, syncBufferHandler->usedBufferSize);
commandQueue->flush();
EXPECT_EQ(syncBufferHandler->graphicsAllocation->getTaskCount(
pDevice->getUltCommandStreamReceiver<FamilyType>().getOsContext().getContextId()),
pDevice->getUltCommandStreamReceiver<FamilyType>().latestSentTaskCount);
auto pCsr = commandQueue->getGpgpuEngine().commandStreamReceiver;
EXPECT_EQ(syncBufferHandler->graphicsAllocation->getTaskCount(pCsr->getOsContext().getContextId()),
static_cast<UltCommandStreamReceiver<FamilyType> *>(pCsr)->latestSentTaskCount);
}
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) {

View File

@@ -27,6 +27,7 @@ class MockCommandQueue : public CommandQueue {
using CommandQueue::gpgpuEngine;
using CommandQueue::isCopyOnly;
using CommandQueue::obtainNewTimestampPacketNodes;
using CommandQueue::overrideEngine;
using CommandQueue::queueCapabilities;
using CommandQueue::queueFamilyIndex;
using CommandQueue::queueFamilySelected;

View File

@@ -123,7 +123,8 @@ class HwHelper {
virtual bool useOnlyGlobalTimestamps() const = 0;
virtual bool useSystemMemoryPlacementForISA(const HardwareInfo &hwInfo) const = 0;
virtual bool packedFormatsSupported() const = 0;
virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const = 0;
virtual bool isRcsAvailable(const HardwareInfo &hwInfo) const = 0;
virtual bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const = 0;
virtual uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
const HardwareInfo &hwInfo, bool isEngineInstanced) const = 0;
virtual size_t getMaxFillPaternSizeForCopyEngine() const = 0;
@@ -329,7 +330,9 @@ class HwHelperHw : public HwHelper {
bool packedFormatsSupported() const override;
bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const override;
bool isRcsAvailable(const HardwareInfo &hwInfo) const override;
bool isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const override;
uint32_t adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
const HardwareInfo &hwInfo, bool isEngineInstanced) const override;

View File

@@ -568,7 +568,12 @@ bool MemorySynchronizationCommands<GfxFamily>::isPipeControlPriorToPipelineSelec
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isCooperativeDispatchSupported(const EngineGroupType engineGroupType) const {
// Template implementation of isRcsAvailable: returns true unconditionally; hwInfo is not inspected.
// NOTE(review): presumably individual GfxFamily specializations override this elsewhere - confirm.
bool HwHelperHw<GfxFamily>::isRcsAvailable(const HardwareInfo &hwInfo) const {
return true;
}
// Template implementation of isCooperativeDispatchSupported: returns true unconditionally;
// neither engineGroupType nor hwInfo is inspected.
// NOTE(review): presumably individual GfxFamily specializations use hwInfo elsewhere - confirm.
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isCooperativeDispatchSupported(const EngineGroupType engineGroupType, const HardwareInfo &hwInfo) const {
return true;
}