Do not allow mixing cooperative and non-cooperative kernels in an execute

When executing command lists, ensure that all lists contain either cooperative
or non-cooperative kernels. When appending kernels to a command list, do not
allow mixing cooperative and non-cooperative kernels.

Related-To: NEO-4940, NEO-5757

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2021-08-02 19:10:52 +00:00
committed by Compute-Runtime-Automation
parent 66ee5bd3c4
commit caf1ab628a
9 changed files with 133 additions and 7 deletions

View File

@@ -207,6 +207,9 @@ struct CommandList : _ze_command_list_handle_t {
// Returns the current value of the internalUsage member.
bool isInternal() const { return internalUsage; }
// Returns the current value of the containsCooperativeKernelsFlag member.
bool containsCooperativeKernels() const { return containsCooperativeKernelsFlag; }
enum CommandListType : uint32_t {
TYPE_REGULAR = 0u,
@@ -252,6 +255,7 @@ struct CommandList : _ze_command_list_handle_t {
UnifiedMemoryControls unifiedMemoryControls;
bool indirectAllocationsAllowed = false;
bool internalUsage = false;
bool containsCooperativeKernelsFlag = false;
NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, uint64_t bufferSize);
NEO::GraphicsAllocation *getHostPtrAlloc(const void *buffer, uint64_t bufferSize);
bool containsStatelessUncachedResource = false;

View File

@@ -91,6 +91,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
requiredStreamState = {};
finalStreamState = requiredStreamState;
containsAnyKernel = false;
containsCooperativeKernelsFlag = false;
clearCommandsToPatch();
commandListSLMEnabled = false;

View File

@@ -120,6 +120,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
this->indirectAllocationsAllowed = true;
}
if (!containsAnyKernel) {
containsCooperativeKernelsFlag = isCooperative;
} else if (containsCooperativeKernelsFlag != isCooperative) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (kernel->usesSyncBuffer()) {
auto retVal = (isCooperative
? programSyncBuffer(*kernel, *device->getNEODevice(), pThreadGroupDimensions)

View File

@@ -189,6 +189,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
kernelDescriptor.kernelMetadata.kernelName.c_str());
}
if (!containsAnyKernel) {
containsCooperativeKernelsFlag = isCooperative;
} else if (containsCooperativeKernelsFlag != isCooperative) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (kernel->usesSyncBuffer()) {
auto retVal = (isCooperative
? programSyncBuffer(*kernel, *neoDevice, pThreadGroupDimensions)

View File

@@ -75,11 +75,17 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
auto lockCSR = csr->obtainUniqueOwnership();
auto commandListsContainCooperativeKernels = CommandList::fromHandle(phCommandLists[0])->containsCooperativeKernels();
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
if (peekIsCopyOnlyCommandQueue() != commandList->isCopyOnly()) {
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
}
if (commandListsContainCooperativeKernels != commandList->containsCooperativeKernels()) {
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
}
}
size_t spaceForResidency = 0;

View File

@@ -40,6 +40,8 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::commandListPerThreadScratchSize;
using BaseClass::commandListPreemptionMode;
using BaseClass::commandsToPatch;
using BaseClass::containsAnyKernel;
using BaseClass::containsCooperativeKernelsFlag;
using BaseClass::engineGroupType;
using BaseClass::finalStreamState;
using BaseClass::flags;

View File

@@ -827,5 +827,16 @@ HWTEST2_F(CommandListCreate, givenIndirectAccessFlagsAreChangedWhenResetingComma
EXPECT_FALSE(commandList->unifiedMemoryControls.indirectDeviceAllocationsAllowed);
}
// Checks that containsCooperativeKernels() reports the value stored in
// containsCooperativeKernelsFlag, and that it reports false after reset(),
// for every value produced by ::testing::Bool().
HWTEST2_F(CommandListCreate, whenContainsCooperativeKernelsIsCalledThenCorrectValueIsReturned, TestPlatforms) {
    for (auto flagValue : ::testing::Bool()) {
        MockCommandListForAppendLaunchKernel<gfxCoreFamily> cmdList;
        cmdList.initialize(device, NEO::EngineGroupType::Compute, 0u);

        // Write the flag directly; the getter must hand back exactly what was stored.
        cmdList.containsCooperativeKernelsFlag = flagValue;
        const bool reportedBeforeReset = cmdList.containsCooperativeKernels();
        EXPECT_EQ(flagValue, reportedBeforeReset);

        // Whatever was stored above, the getter must report false once reset() ran.
        cmdList.reset();
        const bool reportedAfterReset = cmdList.containsCooperativeKernels();
        EXPECT_FALSE(reportedAfterReset);
    }
}
} // namespace ult
} // namespace L0

View File

@@ -1256,23 +1256,26 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
kernel.setGroupSize(4, 1, 1);
ze_group_count_t groupCount{8, 1, 1};
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto &kernelAttributes = kernel.immutableData.kernelDescriptor->kernelAttributes;
kernelAttributes.flags.usesSyncBuffer = true;
kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
bool isCooperative = true;
result = pCommandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr);
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
auto result = pCommandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
{
VariableBackup<uint32_t> usesSyncBuffer{&kernelAttributes.flags.packed};
usesSyncBuffer = false;
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
@@ -1281,6 +1284,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
uint32_t maximalNumberOfWorkgroupsAllowed;
kernel.suggestMaxCooperativeGroupCount(&maximalNumberOfWorkgroupsAllowed);
groupCountX = maximalNumberOfWorkgroupsAllowed + 1;
pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
result = pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
@@ -1292,9 +1297,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau
}
}
using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;
HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenRequiredStateAndFinalStateAreCorrectlySet, Platforms) {
HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenRequiredStateAndFinalStateAreCorrectlySet, SklPlusMatcher) {
Mock<::L0::Kernel> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = pMockModule.get();
@@ -1318,5 +1321,54 @@ HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenR
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
}
// Appends a single kernel to a freshly initialized command list, once as
// non-cooperative and once as cooperative, and checks that containsAnyKernel
// is set and containsCooperativeKernelsFlag matches the isCooperative argument.
HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeKernelWhenAppendLaunchCooperativeKernelIsCalledThenCommandListTypeIsProperlySet, SklPlusMatcher) {
    using WhiteBoxCommandList = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;

    createKernel();
    kernel->setGroupSize(4, 1, 1);
    ze_group_count_t groupCount{8, 1, 1};

    // Scenario 1: the only appended kernel is non-cooperative.
    {
        const bool isCooperative = false;
        auto pNonCooperativeList = std::make_unique<WhiteBoxCommandList>();
        pNonCooperativeList->initialize(device, NEO::EngineGroupType::Compute, 0u);

        const auto appendResult = pNonCooperativeList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative);
        EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);
        EXPECT_TRUE(pNonCooperativeList->containsAnyKernel);
        EXPECT_FALSE(pNonCooperativeList->containsCooperativeKernelsFlag);
    }

    // Scenario 2: the only appended kernel is cooperative.
    {
        const bool isCooperative = true;
        auto pCooperativeList = std::make_unique<WhiteBoxCommandList>();
        pCooperativeList->initialize(device, NEO::EngineGroupType::Compute, 0u);

        const auto appendResult = pCooperativeList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative);
        EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);
        EXPECT_TRUE(pCooperativeList->containsAnyKernel);
        EXPECT_TRUE(pCooperativeList->containsCooperativeKernelsFlag);
    }
}
// Checks that a command list rejects a second kernel whose cooperative-ness
// differs from the first one: the first append succeeds, the mismatching
// second append returns ZE_RESULT_ERROR_INVALID_ARGUMENT. Both orderings
// (non-cooperative first, cooperative first) are exercised on separate lists.
HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsWhenAppendLaunchCooperativeKernelIsCalledThenReturnError, SklPlusMatcher) {
    using WhiteBoxCommandList = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;
    const bool cooperative = true;
    const bool nonCooperative = false;

    Mock<::L0::Kernel> kernel;
    auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();
    kernel.setGroupSize(4, 1, 1);
    ze_group_count_t groupCount{8, 1, 1};

    // Ordering 1: non-cooperative kernel first, cooperative kernel second.
    {
        auto pList = std::make_unique<WhiteBoxCommandList>();
        pList->initialize(device, NEO::EngineGroupType::Compute, 0u);

        const auto firstAppend = pList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, nonCooperative);
        EXPECT_EQ(ZE_RESULT_SUCCESS, firstAppend);

        const auto secondAppend = pList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, cooperative);
        EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, secondAppend);
    }

    // Ordering 2: cooperative kernel first, non-cooperative kernel second.
    {
        auto pList = std::make_unique<WhiteBoxCommandList>();
        pList->initialize(device, NEO::EngineGroupType::Compute, 0u);

        const auto firstAppend = pList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, cooperative);
        EXPECT_EQ(ZE_RESULT_SUCCESS, firstAppend);

        const auto secondAppend = pList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, nonCooperative);
        EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, secondAppend);
    }
}
} // namespace ult
} // namespace L0

View File

@@ -15,6 +15,8 @@
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
namespace ult {
@@ -393,6 +395,42 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsA
}
}
// Builds one command list holding a kernel appended as cooperative and one
// holding a kernel appended as non-cooperative, then submits both in a single
// executeCommandLists call. The call is expected to return
// ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE for either submission order.
HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListsWithCooperativeAndNonCooperativeKernelsWhenExecuteCommandListsIsCalledThenErrorIsReturned, IsSklOrAbove) {
    using WhiteBoxCommandList = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;

    ze_command_queue_desc_t queueDesc = {};
    NEO::CommandStreamReceiver *csr;
    device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
    auto pCommandQueue = new MockCommandQueueHw<gfxCoreFamily>{device, csr, &queueDesc};
    pCommandQueue->initialize(false, false);

    Mock<::L0::Kernel> kernel;
    auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();
    ze_group_count_t threadGroupDimensions{1, 1, 1};

    // List whose single kernel is appended with isCooperative == true.
    auto pCooperativeList = std::make_unique<WhiteBoxCommandList>();
    pCooperativeList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    pCooperativeList->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, true);

    // List whose single kernel is appended with isCooperative == false.
    auto pNonCooperativeList = std::make_unique<WhiteBoxCommandList>();
    pNonCooperativeList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    pNonCooperativeList->appendLaunchKernelWithParams(&kernel, &threadGroupDimensions, nullptr, false, false, false);

    ze_command_list_handle_t hCooperative = pCooperativeList->toHandle();
    ze_command_list_handle_t hNonCooperative = pNonCooperativeList->toHandle();

    // Cooperative list submitted first.
    {
        ze_command_list_handle_t handles[] = {hCooperative, hNonCooperative};
        const auto executeResult = pCommandQueue->executeCommandLists(2, handles, nullptr, false);
        EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, executeResult);
    }

    // Non-cooperative list submitted first.
    {
        ze_command_list_handle_t handles[] = {hNonCooperative, hCooperative};
        const auto executeResult = pCommandQueue->executeCommandLists(2, handles, nullptr, false);
        EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, executeResult);
    }

    // The queue was allocated with new above; destroy() is the only cleanup this test performs for it.
    pCommandQueue->destroy();
}
template <typename FamilyType>
void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool preemptionCmdProgramming) {
ze_command_queue_desc_t desc = {};