Update VFE state programming

Treat regular kernels scheduled as cooperative ones as cooperative kernels.
Add debug variable AllowMixingRegularAndCooperativeKernels.

Related-To: NEO-4940

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2021-08-31 16:13:36 +00:00
committed by Compute-Runtime-Automation
parent 5844cbe02b
commit c03620468b
9 changed files with 89 additions and 14 deletions

View File

@@ -222,7 +222,7 @@ struct CommandListCoreFamily : CommandListImp {
bool isCooperative); bool isCooperative);
ze_result_t appendLaunchKernelSplit(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent); ze_result_t appendLaunchKernelSplit(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent);
ze_result_t prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions); ze_result_t prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions);
void updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable); void updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable, bool isCooperative);
void clearCommandsToPatch(); void clearCommandsToPatch();
void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes,

View File

@@ -2149,7 +2149,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable) { void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable, bool isCooperative) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily; using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
@@ -2158,12 +2158,12 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo); auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo);
if (!containsAnyKernel) { if (!containsAnyKernel) {
requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, false, device->getHwInfo()); requiredStreamState.frontEndState.setProperties(isCooperative, disableOverdispatch, false, hwInfo);
finalStreamState = requiredStreamState; finalStreamState = requiredStreamState;
containsAnyKernel = true; containsAnyKernel = true;
} }
finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, false, hwInfo); finalStreamState.frontEndState.setProperties(isCooperative, disableOverdispatch, false, hwInfo);
if (finalStreamState.frontEndState.isDirty()) { if (finalStreamState.frontEndState.isDirty()) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType); auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
auto pVfeState = new VFE_STATE_TYPE; auto pVfeState = new VFE_STATE_TYPE;

View File

@@ -120,8 +120,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
this->indirectAllocationsAllowed = true; this->indirectAllocationsAllowed = true;
} }
if (!containsAnyKernel) { if ((!containsAnyKernel) || NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get()) {
containsCooperativeKernelsFlag = isCooperative; containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || isCooperative);
} else if (containsCooperativeKernelsFlag != isCooperative) { } else if (containsCooperativeKernelsFlag != isCooperative) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT; return ZE_RESULT_ERROR_INVALID_ARGUMENT;
} }
@@ -150,7 +150,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u); kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u);
} }
updateStreamProperties(*kernel, false); updateStreamProperties(*kernel, false, isCooperative);
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer,
reinterpret_cast<const void *>(pThreadGroupDimensions), reinterpret_cast<const void *>(pThreadGroupDimensions),

View File

@@ -188,8 +188,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
kernelDescriptor.kernelMetadata.kernelName.c_str(), 0u); kernelDescriptor.kernelMetadata.kernelName.c_str(), 0u);
} }
if (!containsAnyKernel) { if ((!containsAnyKernel) || NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get()) {
containsCooperativeKernelsFlag = isCooperative; containsCooperativeKernelsFlag = (containsCooperativeKernelsFlag || isCooperative);
} else if (containsCooperativeKernelsFlag != isCooperative) { } else if (containsCooperativeKernelsFlag != isCooperative) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT; return ZE_RESULT_ERROR_INVALID_ARGUMENT;
} }
@@ -205,7 +205,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
auto isMultiOsContextCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(device->getNEODevice()->getDeviceBitfield(), auto isMultiOsContextCapable = NEO::ImplicitScalingHelper::isImplicitScalingEnabled(device->getNEODevice()->getDeviceBitfield(),
!isCooperative); !isCooperative);
updateStreamProperties(*kernel, isMultiOsContextCapable); updateStreamProperties(*kernel, isMultiOsContextCapable, isCooperative);
KernelImp *kernelImp = static_cast<KernelImp *>(kernel); KernelImp *kernelImp = static_cast<KernelImp *>(kernel);
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();

View File

@@ -82,7 +82,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE; return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
} }
if (commandListsContainCooperativeKernels != commandList->containsCooperativeKernels()) { if ((commandListsContainCooperativeKernels != commandList->containsCooperativeKernels()) &&
(!NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.get())) {
return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE; return ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE;
} }
} }

View File

@@ -1068,11 +1068,11 @@ HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenR
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
int32_t expectedDisableOverdispatch = hwHelper.isDisableOverdispatchAvailable(*defaultHwInfo); int32_t expectedDisableOverdispatch = hwHelper.isDisableOverdispatchAvailable(*defaultHwInfo);
pCommandList->updateStreamProperties(kernel, false); pCommandList->updateStreamProperties(kernel, false, false);
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value);
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
pCommandList->updateStreamProperties(kernel, false); pCommandList->updateStreamProperties(kernel, false, false);
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value);
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
} }
@@ -1099,6 +1099,63 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeKernelWhenAppendLaunchC
EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag); EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag);
} }
// With AllowMixingRegularAndCooperativeKernels set, appends a regular, then a
// cooperative, then another regular kernel to one command list. Every append is
// expected to succeed, and containsCooperativeKernelsFlag is expected to become
// true at the first cooperative append and remain true afterwards.
HWTEST2_F(CommandListAppendLaunchKernel, givenAnyCooperativeKernelAndMixingAllowedWhenAppendLaunchCooperativeKernelIsCalledThenCommandListTypeIsProperlySet, SklAndLaterMatcher) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);

    createKernel();
    kernel->setGroupSize(4, 1, 1);
    ze_group_count_t groupCount{8, 1, 1};

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);

    // Single place that issues the append; only the cooperative flag varies between steps.
    auto appendKernel = [&](bool isCooperative) {
        return pCommandList->appendLaunchKernelWithParams(kernel->toHandle(), &groupCount, nullptr, false, false, isCooperative);
    };

    // Step 1: regular kernel - the list holds a kernel but is not marked cooperative.
    auto status = appendKernel(false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_TRUE(pCommandList->containsAnyKernel);
    EXPECT_FALSE(pCommandList->containsCooperativeKernelsFlag);

    // Step 2: cooperative kernel - the list is now marked cooperative.
    status = appendKernel(true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_TRUE(pCommandList->containsAnyKernel);
    EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag);

    // Step 3: regular kernel again - the cooperative marking must not be cleared.
    status = appendKernel(false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_TRUE(pCommandList->containsAnyKernel);
    EXPECT_TRUE(pCommandList->containsCooperativeKernelsFlag);
}
// With AllowMixingRegularAndCooperativeKernels set, checks that appending kernels
// with differing cooperative flags to the same command list returns success in
// both orders: regular followed by cooperative, and (on a freshly created list)
// cooperative followed by regular.
HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsAndAllowMixingWhenAppendLaunchCooperativeKernelIsCalledThenReturnSuccess, SklAndLaterMatcher) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);

    Mock<::L0::Kernel> kernel;
    auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();
    kernel.setGroupSize(4, 1, 1);
    ze_group_count_t groupCount{8, 1, 1};

    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);

    // Captures pCommandList by reference, so it targets whichever list is current
    // after the reassignment below.
    auto appendKernel = [&](bool isCooperative) {
        return pCommandList->appendLaunchKernelWithParams(kernel.toHandle(), &groupCount, nullptr, false, false, isCooperative);
    };

    // Order A: regular kernel, then cooperative kernel.
    auto status = appendKernel(false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    status = appendKernel(true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    // Order B on a new command list: cooperative kernel, then regular kernel.
    pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    status = appendKernel(true);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    status = appendKernel(false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsWhenAppendLaunchCooperativeKernelIsCalledThenReturnError, SklAndLaterMatcher) { HWTEST2_F(CommandListAppendLaunchKernel, givenCooperativeAndNonCooperativeKernelsWhenAppendLaunchCooperativeKernelIsCalledThenReturnError, SklAndLaterMatcher) {
Mock<::L0::Kernel> kernel; Mock<::L0::Kernel> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr)); auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));

View File

@@ -430,6 +430,21 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenCommandListsWithCooperativeAndNo
auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false); auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false);
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, result); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_COMMAND_LIST_TYPE, result);
} }
DebugManagerStateRestore restorer;
DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);
{
ze_command_list_handle_t commandLists[] = {pCommandListWithCooperativeKernels->toHandle(),
pCommandListWithNonCooperativeKernels->toHandle()};
auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
{
ze_command_list_handle_t commandLists[] = {pCommandListWithNonCooperativeKernels->toHandle(),
pCommandListWithCooperativeKernels->toHandle()};
auto result = pCommandQueue->executeCommandLists(2, commandLists, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
pCommandQueue->destroy(); pCommandQueue->destroy();
} }

View File

@@ -324,4 +324,5 @@ DoNotFreeResources = 0
OverrideGmmResourceUsageField = -1 OverrideGmmResourceUsageField = -1
LogAllocationType = 0 LogAllocationType = 0
ProgramAdditionalPipeControlBeforeStateComputeModeCommand = 0 ProgramAdditionalPipeControlBeforeStateComputeModeCommand = 0
OverrideBufferSuitableForRenderCompression = -1 OverrideBufferSuitableForRenderCompression = -1
AllowMixingRegularAndCooperativeKernels = 0

View File

@@ -71,6 +71,7 @@ DECLARE_DEBUG_VARIABLE(bool, DisableTimestampEvents, false, "Timestamp info will
DECLARE_DEBUG_VARIABLE(bool, EnableResourceTags, false, "Enable resource tagging in GMM") DECLARE_DEBUG_VARIABLE(bool, EnableResourceTags, false, "Enable resource tagging in GMM")
DECLARE_DEBUG_VARIABLE(bool, EnableFlushTaskSubmission, false, "true: driver uses csr flushTask for immediate submissions, false: driver uses legacy executeCommandList path") DECLARE_DEBUG_VARIABLE(bool, EnableFlushTaskSubmission, false, "true: driver uses csr flushTask for immediate submissions, false: driver uses legacy executeCommandList path")
DECLARE_DEBUG_VARIABLE(bool, DoNotFreeResources, false, "true: driver stops freeing resources") DECLARE_DEBUG_VARIABLE(bool, DoNotFreeResources, false, "true: driver stops freeing resources")
DECLARE_DEBUG_VARIABLE(bool, AllowMixingRegularAndCooperativeKernels, false, "true: driver allows mixing regular and cooperative kernels in a single command list and in a single execute")
DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing") DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing")
DECLARE_DEBUG_VARIABLE(std::string, LoadBinarySipFromFile, std::string("unk"), "Select binary file to load SIP kernel raw binary") DECLARE_DEBUG_VARIABLE(std::string, LoadBinarySipFromFile, std::string("unk"), "Select binary file to load SIP kernel raw binary")
DECLARE_DEBUG_VARIABLE(int64_t, OverrideMultiStoragePlacement, -1, "-1: disable, 0+: tile mask, each bit corresponds to tile") DECLARE_DEBUG_VARIABLE(int64_t, OverrideMultiStoragePlacement, -1, "-1: disable, 0+: tile mask, each bit corresponds to tile")