Limit properties update for immediate command list to those used in flush task

Related-To: NEO-7701

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-02-07 14:56:40 +00:00
committed by Compute-Runtime-Automation
parent 783df81a44
commit 1740e1e747
8 changed files with 448 additions and 106 deletions

View File

@@ -259,7 +259,8 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions);
void updateStreamProperties(Kernel &kernel, bool isCooperative);
void updateStateBaseAddressStreamProperties(Kernel &kernel, bool updateRequiredState, bool captureBaseAddressState);
void updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative);
void updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative);
void clearCommandsToPatch();
size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch);
@@ -308,6 +309,9 @@ struct CommandListCoreFamily : CommandListImp {
void dispatchEventRemainingPacketsPostSyncOperation(Event *event);
void dispatchEventPostSyncOperation(Event *event, uint32_t value, bool omitFirstOperation, bool useMax, bool useLastPipeControl);
static constexpr bool cmdListDefaultCoherency = false;
static constexpr bool cmdListDefaultDisableOverdispatch = true;
int64_t currentSurfaceStateBaseAddress = -1;
int64_t currentDynamicStateBaseAddress = -1;
int64_t currentIndirectObjectBaseAddress = -1;

View File

@@ -164,6 +164,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
if (this->isFlushTaskSubmissionEnabled) {
commandContainer.setFlushTaskUsedForImmediate(this->isFlushTaskSubmissionEnabled);
commandContainer.setNumIddPerBlock(1);
requiredStreamState.stateComputeMode.setPropertiesCoherencyDevicePreemption(cmdListDefaultCoherency, this->device->getNEODevice()->getPreemptionMode(), rootDeviceEnvironment);
requiredStreamState.frontEndState.setPropertyDisableOverdispatch(cmdListDefaultDisableOverdispatch, rootDeviceEnvironment);
}
if (this->immediateCmdListHeapSharing) {
@@ -2341,69 +2344,72 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStateBaseAddressStreamProperties(Kernel &kernel, bool updateRequiredState, bool captureBaseAddressState) {
KernelImp &kernelImp = static_cast<KernelImp &>(kernel);
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
if (captureBaseAddressState) {
currentMocsState = static_cast<int32_t>(device->getMOCS(!kernelImp.getKernelRequiresUncachedMocs(), false) >> 1);
auto ssh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE);
currentSurfaceStateBaseAddress = ssh->getHeapGpuBase();
currentSurfaceStateSize = ssh->getHeapSizeInPages();
currentBindingTablePoolBaseAddress = currentSurfaceStateBaseAddress;
currentBindingTablePoolSize = currentSurfaceStateSize;
auto dsh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE);
if (dsh != nullptr) {
currentDynamicStateBaseAddress = dsh->getHeapGpuBase();
currentDynamicStateSize = dsh->getHeapSizeInPages();
}
auto ioh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT);
currentIndirectObjectBaseAddress = ioh->getHeapGpuBase();
currentIndirectObjectSize = ioh->getHeapSizeInPages();
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isCooperative) {
if (this->isFlushTaskSubmissionEnabled) {
updateStreamPropertiesForFlushTaskDispatchFlags(kernel, isCooperative);
} else {
updateStreamPropertiesForRegularCommandLists(kernel, isCooperative);
}
auto sbaStreamState = &finalStreamState.stateBaseAddress;
if (updateRequiredState) {
sbaStreamState = &requiredStreamState.stateBaseAddress;
}
sbaStreamState->setProperties(kernelImp.getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, currentMocsState,
currentBindingTablePoolBaseAddress, currentBindingTablePoolSize,
currentSurfaceStateBaseAddress, currentSurfaceStateSize,
currentDynamicStateBaseAddress, currentDynamicStateSize,
currentIndirectObjectBaseAddress, currentIndirectObjectSize,
rootDeviceEnvironment);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isCooperative) {
// Updates only the kernel-dependent subset of the required stream state:
// GRF number / thread arbitration, cooperative dispatch / EU fusion, and systolic mode.
// Other properties of requiredStreamState are not touched by this function.
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative) {
    auto &rootDeviceEnv = device->getNEODevice()->getRootDeviceEnvironment();
    auto &attributes = kernel.getKernelDescriptor().kernelAttributes;

    // State compute mode: values taken from the kernel attributes.
    requiredStreamState.stateComputeMode.setPropertiesGrfNumberThreadArbitration(
        attributes.numGrfRequired,
        attributes.threadArbitrationPolicy,
        rootDeviceEnv);

    // Front end: cooperative dispatch flag comes from the caller, EU fusion from the kernel.
    requiredStreamState.frontEndState.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(
        isCooperative,
        attributes.flags.requiresDisabledEUFusion,
        rootDeviceEnv);

    // Pipeline select: only the systolic mode is updated here.
    requiredStreamState.pipelineSelect.setPropertySystolicMode(
        attributes.flags.usesSystolicPipelineSelectMode,
        rootDeviceEnv);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative) {
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
bool captureBaseAddressState = containsAnyKernel;
KernelImp &kernelImp = static_cast<KernelImp &>(kernel);
currentMocsState = static_cast<int32_t>(device->getMOCS(!kernelImp.getKernelRequiresUncachedMocs(), false) >> 1);
auto ssh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE);
currentSurfaceStateBaseAddress = ssh->getHeapGpuBase();
currentSurfaceStateSize = ssh->getHeapSizeInPages();
currentBindingTablePoolBaseAddress = currentSurfaceStateBaseAddress;
currentBindingTablePoolSize = currentSurfaceStateSize;
auto dsh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE);
if (dsh != nullptr) {
currentDynamicStateBaseAddress = dsh->getHeapGpuBase();
currentDynamicStateSize = dsh->getHeapSizeInPages();
}
auto ioh = commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT);
currentIndirectObjectBaseAddress = ioh->getHeapGpuBase();
currentIndirectObjectSize = ioh->getHeapSizeInPages();
if (!containsAnyKernel) {
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, rootDeviceEnvironment);
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, rootDeviceEnvironment);
if (!this->isFlushTaskSubmissionEnabled) {
updateStateBaseAddressStreamProperties(kernel, true, true);
}
requiredStreamState.stateBaseAddress.setProperties(kernelImp.getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, currentMocsState,
currentBindingTablePoolBaseAddress, currentBindingTablePoolSize,
currentSurfaceStateBaseAddress, currentSurfaceStateSize,
currentDynamicStateBaseAddress, currentDynamicStateSize,
currentIndirectObjectBaseAddress, currentIndirectObjectSize,
rootDeviceEnvironment);
if (this->stateComputeModeTracking) {
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), rootDeviceEnvironment);
requiredStreamState.stateComputeMode.setProperties(cmdListDefaultCoherency, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), rootDeviceEnvironment);
finalStreamState = requiredStreamState;
} else {
finalStreamState = requiredStreamState;
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), rootDeviceEnvironment);
requiredStreamState.stateComputeMode.setProperties(cmdListDefaultCoherency, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), rootDeviceEnvironment);
}
containsAnyKernel = true;
captureBaseAddressState = false;
}
auto logicalStateHelperBlock = !getLogicalStateHelper();
@@ -2419,7 +2425,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
rootDeviceEnvironment);
}
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, rootDeviceEnvironment);
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
if (isPatchingVfeStateAllowed) {
@@ -2452,9 +2458,12 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, pipelineSelectArgs, false, rootDeviceEnvironment, isRcs, this->dcFlushSupport, nullptr);
}
if (!this->isFlushTaskSubmissionEnabled) {
updateStateBaseAddressStreamProperties(kernel, false, captureBaseAddressState);
}
finalStreamState.stateBaseAddress.setProperties(kernelImp.getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, currentMocsState,
currentBindingTablePoolBaseAddress, currentBindingTablePoolSize,
currentSurfaceStateBaseAddress, currentSurfaceStateSize,
currentDynamicStateBaseAddress, currentDynamicStateSize,
currentIndirectObjectBaseAddress, currentIndirectObjectSize,
rootDeviceEnvironment);
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -133,6 +133,7 @@ void ModuleMutableCommandListFixture::tearDown() {
// Enables flush-task submission and front-end tracking through debug flags,
// then runs the base fixture setup with REVISION_B.
void MultiReturnCommandListFixture::setUp() {
    // Both flags are set before the base setUp call.
    DebugManager.flags.EnableFlushTaskSubmission.set(1);
    DebugManager.flags.EnableFrontEndTracking.set(1);

    ModuleMutableCommandListFixture::setUp(REVISION_B);
}

View File

@@ -448,10 +448,8 @@ void CmdListPipelineSelectStateFixture::testBodyShareStateRegularImmediate() {
size_t csrUsedAfter = csrStream.getUsed();
auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState();
auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState();
EXPECT_EQ(1, immediateCmdListRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, immediateCmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(immediateCmdListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
@@ -483,7 +481,6 @@ void CmdListPipelineSelectStateFixture::testBodyShareStateImmediateRegular() {
void *currentBuffer = nullptr;
auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState();
auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState();
auto &immediateCmdListStream = *commandListImmediate->commandContainer.getCommandStream();
@@ -510,7 +507,6 @@ void CmdListPipelineSelectStateFixture::testBodyShareStateImmediateRegular() {
size_t csrUsedAfter = csrStream.getUsed();
EXPECT_EQ(1, immediateCmdListRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, immediateCmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(immediateCmdListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,

View File

@@ -1683,14 +1683,11 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr
size_t csrUsedAfter = csrStream.getUsed();
auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState();
auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState();
if (fePropertiesSupport.disableEuFusion) {
EXPECT_EQ(1, immediateCmdListRequiredState.frontEndState.disableEUFusion.value);
EXPECT_EQ(1, immediateCmdListFinalState.frontEndState.disableEUFusion.value);
} else {
EXPECT_EQ(-1, immediateCmdListRequiredState.frontEndState.disableEUFusion.value);
EXPECT_EQ(-1, immediateCmdListFinalState.frontEndState.disableEUFusion.value);
}
cmdList.clear();
@@ -1754,14 +1751,11 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr
size_t csrUsedAfter = csrStream.getUsed();
auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState();
auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState();
if (fePropertiesSupport.disableEuFusion) {
EXPECT_EQ(1, immediateCmdListRequiredState.frontEndState.disableEUFusion.value);
EXPECT_EQ(1, immediateCmdListFinalState.frontEndState.disableEUFusion.value);
} else {
EXPECT_EQ(-1, immediateCmdListRequiredState.frontEndState.disableEUFusion.value);
EXPECT_EQ(-1, immediateCmdListFinalState.frontEndState.disableEUFusion.value);
}
GenCmdList cmdList;

View File

@@ -31,6 +31,8 @@ struct StateComputeModeProperties {
void setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode, const RootDeviceEnvironment &rootDeviceEnvironment);
void setProperties(const StateComputeModeProperties &properties);
void setPropertiesGrfNumberThreadArbitration(uint32_t numGrfRequired, int32_t threadArbitrationPolicy, const RootDeviceEnvironment &rootDeviceEnvironment);
void setPropertiesCoherencyDevicePreemption(bool requiresCoherency, PreemptionMode devicePreemptionMode, const RootDeviceEnvironment &rootDeviceEnvironment);
bool isDirty() const;
protected:
@@ -40,6 +42,13 @@ struct StateComputeModeProperties {
void setPropertiesExtra();
void setPropertiesExtra(const StateComputeModeProperties &properties);
void clearIsDirtyExtra();
void initSupport(const RootDeviceEnvironment &rootDeviceEnvironment);
void setCoherencyProperty(bool requiresCoherency);
void setDevicePreemptionProperty(PreemptionMode devicePreemptionMode);
void setGrfNumberProperty(uint32_t numGrfRequired);
void setThreadArbitrationProperty(int32_t threadArbitrationPolicy,
const RootDeviceEnvironment &rootDeviceEnvironment);
StateComputeModePropertiesSupport scmPropertiesSupport = {};
bool propertiesSupportLoaded = false;
@@ -58,13 +67,16 @@ struct FrontEndProperties {
StreamProperty disableOverdispatch{};
StreamProperty singleSliceDispatchCcsMode{};
void setProperties(bool isCooperativeKernel, bool disableEUFusion, bool disableOverdispatch, int32_t engineInstancedDevice, const RootDeviceEnvironment &rootDeviceEnvironment);
void setProperties(bool isCooperativeKernel, bool disableEuFusion, bool disableOverdispatch, int32_t engineInstancedDevice, const RootDeviceEnvironment &rootDeviceEnvironment);
void setProperties(const FrontEndProperties &properties);
void setPropertySingleSliceDispatchCcsMode(int32_t engineInstancedDevice, const RootDeviceEnvironment &rootDeviceEnvironment);
void setPropertyDisableOverdispatch(bool disableOverdispatch, const RootDeviceEnvironment &rootDeviceEnvironment);
void setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(bool isCooperativeKernel, bool disableEuFusion, const RootDeviceEnvironment &rootDeviceEnvironment);
bool isDirty() const;
protected:
void clearIsDirty();
void initSupport(const RootDeviceEnvironment &rootDeviceEnvironment);
FrontEndPropertiesSupport frontEndPropertiesSupport = {};
bool propertiesSupportLoaded = false;
@@ -83,10 +95,12 @@ struct PipelineSelectProperties {
void setProperties(bool modeSelected, bool mediaSamplerDopClockGate, bool systolicMode, const RootDeviceEnvironment &rootDeviceEnvironment);
void setProperties(const PipelineSelectProperties &properties);
void setPropertySystolicMode(bool systolicMode, const RootDeviceEnvironment &rootDeviceEnvironment);
bool isDirty() const;
protected:
void clearIsDirty();
void initSupport(const RootDeviceEnvironment &rootDeviceEnvironment);
PipelineSelectPropertiesSupport pipelineSelectPropertiesSupport = {};
bool propertiesSupportLoaded = false;

View File

@@ -18,26 +18,12 @@ using namespace NEO;
void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t numGrfRequired, int32_t threadArbitrationPolicy, PreemptionMode devicePreemptionMode,
const RootDeviceEnvironment &rootDeviceEnvironment) {
if (this->propertiesSupportLoaded == false) {
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
productHelper.fillScmPropertiesSupportStructure(this->scmPropertiesSupport);
this->propertiesSupportLoaded = true;
}
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
initSupport(rootDeviceEnvironment);
clearIsDirty();
if (this->scmPropertiesSupport.coherencyRequired) {
int32_t isCoherencyRequired = (requiresCoherency ? 1 : 0);
this->isCoherencyRequired.set(isCoherencyRequired);
}
if (this->scmPropertiesSupport.largeGrfMode &&
(this->largeGrfMode.value == -1 || numGrfRequired != GrfConfig::NotApplicable)) {
int32_t largeGrfMode = (numGrfRequired == GrfConfig::LargeGrfNumber ? 1 : 0);
this->largeGrfMode.set(largeGrfMode);
}
setCoherencyProperty(requiresCoherency);
setGrfNumberProperty(numGrfRequired);
setThreadArbitrationProperty(threadArbitrationPolicy, rootDeviceEnvironment);
int32_t zPassAsyncComputeThreadLimit = -1;
if (DebugManager.flags.ForceZPassAsyncComputeThreadLimit.get() != -1) {
@@ -54,23 +40,7 @@ void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t
if (pixelAsyncComputeThreadLimit != -1 && this->scmPropertiesSupport.pixelAsyncComputeThreadLimit) {
this->pixelAsyncComputeThreadLimit.set(pixelAsyncComputeThreadLimit);
}
bool setDefaultThreadArbitrationPolicy = (threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) &&
(NEO::DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.get() ||
(this->threadArbitrationPolicy.value == ThreadArbitrationPolicy::NotPresent));
if (setDefaultThreadArbitrationPolicy) {
threadArbitrationPolicy = gfxCoreHelper.getDefaultThreadArbitrationPolicy();
}
if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
threadArbitrationPolicy = DebugManager.flags.OverrideThreadArbitrationPolicy.get();
}
if (this->scmPropertiesSupport.threadArbitrationPolicy) {
this->threadArbitrationPolicy.set(threadArbitrationPolicy);
}
if (this->scmPropertiesSupport.devicePreemptionMode) {
this->devicePreemptionMode.set(static_cast<int32_t>(devicePreemptionMode));
}
setDevicePreemptionProperty(devicePreemptionMode);
setPropertiesExtra();
}
@@ -104,14 +74,81 @@ void StateComputeModeProperties::clearIsDirty() {
clearIsDirtyExtra();
}
void FrontEndProperties::setProperties(bool isCooperativeKernel, bool disableEUFusion, bool disableOverdispatch, int32_t engineInstancedDevice, const RootDeviceEnvironment &rootDeviceEnvironment) {
if (this->propertiesSupportLoaded == false) {
// Stores the coherency requirement as 1/0 in isCoherencyRequired.
// Does nothing when the support structure reports the property as unsupported.
void StateComputeModeProperties::setCoherencyProperty(bool requiresCoherency) {
    if (!this->scmPropertiesSupport.coherencyRequired) {
        return;
    }
    const int32_t coherencyValue = requiresCoherency ? 1 : 0;
    this->isCoherencyRequired.set(coherencyValue);
}
// Stores the device preemption mode (converted to int32_t) in devicePreemptionMode.
// Does nothing when the support structure reports the property as unsupported.
void StateComputeModeProperties::setDevicePreemptionProperty(PreemptionMode devicePreemptionMode) {
    if (!this->scmPropertiesSupport.devicePreemptionMode) {
        return;
    }
    const auto preemptionValue = static_cast<int32_t>(devicePreemptionMode);
    this->devicePreemptionMode.set(preemptionValue);
}
// Updates largeGrfMode from the requested GRF count.
// The property is written only when it is supported and either it still holds -1
// or the caller passed a GRF count other than GrfConfig::NotApplicable.
void StateComputeModeProperties::setGrfNumberProperty(uint32_t numGrfRequired) {
    if (!this->scmPropertiesSupport.largeGrfMode) {
        return;
    }

    const bool propertyUnset = (this->largeGrfMode.value == -1);
    const bool grfCountGiven = (numGrfRequired != GrfConfig::NotApplicable);
    if (!(propertyUnset || grfCountGiven)) {
        return;
    }

    // 1 selects large GRF mode, 0 selects the non-large mode.
    const int32_t requestedMode = (numGrfRequired == GrfConfig::LargeGrfNumber) ? 1 : 0;
    this->largeGrfMode.set(requestedMode);
}
void StateComputeModeProperties::setThreadArbitrationProperty(int32_t threadArbitrationPolicy,
const RootDeviceEnvironment &rootDeviceEnvironment) {
bool setDefaultThreadArbitrationPolicy = (threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) &&
(NEO::DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.get() ||
(this->threadArbitrationPolicy.value == ThreadArbitrationPolicy::NotPresent));
if (setDefaultThreadArbitrationPolicy) {
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
threadArbitrationPolicy = gfxCoreHelper.getDefaultThreadArbitrationPolicy();
}
if (DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
threadArbitrationPolicy = DebugManager.flags.OverrideThreadArbitrationPolicy.get();
}
if (this->scmPropertiesSupport.threadArbitrationPolicy) {
this->threadArbitrationPolicy.set(threadArbitrationPolicy);
}
}
void StateComputeModeProperties::initSupport(const RootDeviceEnvironment &rootDeviceEnvironment) {
if (this->propertiesSupportLoaded == false) {
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
productHelper.fillScmPropertiesSupportStructure(this->scmPropertiesSupport);
this->propertiesSupportLoaded = true;
}
}
// Updates only the coherency and device preemption properties.
// Their dirty flags are reset before the new values are applied; the dirty flags of
// all other properties are left as they were.
void StateComputeModeProperties::setPropertiesCoherencyDevicePreemption(bool requiresCoherency, PreemptionMode devicePreemptionMode, const RootDeviceEnvironment &rootDeviceEnvironment) {
    initSupport(rootDeviceEnvironment);

    // Coherency
    this->isCoherencyRequired.isDirty = false;
    setCoherencyProperty(requiresCoherency);

    // Device preemption
    this->devicePreemptionMode.isDirty = false;
    setDevicePreemptionProperty(devicePreemptionMode);
}
// Updates only the large GRF mode and thread arbitration policy properties.
// Their dirty flags are reset before the new values are applied; the dirty flags of
// all other properties are left as they were.
void StateComputeModeProperties::setPropertiesGrfNumberThreadArbitration(uint32_t numGrfRequired, int32_t threadArbitrationPolicy, const RootDeviceEnvironment &rootDeviceEnvironment) {
    initSupport(rootDeviceEnvironment);

    // Large GRF mode
    this->largeGrfMode.isDirty = false;
    setGrfNumberProperty(numGrfRequired);

    // Thread arbitration policy
    this->threadArbitrationPolicy.isDirty = false;
    setThreadArbitrationProperty(threadArbitrationPolicy, rootDeviceEnvironment);
}
void FrontEndProperties::initSupport(const RootDeviceEnvironment &rootDeviceEnvironment) {
if (this->propertiesSupportLoaded == false) {
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
productHelper.fillFrontEndPropertiesSupportStructure(this->frontEndPropertiesSupport, hwInfo);
this->propertiesSupportLoaded = true;
}
}
void FrontEndProperties::setProperties(bool isCooperativeKernel, bool disableEuFusion, bool disableOverdispatch, int32_t engineInstancedDevice, const RootDeviceEnvironment &rootDeviceEnvironment) {
initSupport(rootDeviceEnvironment);
clearIsDirty();
@@ -120,7 +157,7 @@ void FrontEndProperties::setProperties(bool isCooperativeKernel, bool disableEUF
}
if (this->frontEndPropertiesSupport.disableEuFusion) {
this->disableEUFusion.set(disableEUFusion);
this->disableEUFusion.set(disableEuFusion);
}
if (this->frontEndPropertiesSupport.disableOverdispatch) {
@@ -133,18 +170,38 @@ void FrontEndProperties::setProperties(bool isCooperativeKernel, bool disableEUF
}
// Updates only the singleSliceDispatchCcsMode property.
// Support information is loaded on first use through initSupport(); the inline copy of
// that lazy-load logic was redundant (initSupport() performs the identical check and fill)
// and has been dropped so this setter matches the other single-property setters.
// The property's dirty flag is reset before the value is applied; the value is written
// only when the support structure reports the property as supported.
void FrontEndProperties::setPropertySingleSliceDispatchCcsMode(int32_t engineInstancedDevice, const RootDeviceEnvironment &rootDeviceEnvironment) {
    initSupport(rootDeviceEnvironment);

    this->singleSliceDispatchCcsMode.isDirty = false;
    if (this->frontEndPropertiesSupport.singleSliceDispatchCcsMode) {
        this->singleSliceDispatchCcsMode.set(engineInstancedDevice);
    }
}
// Updates only the disableOverdispatch property.
// Its dirty flag is reset first; the value is written only when supported.
void FrontEndProperties::setPropertyDisableOverdispatch(bool disableOverdispatch, const RootDeviceEnvironment &rootDeviceEnvironment) {
    initSupport(rootDeviceEnvironment);

    auto &overdispatchProperty = this->disableOverdispatch;
    overdispatchProperty.isDirty = false;
    if (!this->frontEndPropertiesSupport.disableOverdispatch) {
        return;
    }
    overdispatchProperty.set(disableOverdispatch);
}
// Updates only the computeDispatchAllWalkerEnable and disableEUFusion properties.
// Each property's dirty flag is reset, then the value is written when the property
// is reported as supported.
void FrontEndProperties::setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(bool isCooperativeKernel, bool disableEuFusion, const RootDeviceEnvironment &rootDeviceEnvironment) {
    initSupport(rootDeviceEnvironment);
    const auto &support = this->frontEndPropertiesSupport;

    // Compute dispatch all walker (driven by the cooperative kernel flag)
    this->computeDispatchAllWalkerEnable.isDirty = false;
    if (support.computeDispatchAllWalker) {
        this->computeDispatchAllWalkerEnable.set(isCooperativeKernel);
    }

    // EU fusion disable
    this->disableEUFusion.isDirty = false;
    if (support.disableEuFusion) {
        this->disableEUFusion.set(disableEuFusion);
    }
}
void FrontEndProperties::setProperties(const FrontEndProperties &properties) {
clearIsDirty();
@@ -166,14 +223,17 @@ void FrontEndProperties::clearIsDirty() {
computeDispatchAllWalkerEnable.isDirty = false;
}
void PipelineSelectProperties::setProperties(bool modeSelected, bool mediaSamplerDopClockGate, bool systolicMode, const RootDeviceEnvironment &rootDeviceEnvironment) {
void PipelineSelectProperties::initSupport(const RootDeviceEnvironment &rootDeviceEnvironment) {
if (this->propertiesSupportLoaded == false) {
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
productHelper.fillPipelineSelectPropertiesSupportStructure(this->pipelineSelectPropertiesSupport, *rootDeviceEnvironment.getHardwareInfo());
this->propertiesSupportLoaded = true;
}
}
void PipelineSelectProperties::setProperties(bool modeSelected, bool mediaSamplerDopClockGate, bool systolicMode, const RootDeviceEnvironment &rootDeviceEnvironment) {
initSupport(rootDeviceEnvironment);
clearIsDirty();
if (this->pipelineSelectPropertiesSupport.modeSelected) {
@@ -189,6 +249,16 @@ void PipelineSelectProperties::setProperties(bool modeSelected, bool mediaSample
}
}
// Updates only the systolicMode property.
// Its dirty flag is reset first; the value is written only when supported.
void PipelineSelectProperties::setPropertySystolicMode(bool systolicMode, const RootDeviceEnvironment &rootDeviceEnvironment) {
    initSupport(rootDeviceEnvironment);

    auto &systolicProperty = this->systolicMode;
    systolicProperty.isDirty = false;
    if (!this->pipelineSelectPropertiesSupport.systolicMode) {
        return;
    }
    systolicProperty.set(systolicMode);
}
void PipelineSelectProperties::setProperties(const PipelineSelectProperties &properties) {
clearIsDirty();

View File

@@ -19,6 +19,11 @@
using namespace NEO;
// Test-only wrapper that makes the support structure and its "loaded" flag accessible,
// so tests can preset which state compute mode properties are treated as supported.
struct MockStateComputeModeProperties : public StateComputeModeProperties {
    using StateComputeModeProperties::scmPropertiesSupport;
    using StateComputeModeProperties::propertiesSupportLoaded;
};
struct MockFrontEndProperties : public FrontEndProperties {
using FrontEndProperties::frontEndPropertiesSupport;
using FrontEndProperties::propertiesSupportLoaded;
@@ -266,6 +271,137 @@ TEST(StreamPropertiesTests, givenOtherPipelineSelectPropertiesStructWhenSetPrope
verifySettingPropertiesFromOtherStruct<PipelineSelectProperties, getAllPipelineSelectProperties>();
}
// Verifies setPropertiesCoherencyDevicePreemption: values are written only when the
// properties are marked supported, and isDirty() is true only after a call that
// changed a value.
TEST(StreamPropertiesTests, givenCoherencyStateAndDevicePreemptionComputeModePropertiesWhenSettingPropertyAndCheckIfSupportedThenExpectCorrectState) {
MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
MockStateComputeModeProperties scmProperties{};
// Mark support as already loaded so the test controls the support flags directly.
scmProperties.propertiesSupportLoaded = true;
scmProperties.scmPropertiesSupport.coherencyRequired = false;
scmProperties.scmPropertiesSupport.devicePreemptionMode = false;
bool coherencyRequired = false;
PreemptionMode devicePreemptionMode = PreemptionMode::Disabled;
// Step 1: both properties unsupported -> nothing is written, values stay at -1.
scmProperties.setPropertiesCoherencyDevicePreemption(coherencyRequired, devicePreemptionMode, rootDeviceEnvironment);
EXPECT_FALSE(scmProperties.isDirty());
EXPECT_EQ(-1, scmProperties.isCoherencyRequired.value);
EXPECT_EQ(-1, scmProperties.devicePreemptionMode.value);
// Step 2: enable support -> first write changes both values and reports dirty.
scmProperties.scmPropertiesSupport.coherencyRequired = true;
scmProperties.scmPropertiesSupport.devicePreemptionMode = true;
scmProperties.setPropertiesCoherencyDevicePreemption(coherencyRequired, devicePreemptionMode, rootDeviceEnvironment);
EXPECT_TRUE(scmProperties.isDirty());
EXPECT_EQ(0, scmProperties.isCoherencyRequired.value);
EXPECT_EQ(static_cast<int32_t>(devicePreemptionMode), scmProperties.devicePreemptionMode.value);
// Step 3: change the preemption mode through the full setProperties() call.
devicePreemptionMode = PreemptionMode::Initial;
scmProperties.setProperties(coherencyRequired, -1, -1, devicePreemptionMode, rootDeviceEnvironment);
EXPECT_TRUE(scmProperties.isDirty());
EXPECT_EQ(0, scmProperties.isCoherencyRequired.value);
EXPECT_EQ(static_cast<int32_t>(devicePreemptionMode), scmProperties.devicePreemptionMode.value);
// Step 4: the partial setter with unchanged values clears the dirty state.
scmProperties.setPropertiesCoherencyDevicePreemption(coherencyRequired, devicePreemptionMode, rootDeviceEnvironment);
EXPECT_FALSE(scmProperties.isDirty());
EXPECT_EQ(0, scmProperties.isCoherencyRequired.value);
EXPECT_EQ(static_cast<int32_t>(devicePreemptionMode), scmProperties.devicePreemptionMode.value);
// Step 5: repeating the same call keeps the state clean.
scmProperties.setPropertiesCoherencyDevicePreemption(coherencyRequired, devicePreemptionMode, rootDeviceEnvironment);
EXPECT_FALSE(scmProperties.isDirty());
EXPECT_EQ(0, scmProperties.isCoherencyRequired.value);
EXPECT_EQ(static_cast<int32_t>(devicePreemptionMode), scmProperties.devicePreemptionMode.value);
// Step 6: change both inputs -> dirty again with the new values stored.
coherencyRequired = true;
devicePreemptionMode = PreemptionMode::MidThread;
scmProperties.setPropertiesCoherencyDevicePreemption(coherencyRequired, devicePreemptionMode, rootDeviceEnvironment);
EXPECT_TRUE(scmProperties.isDirty());
EXPECT_EQ(1, scmProperties.isCoherencyRequired.value);
EXPECT_EQ(static_cast<int32_t>(devicePreemptionMode), scmProperties.devicePreemptionMode.value);
// Step 7: same values once more -> clean, values retained.
scmProperties.setPropertiesCoherencyDevicePreemption(coherencyRequired, devicePreemptionMode, rootDeviceEnvironment);
EXPECT_FALSE(scmProperties.isDirty());
EXPECT_EQ(1, scmProperties.isCoherencyRequired.value);
EXPECT_EQ(static_cast<int32_t>(devicePreemptionMode), scmProperties.devicePreemptionMode.value);
}
// Verifies setPropertiesGrfNumberThreadArbitration: values are written only when the
// properties are marked supported, and isDirty() is true only after a call that
// changed a value.
TEST(StreamPropertiesTests, givenGrfNumberAndThreadArbitrationStateComputeModePropertiesWhenSettingPropertyAndCheckIfSupportedThenExpectCorrectState) {
MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
MockStateComputeModeProperties scmProperties{};
// Mark support as already loaded so the test controls the support flags directly.
scmProperties.propertiesSupportLoaded = true;
scmProperties.scmPropertiesSupport.largeGrfMode = false;
scmProperties.scmPropertiesSupport.threadArbitrationPolicy = false;
int32_t grfNumber = 128;
int32_t threadArbitration = 1;
// Step 1: both properties unsupported -> nothing is written, values stay at -1.
scmProperties.setPropertiesGrfNumberThreadArbitration(static_cast<uint32_t>(grfNumber), threadArbitration, rootDeviceEnvironment);
EXPECT_FALSE(scmProperties.isDirty());
EXPECT_EQ(-1, scmProperties.largeGrfMode.value);
EXPECT_EQ(-1, scmProperties.threadArbitrationPolicy.value);
// Step 2: enable support and seed the values through the full setProperties() call.
scmProperties.scmPropertiesSupport.largeGrfMode = true;
scmProperties.scmPropertiesSupport.threadArbitrationPolicy = true;
scmProperties.setProperties(false, static_cast<uint32_t>(grfNumber), threadArbitration, PreemptionMode::Initial, rootDeviceEnvironment);
EXPECT_TRUE(scmProperties.isDirty());
EXPECT_EQ(0, scmProperties.largeGrfMode.value);
EXPECT_EQ(threadArbitration, scmProperties.threadArbitrationPolicy.value);
// Step 3: the partial setter with unchanged values clears the dirty state.
scmProperties.setPropertiesGrfNumberThreadArbitration(static_cast<uint32_t>(grfNumber), threadArbitration, rootDeviceEnvironment);
EXPECT_FALSE(scmProperties.isDirty());
EXPECT_EQ(0, scmProperties.largeGrfMode.value);
EXPECT_EQ(threadArbitration, scmProperties.threadArbitrationPolicy.value);
// Step 4: repeating the same call keeps the state clean.
scmProperties.setPropertiesGrfNumberThreadArbitration(static_cast<uint32_t>(grfNumber), threadArbitration, rootDeviceEnvironment);
EXPECT_FALSE(scmProperties.isDirty());
EXPECT_EQ(0, scmProperties.largeGrfMode.value);
EXPECT_EQ(threadArbitration, scmProperties.threadArbitrationPolicy.value);
// Step 5: change both inputs -> dirty; a GRF count of 256 is expected to yield largeGrfMode 1.
grfNumber = 256;
threadArbitration = 2;
scmProperties.setPropertiesGrfNumberThreadArbitration(static_cast<uint32_t>(grfNumber), threadArbitration, rootDeviceEnvironment);
EXPECT_TRUE(scmProperties.isDirty());
EXPECT_EQ(1, scmProperties.largeGrfMode.value);
EXPECT_EQ(threadArbitration, scmProperties.threadArbitrationPolicy.value);
// Step 6: same values once more -> clean, values retained.
scmProperties.setPropertiesGrfNumberThreadArbitration(static_cast<uint32_t>(grfNumber), threadArbitration, rootDeviceEnvironment);
EXPECT_FALSE(scmProperties.isDirty());
EXPECT_EQ(1, scmProperties.largeGrfMode.value);
EXPECT_EQ(threadArbitration, scmProperties.threadArbitrationPolicy.value);
}
// Verifies that with ForceDefaultThreadArbitrationPolicyIfNotSpecified enabled, passing
// ThreadArbitrationPolicy::NotPresent replaces a previously stored policy with the
// gfx core helper's default policy.
TEST(StreamPropertiesTests, givenForceDebugDefaultThreadArbitrationStateComputeModePropertyWhenSettingPropertyAndCheckIfSupportedThenExpectCorrectState) {
// Restores debug flags modified by this test when it goes out of scope (presumably; confirm against DebugManagerStateRestore).
DebugManagerStateRestore restorer;
MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
auto defaultThreadArbitrationPolicy = gfxCoreHelper.getDefaultThreadArbitrationPolicy();
MockStateComputeModeProperties scmProperties{};
// Only thread arbitration is marked supported; largeGrfMode support is left at its initial value.
scmProperties.propertiesSupportLoaded = true;
scmProperties.scmPropertiesSupport.threadArbitrationPolicy = true;
constexpr int32_t grfNumber = -1;
constexpr int32_t requestedThreadArbitration = ThreadArbitrationPolicy::RoundRobinAfterDependency;
int32_t threadArbitration = requestedThreadArbitration;
// Step 1: an explicit policy is stored as requested; largeGrfMode stays at -1.
scmProperties.setPropertiesGrfNumberThreadArbitration(static_cast<uint32_t>(grfNumber), threadArbitration, rootDeviceEnvironment);
EXPECT_TRUE(scmProperties.isDirty());
EXPECT_EQ(-1, scmProperties.largeGrfMode.value);
EXPECT_EQ(requestedThreadArbitration, scmProperties.threadArbitrationPolicy.value);
// Step 2: with the force-default flag set, NotPresent resolves to the default policy.
DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.set(true);
threadArbitration = ThreadArbitrationPolicy::NotPresent;
scmProperties.setPropertiesGrfNumberThreadArbitration(static_cast<uint32_t>(grfNumber), threadArbitration, rootDeviceEnvironment);
// Dirty only if the default differs from the policy stored in step 1.
if (defaultThreadArbitrationPolicy == requestedThreadArbitration) {
EXPECT_FALSE(scmProperties.isDirty());
} else {
EXPECT_TRUE(scmProperties.isDirty());
}
EXPECT_EQ(-1, scmProperties.largeGrfMode.value);
EXPECT_EQ(defaultThreadArbitrationPolicy, scmProperties.threadArbitrationPolicy.value);
}
TEST(StreamPropertiesTests, givenSingleDispatchCcsFrontEndPropertyWhenSettingPropertyAndCheckIfSupportedThenExpectCorrectState) {
MockExecutionEnvironment mockExecutionEnvironment{};
auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<ProductHelper>();
@@ -295,6 +431,88 @@ TEST(StreamPropertiesTests, givenSingleDispatchCcsFrontEndPropertyWhenSettingPro
EXPECT_EQ(engineInstancedDevice, feProperties.singleSliceDispatchCcsMode.value);
}
TEST(StreamPropertiesTests, givenDisableOverdispatchFrontEndPropertyWhenSettingPropertyAndCheckIfSupportedThenExpectCorrectState) {
    // Scenario: walks the disableOverdispatch front-end property through unsupported,
    // first-set, repeated-set and changed-value states and checks value and dirty flag each time.
    MockExecutionEnvironment mockExecutionEnvironment{};
    auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];

    MockFrontEndProperties feProperties{};
    feProperties.propertiesSupportLoaded = true;

    bool overdispatchDisabled = false;

    // Support switched off: the value is expected to stay at -1 and the state clean.
    feProperties.frontEndPropertiesSupport.disableOverdispatch = false;
    feProperties.setPropertyDisableOverdispatch(overdispatchDisabled, rootDeviceEnvironment);
    EXPECT_FALSE(feProperties.isDirty());
    EXPECT_EQ(-1, feProperties.disableOverdispatch.value);

    // Support switched on: the full setter is expected to record 0 and mark the state dirty.
    feProperties.frontEndPropertiesSupport.disableOverdispatch = true;
    feProperties.setProperties(false, false, overdispatchDisabled, -1, rootDeviceEnvironment);
    EXPECT_TRUE(feProperties.isDirty());
    EXPECT_EQ(0, feProperties.disableOverdispatch.value);

    // Re-applying the same value through the single-property setter: clean, value unchanged.
    feProperties.setPropertyDisableOverdispatch(overdispatchDisabled, rootDeviceEnvironment);
    EXPECT_FALSE(feProperties.isDirty());
    EXPECT_EQ(0, feProperties.disableOverdispatch.value);

    feProperties.setPropertyDisableOverdispatch(overdispatchDisabled, rootDeviceEnvironment);
    EXPECT_FALSE(feProperties.isDirty());
    EXPECT_EQ(0, feProperties.disableOverdispatch.value);

    // Flipping the requested value: dirty once, then clean on repetition.
    overdispatchDisabled = true;
    feProperties.setPropertyDisableOverdispatch(overdispatchDisabled, rootDeviceEnvironment);
    EXPECT_TRUE(feProperties.isDirty());
    EXPECT_EQ(1, feProperties.disableOverdispatch.value);

    feProperties.setPropertyDisableOverdispatch(overdispatchDisabled, rootDeviceEnvironment);
    EXPECT_FALSE(feProperties.isDirty());
    EXPECT_EQ(1, feProperties.disableOverdispatch.value);
}
TEST(StreamPropertiesTests, givenComputeDispatchAllWalkerEnableAndDisableEuFusionFrontEndPropertiesWhenSettingPropertiesAndCheckIfSupportedThenExpectCorrectState) {
    // Scenario: drives the disableEUFusion and computeDispatchAllWalkerEnable front-end
    // properties together through unsupported, first-set, repeated-set and changed-value states.
    MockExecutionEnvironment mockExecutionEnvironment{};
    auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];

    MockFrontEndProperties feProperties{};
    feProperties.propertiesSupportLoaded = true;

    bool euFusionDisabled = false;
    bool cooperativeKernel = false;

    // Both properties unsupported: values are expected to stay at -1 and the state clean.
    feProperties.frontEndPropertiesSupport.disableEuFusion = false;
    feProperties.frontEndPropertiesSupport.computeDispatchAllWalker = false;
    feProperties.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(cooperativeKernel, euFusionDisabled, rootDeviceEnvironment);
    EXPECT_FALSE(feProperties.isDirty());
    EXPECT_EQ(-1, feProperties.disableEUFusion.value);
    EXPECT_EQ(-1, feProperties.computeDispatchAllWalkerEnable.value);

    // Both properties supported: the full setter is expected to record 0/0 and mark the state dirty.
    feProperties.frontEndPropertiesSupport.disableEuFusion = true;
    feProperties.frontEndPropertiesSupport.computeDispatchAllWalker = true;
    feProperties.setProperties(cooperativeKernel, euFusionDisabled, false, -1, rootDeviceEnvironment);
    EXPECT_TRUE(feProperties.isDirty());
    EXPECT_EQ(0, feProperties.disableEUFusion.value);
    EXPECT_EQ(0, feProperties.computeDispatchAllWalkerEnable.value);

    // Same values via the dedicated setter, applied twice: clean both times, values unchanged.
    feProperties.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(cooperativeKernel, euFusionDisabled, rootDeviceEnvironment);
    EXPECT_FALSE(feProperties.isDirty());
    EXPECT_EQ(0, feProperties.disableEUFusion.value);
    EXPECT_EQ(0, feProperties.computeDispatchAllWalkerEnable.value);

    feProperties.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(cooperativeKernel, euFusionDisabled, rootDeviceEnvironment);
    EXPECT_FALSE(feProperties.isDirty());
    EXPECT_EQ(0, feProperties.disableEUFusion.value);
    EXPECT_EQ(0, feProperties.computeDispatchAllWalkerEnable.value);

    // Both inputs flipped: dirty once with 1/1, then clean on repetition.
    euFusionDisabled = true;
    cooperativeKernel = true;
    feProperties.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(cooperativeKernel, euFusionDisabled, rootDeviceEnvironment);
    EXPECT_TRUE(feProperties.isDirty());
    EXPECT_EQ(1, feProperties.disableEUFusion.value);
    EXPECT_EQ(1, feProperties.computeDispatchAllWalkerEnable.value);

    feProperties.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(cooperativeKernel, euFusionDisabled, rootDeviceEnvironment);
    EXPECT_FALSE(feProperties.isDirty());
    EXPECT_EQ(1, feProperties.disableEUFusion.value);
    EXPECT_EQ(1, feProperties.computeDispatchAllWalkerEnable.value);
}
TEST(StreamPropertiesTests, whenSettingPipelineSelectPropertiesThenCorrectValueIsSet) {
MockExecutionEnvironment mockExecutionEnvironment{};
auto &productHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper<ProductHelper>();
@@ -350,6 +568,42 @@ TEST(StreamPropertiesTests, givenModeSelectPipelineSelectPropertyNotSupportedWhe
EXPECT_FALSE(pipeProperties.isDirty());
}
TEST(StreamPropertiesTests, givenSystolicModePipelineSelectPropertyWhenSettingPropertyAndCheckIfSupportedThenExpectCorrectState) {
    // Scenario: walks the systolicMode pipeline-select property through unsupported,
    // first-set, repeated-set and changed-value states and checks value and dirty flag each time.
    MockExecutionEnvironment mockExecutionEnvironment{};
    auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];

    MockPipelineSelectProperties pipelineSelectProperties{};
    pipelineSelectProperties.propertiesSupportLoaded = true;

    bool requestedSystolicMode = false;

    // Support switched off: the value is expected to stay at -1 and the state clean.
    pipelineSelectProperties.pipelineSelectPropertiesSupport.systolicMode = false;
    pipelineSelectProperties.setPropertySystolicMode(requestedSystolicMode, rootDeviceEnvironment);
    EXPECT_FALSE(pipelineSelectProperties.isDirty());
    EXPECT_EQ(-1, pipelineSelectProperties.systolicMode.value);

    // Support switched on: first set is expected to record 0 and mark the state dirty.
    pipelineSelectProperties.pipelineSelectPropertiesSupport.systolicMode = true;
    pipelineSelectProperties.setPropertySystolicMode(requestedSystolicMode, rootDeviceEnvironment);
    EXPECT_TRUE(pipelineSelectProperties.isDirty());
    EXPECT_EQ(0, pipelineSelectProperties.systolicMode.value);

    // Same value again: clean, value unchanged.
    pipelineSelectProperties.setPropertySystolicMode(requestedSystolicMode, rootDeviceEnvironment);
    EXPECT_FALSE(pipelineSelectProperties.isDirty());
    EXPECT_EQ(0, pipelineSelectProperties.systolicMode.value);

    // Flipping the requested mode: dirty once, then clean on each repetition.
    requestedSystolicMode = true;
    pipelineSelectProperties.setPropertySystolicMode(requestedSystolicMode, rootDeviceEnvironment);
    EXPECT_TRUE(pipelineSelectProperties.isDirty());
    EXPECT_EQ(1, pipelineSelectProperties.systolicMode.value);

    pipelineSelectProperties.setPropertySystolicMode(requestedSystolicMode, rootDeviceEnvironment);
    EXPECT_FALSE(pipelineSelectProperties.isDirty());
    EXPECT_EQ(1, pipelineSelectProperties.systolicMode.value);

    pipelineSelectProperties.setPropertySystolicMode(requestedSystolicMode, rootDeviceEnvironment);
    EXPECT_FALSE(pipelineSelectProperties.isDirty());
    EXPECT_EQ(1, pipelineSelectProperties.systolicMode.value);
}
TEST(StreamPropertiesTests, givenStateBaseAddressSupportFlagStateWhenSettingPropertyAndCheckIfDirtyThenExpectCleanStateForNotSupportedAndDirtyForSupported) {
MockExecutionEnvironment mockExecutionEnvironment{};
MockStateBaseAddressProperties sbaProperties{};