Disable implicit scaling for cooperative kernels

When implicit scaling is disabled use useSingleSubdeviceValue = true.

Resolves: NEO-5757

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2021-06-21 15:24:14 +00:00
committed by Compute-Runtime-Automation
parent b5d5784b81
commit 29c64c3dd0
27 changed files with 256 additions and 107 deletions

View File

@@ -105,9 +105,11 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEstimateCommandBuff
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
auto sizeWA = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false);
auto sizeWA = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0),
Vec3<size_t>(1, 1, 1), false, false);
static_cast<MockOsContext *>(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS;
auto size = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false);
auto size = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0),
Vec3<size_t>(1, 1, 1), false, false);
auto expectedDiff = 2 * PreambleHelper<FamilyType>::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo());
auto diff = sizeWA - size;

View File

@@ -41,8 +41,8 @@ class MockTbxCsr : public TbxCommandStreamReceiverHw<GfxFamily> {
TbxCommandStreamReceiverHw<GfxFamily>::writeMemory(gpuAddress, cpuAddress, size, memoryBank, entryBits);
writeMemoryCalled = true;
}
void submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead) override {
TbxCommandStreamReceiverHw<GfxFamily>::submitBatchBuffer(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, entryBits, overrideRingHead);
void submitBatchBufferTbx(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead) override {
TbxCommandStreamReceiverHw<GfxFamily>::submitBatchBufferTbx(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, entryBits, overrideRingHead);
overrideRingHeadPassed = overrideRingHead;
submitBatchBufferCalled = true;
}

View File

@@ -95,7 +95,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
bool useGlobalAtomics = true;
uint32_t partitionCount = 0;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
partitionCount, false, false);
EXPECT_TRUE(cmdContainer->lastSentUseGlobalAtomics);
@@ -122,7 +123,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
cmdContainer->lastSentUseGlobalAtomics = true;
uint32_t partitionCount = 0;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
partitionCount, false, false);
EXPECT_TRUE(cmdContainer->lastSentUseGlobalAtomics);
@@ -146,7 +148,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
cmdContainer->lastSentUseGlobalAtomics = true;
uint32_t partitionCount = 0;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
partitionCount, false, false);
EXPECT_FALSE(cmdContainer->lastSentUseGlobalAtomics);
@@ -173,7 +176,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
cmdContainer->lastSentUseGlobalAtomics = true;
uint32_t partitionCount = 0;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
partitionCount, false, false);
EXPECT_FALSE(cmdContainer->lastSentUseGlobalAtomics);
@@ -199,7 +203,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
bool useGlobalAtomics = false;
uint32_t partitionCount = 0;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
partitionCount, false, false);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -233,7 +238,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithNoImplicitScalingTests, given
bool useGlobalAtomics = true;
uint32_t partitionCount = 0;
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
partitionCount, false, false);
EXPECT_FALSE(cmdContainer->lastSentUseGlobalAtomics);
GenCmdList commands;