Add extra parameters to programStateBaseAddress()

Signed-off-by: Igor Venevtsev <igor.venevtsev@intel.com>
This commit is contained in:
Igor Venevtsev
2021-01-26 14:05:22 +00:00
committed by Compute-Runtime-Automation
parent adfa3c6bcb
commit bb72beac6b
18 changed files with 192 additions and 85 deletions

View File

@@ -746,6 +746,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired);
auto context = kernel->getProgram()->getContextPtr();
auto numDevicesInContext = context ? context->getNumDevices() : 1u;
DispatchFlags dispatchFlags(
{}, //csrDependencies
&timestampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes
@@ -771,7 +774,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false, //epilogueRequired
usePerDssBackedBuffer, //usePerDssBackedBuffer
kernel->isSingleSubdevicePreferred() //useSingleSubdevice
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
numDevicesInContext //numDevicesInContext
);
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
@@ -987,7 +992,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false, //epilogueRequired
false, //usePerDssBackedBuffer
false);
false, //useSingleSubdevice
false, //useGlobalAtomics
1u); //numDevicesInContext
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -75,7 +75,9 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false, //epilogueRequired
false, //usePerDssBackedBuffer
false);
false, //useSingleSubdevice
false, //useGlobalAtomics
1u); //numDevicesInContext
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
@@ -215,32 +217,37 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired());
auto context = kernel->getProgram()->getContextPtr();
auto numDevicesInContext = context ? context->getNumDevices() : 1u;
DispatchFlags dispatchFlags(
{}, //csrDependencies
nullptr, //barrierTimestampPacketNodes
{false, kernel->isVmeKernel()}, //pipelineSelectArgs
commandQueue.flushStamp->getStampReference(), //flushStampReference
commandQueue.getThrottle(), //throttle
preemptionMode, //preemptionMode
kernelDescriptor.kernelAttributes.numGrfRequired, //numGrfRequired
L3CachingSettings::l3CacheOn, //l3CacheSettings
kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy
kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo
kernel->getExecutionType(), //kernelExecutionType
memoryCompressionState, //memoryCompressionState
commandQueue.getSliceCount(), //sliceCount
true, //blocking
flushDC, //dcFlush
slmUsed, //useSLM
true, //guardCommandBufferWithPipeControl
NDRangeKernel, //GSBA32BitRequired
requiresCoherency, //requiresCoherency
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
false, //implicitFlush
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false, //epilogueRequired
kernel->requiresPerDssBackedBuffer(rootDeviceIndex), //usePerDssBackedBuffer
kernel->isSingleSubdevicePreferred());
{}, //csrDependencies
nullptr, //barrierTimestampPacketNodes
{false, kernel->isVmeKernel()}, //pipelineSelectArgs
commandQueue.flushStamp->getStampReference(), //flushStampReference
commandQueue.getThrottle(), //throttle
preemptionMode, //preemptionMode
kernelDescriptor.kernelAttributes.numGrfRequired, //numGrfRequired
L3CachingSettings::l3CacheOn, //l3CacheSettings
kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy
kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo
kernel->getExecutionType(), //kernelExecutionType
memoryCompressionState, //memoryCompressionState
commandQueue.getSliceCount(), //sliceCount
true, //blocking
flushDC, //dcFlush
slmUsed, //useSLM
true, //guardCommandBufferWithPipeControl
NDRangeKernel, //GSBA32BitRequired
requiresCoherency, //requiresCoherency
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
false, //implicitFlush
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false, //epilogueRequired
kernel->requiresPerDssBackedBuffer(rootDeviceIndex), //usePerDssBackedBuffer
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
numDevicesInContext); //numDevicesInContext
if (timestampPacketDependencies) {
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
@@ -363,7 +370,9 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
commandStreamReceiver.isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
false, //epilogueRequired
false, //usePerDssBackedBuffer
false);
false, //useSingleSubdevice
false, //useGlobalAtomics
1u); //numDevicesInContext
UNRECOVERABLE_IF(!kernelOperation->blitEnqueue && !commandStreamReceiver.peekTimestampPacketWriteEnabled());

View File

@@ -1309,6 +1309,46 @@ HWTEST_F(EnqueueKernelTest, givenVMEKernelWhenEnqueueKernelThenDispatchFlagsHave
EXPECT_TRUE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
}
HWTEST_F(EnqueueKernelTest, givenUseGlobalAtomicsSetWhenEnqueueKernelThenDispatchFlagsUseGlobalAtomicsIsSet) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
pDevice->resetCommandStreamReceiver(mockCsr);
MockKernelWithInternals mockKernel(*pClDevice, context);
size_t gws[3] = {1, 0, 0};
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics = true;
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_TRUE(mockCsr->passedDispatchFlags.useGlobalAtomics);
}
HWTEST_F(EnqueueKernelTest, givenUseGlobalAtomicsIsNotSetWhenEnqueueKernelThenDispatchFlagsUseGlobalAtomicsIsNotSet) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
pDevice->resetCommandStreamReceiver(mockCsr);
MockKernelWithInternals mockKernel(*pClDevice, context);
size_t gws[3] = {1, 0, 0};
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics = false;
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_FALSE(mockCsr->passedDispatchFlags.useGlobalAtomics);
}
HWTEST_F(EnqueueKernelTest, givenContextWithSeveralDevicesWhenEnqueueKernelThenDispatchFlagsiHasCorrectNumDevicesValue) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
pDevice->resetCommandStreamReceiver(mockCsr);
MockKernelWithInternals mockKernel(*pClDevice, context);
size_t gws[3] = {1, 0, 0};
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(1u, mockCsr->passedDispatchFlags.numDevicesInContext);
context->devices.resize(10);
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(10u, mockCsr->passedDispatchFlags.numDevicesInContext);
context->devices.resize(1);
}
HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsDoesntHaveMediaSamplerRequired) {
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);

View File

@@ -1212,7 +1212,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrWhenGen
false,
pDevice->getGmmHelper(),
false,
MemoryCompressionState::NotApplicable);
MemoryCompressionState::NotApplicable,
false,
1u);
EXPECT_NE(generalStateBaseAddress, sbaCmd.getGeneralStateBaseAddress());
EXPECT_EQ(GmmHelper::decanonize(generalStateBaseAddress), sbaCmd.getGeneralStateBaseAddress());
@@ -1236,7 +1238,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroGeneralStateBaseAddres
false,
pDevice->getGmmHelper(),
false,
MemoryCompressionState::NotApplicable);
MemoryCompressionState::NotApplicable,
false,
1u);
EXPECT_EQ(0ull, sbaCmd.getGeneralStateBaseAddress());
EXPECT_EQ(0u, sbaCmd.getGeneralStateBufferSize());
@@ -1262,7 +1266,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroInternalHeapBaseAddres
false,
pDevice->getGmmHelper(),
false,
MemoryCompressionState::NotApplicable);
MemoryCompressionState::NotApplicable,
false,
1u);
EXPECT_FALSE(sbaCmd.getInstructionBaseAddressModifyEnable());
EXPECT_EQ(0ull, sbaCmd.getInstructionBaseAddress());
@@ -1293,7 +1299,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenSbaProgram
false,
pDevice->getGmmHelper(),
false,
MemoryCompressionState::NotApplicable);
MemoryCompressionState::NotApplicable,
false,
1u);
EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
EXPECT_FALSE(sbaCmd.getDynamicStateBufferSizeModifyEnable());

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2020 Intel Corporation
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -81,6 +81,6 @@ struct ComputeModeRequirements : public ::testing::Test {
CommandStreamReceiver *csr = nullptr;
std::unique_ptr<MockDevice> device;
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false};
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, 1};
GraphicsAllocation *alloc = nullptr;
};

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2020 Intel Corporation
* Copyright (C) 2017-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -20,6 +20,7 @@ class MockContext : public Context {
public:
using Context::contextType;
using Context::deviceBitfields;
using Context::devices;
using Context::driverDiagnostics;
using Context::maxRootDeviceIndex;
using Context::memoryManager;