mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-23 03:33:13 +08:00
Add extra parameters to programStateBaseAddress()
Signed-off-by: Igor Venevtsev <igor.venevtsev@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
adfa3c6bcb
commit
bb72beac6b
@@ -746,6 +746,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
|
||||
auto memoryCompressionState = getGpgpuCommandStreamReceiver().getMemoryCompressionState(auxTranslationRequired);
|
||||
|
||||
auto context = kernel->getProgram()->getContextPtr();
|
||||
auto numDevicesInContext = context ? context->getNumDevices() : 1u;
|
||||
|
||||
DispatchFlags dispatchFlags(
|
||||
{}, //csrDependencies
|
||||
×tampPacketDependencies.barrierNodes, //barrierTimestampPacketNodes
|
||||
@@ -771,7 +774,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
!eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false, //epilogueRequired
|
||||
usePerDssBackedBuffer, //usePerDssBackedBuffer
|
||||
kernel->isSingleSubdevicePreferred() //useSingleSubdevice
|
||||
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
||||
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
|
||||
numDevicesInContext //numDevicesInContext
|
||||
);
|
||||
|
||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||
@@ -987,7 +992,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false, //epilogueRequired
|
||||
false, //usePerDssBackedBuffer
|
||||
false);
|
||||
false, //useSingleSubdevice
|
||||
false, //useGlobalAtomics
|
||||
1u); //numDevicesInContext
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -75,7 +75,9 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
||||
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false, //epilogueRequired
|
||||
false, //usePerDssBackedBuffer
|
||||
false);
|
||||
false, //useSingleSubdevice
|
||||
false, //useGlobalAtomics
|
||||
1u); //numDevicesInContext
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
|
||||
|
||||
@@ -215,32 +217,37 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
|
||||
auto memoryCompressionState = commandStreamReceiver.getMemoryCompressionState(kernel->isAuxTranslationRequired());
|
||||
|
||||
auto context = kernel->getProgram()->getContextPtr();
|
||||
auto numDevicesInContext = context ? context->getNumDevices() : 1u;
|
||||
|
||||
DispatchFlags dispatchFlags(
|
||||
{}, //csrDependencies
|
||||
nullptr, //barrierTimestampPacketNodes
|
||||
{false, kernel->isVmeKernel()}, //pipelineSelectArgs
|
||||
commandQueue.flushStamp->getStampReference(), //flushStampReference
|
||||
commandQueue.getThrottle(), //throttle
|
||||
preemptionMode, //preemptionMode
|
||||
kernelDescriptor.kernelAttributes.numGrfRequired, //numGrfRequired
|
||||
L3CachingSettings::l3CacheOn, //l3CacheSettings
|
||||
kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy
|
||||
kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo
|
||||
kernel->getExecutionType(), //kernelExecutionType
|
||||
memoryCompressionState, //memoryCompressionState
|
||||
commandQueue.getSliceCount(), //sliceCount
|
||||
true, //blocking
|
||||
flushDC, //dcFlush
|
||||
slmUsed, //useSLM
|
||||
true, //guardCommandBufferWithPipeControl
|
||||
NDRangeKernel, //GSBA32BitRequired
|
||||
requiresCoherency, //requiresCoherency
|
||||
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
|
||||
false, //implicitFlush
|
||||
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false, //epilogueRequired
|
||||
kernel->requiresPerDssBackedBuffer(rootDeviceIndex), //usePerDssBackedBuffer
|
||||
kernel->isSingleSubdevicePreferred());
|
||||
{}, //csrDependencies
|
||||
nullptr, //barrierTimestampPacketNodes
|
||||
{false, kernel->isVmeKernel()}, //pipelineSelectArgs
|
||||
commandQueue.flushStamp->getStampReference(), //flushStampReference
|
||||
commandQueue.getThrottle(), //throttle
|
||||
preemptionMode, //preemptionMode
|
||||
kernelDescriptor.kernelAttributes.numGrfRequired, //numGrfRequired
|
||||
L3CachingSettings::l3CacheOn, //l3CacheSettings
|
||||
kernel->getThreadArbitrationPolicy(), //threadArbitrationPolicy
|
||||
kernel->getAdditionalKernelExecInfo(), //additionalKernelExecInfo
|
||||
kernel->getExecutionType(), //kernelExecutionType
|
||||
memoryCompressionState, //memoryCompressionState
|
||||
commandQueue.getSliceCount(), //sliceCount
|
||||
true, //blocking
|
||||
flushDC, //dcFlush
|
||||
slmUsed, //useSLM
|
||||
true, //guardCommandBufferWithPipeControl
|
||||
NDRangeKernel, //GSBA32BitRequired
|
||||
requiresCoherency, //requiresCoherency
|
||||
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
|
||||
false, //implicitFlush
|
||||
commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false, //epilogueRequired
|
||||
kernel->requiresPerDssBackedBuffer(rootDeviceIndex), //usePerDssBackedBuffer
|
||||
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
||||
kernel->getDefaultKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
|
||||
numDevicesInContext); //numDevicesInContext
|
||||
|
||||
if (timestampPacketDependencies) {
|
||||
eventsRequest.fillCsrDependencies(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
|
||||
@@ -363,7 +370,9 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||
commandStreamReceiver.isNTo1SubmissionModelEnabled(), //outOfOrderExecutionAllowed
|
||||
false, //epilogueRequired
|
||||
false, //usePerDssBackedBuffer
|
||||
false);
|
||||
false, //useSingleSubdevice
|
||||
false, //useGlobalAtomics
|
||||
1u); //numDevicesInContext
|
||||
|
||||
UNRECOVERABLE_IF(!kernelOperation->blitEnqueue && !commandStreamReceiver.peekTimestampPacketWriteEnabled());
|
||||
|
||||
|
||||
@@ -1309,6 +1309,46 @@ HWTEST_F(EnqueueKernelTest, givenVMEKernelWhenEnqueueKernelThenDispatchFlagsHave
|
||||
EXPECT_TRUE(mockCsr->passedDispatchFlags.pipelineSelectArgs.mediaSamplerRequired);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenUseGlobalAtomicsSetWhenEnqueueKernelThenDispatchFlagsUseGlobalAtomicsIsSet) {
|
||||
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice, context);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics = true;
|
||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_TRUE(mockCsr->passedDispatchFlags.useGlobalAtomics);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenUseGlobalAtomicsIsNotSetWhenEnqueueKernelThenDispatchFlagsUseGlobalAtomicsIsNotSet) {
|
||||
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice, context);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
mockKernel.kernelInfo.kernelDescriptor.kernelAttributes.flags.useGlobalAtomics = false;
|
||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_FALSE(mockCsr->passedDispatchFlags.useGlobalAtomics);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenContextWithSeveralDevicesWhenEnqueueKernelThenDispatchFlagsiHasCorrectNumDevicesValue) {
|
||||
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
|
||||
MockKernelWithInternals mockKernel(*pClDevice, context);
|
||||
size_t gws[3] = {1, 0, 0};
|
||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(1u, mockCsr->passedDispatchFlags.numDevicesInContext);
|
||||
|
||||
context->devices.resize(10);
|
||||
clEnqueueNDRangeKernel(this->pCmdQ, mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(10u, mockCsr->passedDispatchFlags.numDevicesInContext);
|
||||
context->devices.resize(1);
|
||||
}
|
||||
|
||||
HWTEST_F(EnqueueKernelTest, givenNonVMEKernelWhenEnqueueKernelThenDispatchFlagsDoesntHaveMediaSamplerRequired) {
|
||||
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
|
||||
@@ -1212,7 +1212,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrWhenGen
|
||||
false,
|
||||
pDevice->getGmmHelper(),
|
||||
false,
|
||||
MemoryCompressionState::NotApplicable);
|
||||
MemoryCompressionState::NotApplicable,
|
||||
false,
|
||||
1u);
|
||||
|
||||
EXPECT_NE(generalStateBaseAddress, sbaCmd.getGeneralStateBaseAddress());
|
||||
EXPECT_EQ(GmmHelper::decanonize(generalStateBaseAddress), sbaCmd.getGeneralStateBaseAddress());
|
||||
@@ -1236,7 +1238,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroGeneralStateBaseAddres
|
||||
false,
|
||||
pDevice->getGmmHelper(),
|
||||
false,
|
||||
MemoryCompressionState::NotApplicable);
|
||||
MemoryCompressionState::NotApplicable,
|
||||
false,
|
||||
1u);
|
||||
|
||||
EXPECT_EQ(0ull, sbaCmd.getGeneralStateBaseAddress());
|
||||
EXPECT_EQ(0u, sbaCmd.getGeneralStateBufferSize());
|
||||
@@ -1262,7 +1266,9 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNonZeroInternalHeapBaseAddres
|
||||
false,
|
||||
pDevice->getGmmHelper(),
|
||||
false,
|
||||
MemoryCompressionState::NotApplicable);
|
||||
MemoryCompressionState::NotApplicable,
|
||||
false,
|
||||
1u);
|
||||
|
||||
EXPECT_FALSE(sbaCmd.getInstructionBaseAddressModifyEnable());
|
||||
EXPECT_EQ(0ull, sbaCmd.getInstructionBaseAddress());
|
||||
@@ -1293,7 +1299,9 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenSbaProgram
|
||||
false,
|
||||
pDevice->getGmmHelper(),
|
||||
false,
|
||||
MemoryCompressionState::NotApplicable);
|
||||
MemoryCompressionState::NotApplicable,
|
||||
false,
|
||||
1u);
|
||||
|
||||
EXPECT_FALSE(sbaCmd.getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_FALSE(sbaCmd.getDynamicStateBufferSizeModifyEnable());
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2020 Intel Corporation
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -81,6 +81,6 @@ struct ComputeModeRequirements : public ::testing::Test {
|
||||
|
||||
CommandStreamReceiver *csr = nullptr;
|
||||
std::unique_ptr<MockDevice> device;
|
||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false};
|
||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, 1};
|
||||
GraphicsAllocation *alloc = nullptr;
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
* Copyright (C) 2017-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -20,6 +20,7 @@ class MockContext : public Context {
|
||||
public:
|
||||
using Context::contextType;
|
||||
using Context::deviceBitfields;
|
||||
using Context::devices;
|
||||
using Context::driverDiagnostics;
|
||||
using Context::maxRootDeviceIndex;
|
||||
using Context::memoryManager;
|
||||
|
||||
Reference in New Issue
Block a user