Insert PC before SBA in cmdlist

Related-To: NEO-4916

Change-Id: I7024ff173ad97d4c42a5fa6d7ea0d0ee665a87d9
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2020-07-31 08:40:52 +02:00
committed by sys_ocldev
parent 8b267fd949
commit 0ed1db0b46
10 changed files with 96 additions and 41 deletions

View File

@ -1494,6 +1494,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::setGlobalWorkSizeIndirect(NEO:
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandContainer &container) {
NEO::PipeControlArgs args(true);
args.hdcPipelineFlush = true;
args.textureCacheInvalidationEnable = true;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
NEO::EncodeStateBaseAddress<GfxFamily>::encode(commandContainer);
if (device->getL0Debugger()) {
device->getL0Debugger()->captureStateBaseAddress(commandContainer);

View File

@ -8,7 +8,6 @@
#pragma once
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/device/device.h"

View File

@ -191,8 +191,9 @@ TEST_F(CommandListCreate, givenInvalidProductFamilyThenReturnsNullPointer) {
EXPECT_EQ(nullptr, commandList);
}
HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAddedAndCorrectlyProgrammed) {
HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenPCAndStateBaseAddressCmdsAreAddedAndCorrectlyProgrammed) {
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
auto &commandContainer = commandList->commandContainer;
@ -210,8 +211,17 @@ HWTEST_F(CommandListCreate, whenCommandListIsCreatedThenStateBaseAddressCmdIsAdd
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
auto itor = find<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
auto itorPc = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorPc);
auto cmdPc = genCmdCast<PIPE_CONTROL *>(*itorPc);
EXPECT_TRUE(cmdPc->getDcFlushEnable());
EXPECT_TRUE(cmdPc->getCommandStreamerStallEnable());
EXPECT_TRUE(cmdPc->getTextureCacheInvalidationEnable());
auto itor = find<STATE_BASE_ADDRESS *>(itorPc, cmdList.end());
ASSERT_NE(cmdList.end(), itor);
auto cmdSba = genCmdCast<STATE_BASE_ADDRESS *>(*itor);
auto dsh = commandContainer.getIndirectHeap(NEO::HeapType::DYNAMIC_STATE);

View File

@ -31,8 +31,8 @@ HWTEST_F(CommandListAppendBarrier, WhenAppendingBarrierThenPipeControlIsGenerate
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
usedSpaceAfter));
ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore),
usedSpaceAfter - usedSpaceBefore));
// Find a PC w/ CS stall
auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());

View File

@ -159,12 +159,21 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
}
auto slmSizeNew = dispatchInterface->getSlmTotalSize();
bool flush = container.slmSize != slmSizeNew || container.isAnyHeapDirty();
bool dirtyHeaps = container.isAnyHeapDirty();
bool flush = container.slmSize != slmSizeNew || dirtyHeaps;
if (flush) {
PipeControlArgs args(true);
if (dirtyHeaps) {
args.hdcPipelineFlush = true;
}
MemorySynchronizationCommands<Family>::addPipeControl(*container.getCommandStream(), args);
if (dirtyHeaps) {
EncodeStateBaseAddress<Family>::encode(container);
container.setDirtyStateForAllHeaps(false);
}
if (container.slmSize != slmSizeNew) {
EncodeL3State<Family>::encode(container, slmSizeNew != 0u);
container.slmSize = slmSizeNew;
@ -173,11 +182,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
}
}
if (container.isAnyHeapDirty()) {
EncodeStateBaseAddress<Family>::encode(container);
container.setDirtyStateForAllHeaps(false);
}
}
uint32_t numIDD = 0u;

View File

@ -6,16 +6,16 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
#include "shared/source/gen11/hw_cmds_base.h"
#include "shared/source/gen11/reg_configs.h"
using Family = NEO::ICLFamily;
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
namespace NEO {
using Family = ICLFamily;
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;
@ -32,4 +32,5 @@ template struct EncodeSempahore<Family>;
template struct EncodeBatchBufferStartOrEnd<Family>;
template struct EncodeMiFlushDW<Family>;
template struct EncodeMemoryPrefetch<Family>;
template struct EncodeWA<Family>;
} // namespace NEO

View File

@ -6,19 +6,19 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_tgllp_plus.inl"
#include "shared/source/gen12lp/hw_cmds_base.h"
#include "shared/source/gen12lp/reg_configs.h"
#include "shared/source/helpers/preamble.h"
using Family = NEO::TGLLPFamily;
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_tgllp_plus.inl"
namespace NEO {
using Family = TGLLPFamily;
template <>
inline size_t EncodeWA<Family>::getAdditionalPipelineSelectSize(Device &device) {
size_t EncodeWA<Family>::getAdditionalPipelineSelectSize(Device &device) {
size_t size = 0;
if (device.getDefaultEngine().commandStreamReceiver->isRcs()) {
size += 2 * PreambleHelper<Family>::getCmdSizeForPipelineSelect(device.getHardwareInfo());
@ -39,7 +39,7 @@ void EncodeComputeMode<Family>::adjustComputeMode(LinearStream &csr, uint32_t nu
}
template <>
inline void EncodeWA<Family>::encodeAdditionalPipelineSelect(Device &device, LinearStream &stream, bool is3DPipeline) {
void EncodeWA<Family>::encodeAdditionalPipelineSelect(Device &device, LinearStream &stream, bool is3DPipeline) {
if (device.getDefaultEngine().commandStreamReceiver->isRcs()) {
PipelineSelectArgs args;
args.is3DPipelineRequired = is3DPipeline;

View File

@ -6,16 +6,16 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
#include "shared/source/gen8/hw_cmds_base.h"
#include "shared/source/gen8/reg_configs.h"
using Family = NEO::BDWFamily;
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
namespace NEO {
using Family = BDWFamily;
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;
@ -32,4 +32,5 @@ template struct EncodeSempahore<Family>;
template struct EncodeBatchBufferStartOrEnd<Family>;
template struct EncodeMiFlushDW<Family>;
template struct EncodeMemoryPrefetch<Family>;
template struct EncodeWA<Family>;
} // namespace NEO

View File

@ -6,16 +6,16 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
#include "shared/source/gen9/hw_cmds_base.h"
#include "shared/source/gen9/reg_configs.h"
using Family = NEO::SKLFamily;
#include "shared/source/command_container/command_encoder.inl"
#include "shared/source/command_container/command_encoder_base.inl"
#include "shared/source/command_container/encode_compute_mode_bdw_plus.inl"
namespace NEO {
using Family = SKLFamily;
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;
@ -32,4 +32,5 @@ template struct EncodeSempahore<Family>;
template struct EncodeBatchBufferStartOrEnd<Family>;
template struct EncodeMiFlushDW<Family>;
template struct EncodeMemoryPrefetch<Family>;
template struct EncodeWA<Family>;
} // namespace NEO

View File

@ -69,8 +69,6 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWithUsedAvailableSizeWhen
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThanZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
uint32_t slmTotalSize = 1;
@ -87,8 +85,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeGraterThan
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenSlmTotalSizeEqualZeroWhenDispatchingKernelThenSharedMemorySizeSetCorrectly) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
uint32_t slmTotalSize = 0;
@ -285,6 +281,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotChangedWhenDispatchKernelThenHeapsAreCleanAndFlushAdded) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
cmdContainer->slmSize = 1;
@ -301,6 +298,43 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsAndSlmNotCha
EXPECT_FALSE(cmdContainer->isAnyHeapDirty());
}
// Dispatches a kernel with every heap marked dirty and an unchanged SLM size, then
// checks that a PIPE_CONTROL with DC flush enabled sits ahead of STATE_BASE_ADDRESS
// in the command stream produced by EncodeDispatchKernel::encode.
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenDirtyHeapsWhenDispatchKernelThenPCIsAddedBeforeSBA) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

    uint32_t dims[] = {2, 1, 1};
    std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());

    // The mock reports the same SLM size the container already holds, so the
    // dirty heaps are the only state that differs going into the dispatch.
    cmdContainer->slmSize = 1;
    EXPECT_CALL(*dispatchInterface.get(), getSlmTotalSize()).WillRepeatedly(::testing::Return(cmdContainer->slmSize));
    cmdContainer->setDirtyStateForAllHeaps(true);

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);

    // Abort on a failed parse rather than inspecting a partially filled command list.
    GenCmdList cmdList;
    ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(cmdList, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()));

    // Walk the list backwards: locate STATE_BASE_ADDRESS first.
    auto itor = reverse_find<STATE_BASE_ADDRESS *>(cmdList.rbegin(), cmdList.rend());
    ASSERT_NE(cmdList.rend(), itor);

    auto cmdSba = genCmdCast<STATE_BASE_ADDRESS *>(*itor);
    EXPECT_NE(nullptr, cmdSba);

    // Continue backwards from the SBA position, i.e. over commands emitted before it.
    auto itorPc = reverse_find<PIPE_CONTROL *>(itor, cmdList.rend());
    ASSERT_NE(cmdList.rend(), itorPc);

    // Any PIPE_CONTROL with DC flush between the first hit and the start of the
    // stream satisfies the test; non-PIPE_CONTROL entries are skipped because
    // genCmdCast yields nullptr for them.
    bool foundPcWithDCFlush = false;
    do {
        auto cmdPc = genCmdCast<PIPE_CONTROL *>(*itorPc);
        if (cmdPc && cmdPc->getDcFlushEnable()) {
            foundPcWithDCFlush = true;
            break;
        }
    } while (++itorPc != cmdList.rend());
    EXPECT_TRUE(foundPcWithDCFlush);
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmChangedWhenDispatchKernelThenFlushAdded) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
uint32_t dims[] = {2, 1, 1};