Refactor MemorySynchronizationCommands class

Related-To: NEO-4338

Change-Id: Id0ae9c73293fd99f53fccc11a69ca14fa9a6d119
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2020-04-26 21:48:59 +02:00
committed by sys_ocldev
parent 4bf503da12
commit b2210fa5bb
39 changed files with 407 additions and 194 deletions

View File

@ -814,6 +814,7 @@ include_directories(${NEO_SHARED_DIRECTORY}/debug_settings/definitions${BRANCH_D
include_directories(${NEO_SHARED_DIRECTORY}/gen_common/reg_configs${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/gmm_helper/client_context${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/helpers/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/memory_manager/definitions${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/memory_properties${BRANCH_DIR_SUFFIX})
include_directories(${NEO_SHARED_DIRECTORY}/sku_info/definitions${BRANCH_DIR_SUFFIX})

View File

@ -28,6 +28,8 @@
#include "level_zero/core/source/image/image.h"
#include "level_zero/core/source/module/module.h"
#include "pipe_control_args.h"
#include <algorithm>
namespace L0 {
@ -168,10 +170,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
auto event = Event::fromHandle(hEvent);
commandContainer.addToResidencyContainer(&event->getAllocation());
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
event->getGpuAddress(), Event::STATE_CLEARED, true, commandContainer.getDevice()->getHardwareInfo());
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(),
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
event->getGpuAddress(),
Event::STATE_CLEARED,
commandContainer.getDevice()->getHardwareInfo(),
args);
return ZE_RESULT_SUCCESS;
}
@ -188,7 +194,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
if (isCopyOnlyCmdList) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
} else {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), false);
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
if (hSignalEvent) {
@ -417,7 +424,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
hEvent, numWaitEvents, phWaitEvents);
if (allocationStruct.needsFlush) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
return ret;
@ -649,7 +657,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::Graph
}
if (flushHost) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
return ret;
@ -731,7 +740,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
}
if (dstAllocationStruct.needsFlush) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
return ret;
@ -817,7 +827,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
if (isCopyOnlyCmdList) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
} else {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
@ -1057,7 +1068,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
}
if (hostPointerNeedsFlush) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
return res;
@ -1155,10 +1167,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
if (isCopyOnlyCmdList) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), event->getGpuAddress(), Event::STATE_SIGNALED, false, true);
} else {
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
NEO::PipeControlArgs args;
args.dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
event->getGpuAddress(), Event::STATE_SIGNALED, dcFlushEnable, commandContainer.getDevice()->getHardwareInfo());
event->getGpuAddress(), Event::STATE_SIGNALED,
commandContainer.getDevice()->getHardwareInfo(),
args);
}
return ZE_RESULT_SUCCESS;
}
@ -1193,7 +1208,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
if (isCopyOnlyCmdList) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false);
} else {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
}

View File

@ -16,6 +16,8 @@
#include "shared/source/memory_manager/residency_container.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "pipe_control_args.h"
#include <algorithm>
namespace L0 {
@ -100,23 +102,24 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
} else {
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END);
bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
NEO::PipeControlArgs args;
args.dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true;
if (isCopyOnlyCmdList) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(*commandContainer.getCommandStream(), timeStampAddress, 0llu, true, true);
} else {
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
timeStampAddress,
0llu,
dcFlushEnable,
device->getHwInfo());
device->getHwInfo(),
args);
timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END);
NEO::EncodeStoreMMIO<GfxFamily>::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress);
if (dcFlushEnable) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), true);
if (args.dcFlushEnable) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(), args);
}
}
}

View File

@ -28,6 +28,8 @@
#include "level_zero/core/source/fence/fence.h"
#include "level_zero/tools/source/metrics/metric.h"
#include "pipe_control_args.h"
#include <limits>
#include <thread>
@ -219,7 +221,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
auto commandListPreemption = commandList->getCommandListPreemptionMode();
if (statePreemption != commandListPreemption) {
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(child, false);
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(child, args);
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
commandListPreemption,
statePreemption,
@ -267,9 +270,13 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
if (isCopyOnlyCommandQueue) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, false, true);
} else {
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo());
fence->getGpuAddress(),
Fence::STATE_SIGNALED,
device->getHwInfo(),
args);
}
}
@ -352,9 +359,14 @@ void CommandQueueHw<gfxCoreFamily>::dispatchTaskCountWrite(NEO::LinearStream &co
if (isCopyOnlyCommandQueue) {
NEO::EncodeMiFlushDW<GfxFamily>::programMiFlushDw(commandStream, gpuAddress, taskCountToWrite, false, true);
} else {
NEO::MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
gpuAddress, taskCountToWrite, true, device->getHwInfo());
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
commandStream,
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
gpuAddress,
taskCountToWrite,
device->getHwInfo(),
args);
}
}
} // namespace L0

View File

@ -5,14 +5,17 @@
*
*/
#include "pipe_control_args.h"
namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t numRanges,
const size_t *pRangeSizes,
const void **pRanges) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(),
true);
args);
}
} // namespace L0

View File

@ -7,14 +7,17 @@
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "pipe_control_args.h"
namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t numRanges,
const size_t *pRangeSizes,
const void **pRanges) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(),
true);
args);
}
} // namespace L0

View File

@ -12,13 +12,16 @@
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
#include "pipe_control_args.h"
namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::applyMemoryRangesBarrier(uint32_t numRanges,
const size_t *pRangeSizes,
const void **pRanges) {
NEO::PipeControlArgs args(true);
NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandContainer.getCommandStream(),
true);
args);
}
} // namespace L0

View File

@ -12,6 +12,8 @@
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/helpers/dispatch_info_builder.h"
#include "pipe_control_args.h"
#include <memory>
namespace NEO {
@ -65,7 +67,8 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
DispatchInfo::EstimateCommandsMethodT>;
template <typename GfxFamily, bool dcFlush>
static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &) {
MemorySynchronizationCommands<GfxFamily>::addPipeControl(linearStream, dcFlush);
PipeControlArgs args(dcFlush);
MemorySynchronizationCommands<GfxFamily>::addPipeControl(linearStream, args);
}
template <typename GfxFamily>

View File

@ -36,6 +36,8 @@
#include "opencl/source/program/block_kernel_manager.h"
#include "opencl/source/program/printf_handler.h"
#include "pipe_control_args.h"
#include <algorithm>
#include <new>
@ -486,10 +488,14 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
if (isCacheFlushForBcsRequired()) {
auto cacheFlushTimestampPacketGpuAddress = timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]->getGpuAddress() +
offsetof(TimestampPacketStorage, packets[0].contextEnd);
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
commandStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
cacheFlushTimestampPacketGpuAddress, 0, true, device->getHardwareInfo());
PipeControlArgs args(true);
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
commandStream,
GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
cacheFlushTimestampPacketGpuAddress,
0,
device->getHardwareInfo(),
args);
}
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(commandStream, *currentTimestampPacketNode);

View File

@ -27,6 +27,8 @@
#include "opencl/source/helpers/validators.h"
#include "opencl/source/mem_obj/mem_obj.h"
#include "pipe_control_args.h"
#include <algorithm>
#include <cmath>
@ -123,10 +125,14 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
// PIPE_CONTROL for global timestamp
uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, GlobalStartTS);
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
*commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
timeStampAddress, 0llu, false, hwInfo);
PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*commandStream,
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
timeStampAddress,
0llu,
hwInfo,
args);
//MI_STORE_REGISTER_MEM for context local timestamp
timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextStartTS);

View File

@ -12,6 +12,8 @@
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/command_queue/gpgpu_walker_base.inl"
#include "pipe_control_args.h"
namespace NEO {
template <typename GfxFamily>
@ -69,8 +71,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER;
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
bool dcFlush = false;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, dcFlush);
NEO::PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex;
const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize;
@ -161,8 +163,8 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
// Do not put BB_START only when returning in first Scheduler run
if (devQueueHw.getSchedulerReturnInstance() != 1) {
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, true);
args.dcFlushEnable = true;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
// Add BB Start Cmd to the SLB in the Primary Batch Buffer
auto *bbStart = static_cast<MI_BATCH_BUFFER_START *>(commandStream.getSpace(sizeof(MI_BATCH_BUFFER_START)));
@ -183,8 +185,14 @@ void GpgpuWalkerHelper<GfxFamily>::setupTimestampPacket(
if (TimestampPacketStorage::WriteOperationType::AfterWalker == writeOperationType) {
uint64_t address = timestampPacketNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd);
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
*cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, 0, false, *rootDeviceEnvironment.getHardwareInfo());
PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*cmdStream,
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
address,
0,
*rootDeviceEnvironment.getHardwareInfo(),
args);
}
}

View File

@ -108,7 +108,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
if (static_cast<uint32_t>(DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get()) == gpgpuCsr.peekTaskCount()) {
if (DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.get()) {
MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandStream, true);
NEO::PipeControlArgs args(true);
MemorySynchronizationCommands<GfxFamily>::addPipeControl(*commandStream, args);
}
auto tagValue = *(gpgpuCsr.getTagAddress());

View File

@ -16,6 +16,8 @@
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "pipe_control_args.h"
namespace NEO {
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::allocateSlbBuffer() {
@ -124,14 +126,22 @@ void DeviceQueueHw<GfxFamily>::addExecutionModelCleanUpSection(Kernel *parentKer
}
uint64_t criticalSectionAddress = (uint64_t)&igilQueue->m_controls.m_CriticalSection;
PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
slbCS,
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
criticalSectionAddress,
ExecutionModelCriticalSection::Free,
device->getHardwareInfo(),
args);
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
criticalSectionAddress, ExecutionModelCriticalSection::Free, false, device->getHardwareInfo());
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
tagAddress, taskCount, false, device->getHardwareInfo());
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
slbCS,
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
tagAddress,
taskCount,
device->getHardwareInfo(),
args);
addMediaStateClearCmds();

View File

@ -158,7 +158,12 @@ bool HwHelperHw<Family>::isIndependentForwardProgressSupported() {
}
template <>
void MemorySynchronizationCommands<Family>::setExtraCacheFlushFields(Family::PIPE_CONTROL &pipeControl) {
inline void MemorySynchronizationCommands<Family>::setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args) {
pipeControl.setHdcPipelineFlush(args.hdcPipelineFlush);
}
template <>
void MemorySynchronizationCommands<Family>::setCacheFlushExtraProperties(Family::PIPE_CONTROL &pipeControl) {
pipeControl.setHdcPipelineFlush(true);
pipeControl.setConstantCacheInvalidationEnable(false);
}

View File

@ -28,12 +28,12 @@ void HwHelperHw<Family>::setupHardwareCapabilities(HardwareCapabilities *caps, c
}
template <>
typename Family::PIPE_CONTROL *MemorySynchronizationCommands<Family>::addPipeControl(LinearStream &commandStream, bool dcFlush) {
void MemorySynchronizationCommands<Family>::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) {
Family::PIPE_CONTROL cmd = Family::cmdInitPipeControl;
MemorySynchronizationCommands<Family>::setPipeControl(cmd, true);
args.dcFlushEnable = true;
MemorySynchronizationCommands<Family>::setPipeControl(cmd, args);
Family::PIPE_CONTROL *cmdBuffer = commandStream.getSpaceForCmd<Family::PIPE_CONTROL>();
*cmdBuffer = cmd;
return cmdBuffer;
}
template class AubHelperHw<Family>;

View File

@ -870,7 +870,8 @@ HWTEST_F(UltCommandStreamReceiverTest, addPipeControlWithFlushAllCaches) {
char buff[sizeof(PIPE_CONTROL) * 3];
LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3);
MemorySynchronizationCommands<FamilyType>::addPipeControl(stream, false);
PipeControlArgs args;
MemorySynchronizationCommands<FamilyType>::addPipeControl(stream, args);
parseCommands<FamilyType>(stream, 0);

View File

@ -1555,7 +1555,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsTrueWhenCall
std::unique_ptr<uint8_t> buffer(new uint8_t[128]);
LinearStream commandStream(buffer.get(), 128);
auto pipeControl = MemorySynchronizationCommands<FamilyType>::addPipeControl(commandStream, true);
PipeControlArgs args(true);
MemorySynchronizationCommands<FamilyType>::addPipeControl(commandStream, args);
PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
ASSERT_NE(nullptr, pipeControl);
EXPECT_TRUE(pipeControl->getDcFlushEnable());
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
@ -1566,7 +1569,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsFalseWhenCal
std::unique_ptr<uint8_t> buffer(new uint8_t[128]);
LinearStream commandStream(buffer.get(), 128);
auto pipeControl = MemorySynchronizationCommands<FamilyType>::addPipeControl(commandStream, false);
PipeControlArgs args;
MemorySynchronizationCommands<FamilyType>::addPipeControl(commandStream, args);
PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
ASSERT_NE(nullptr, pipeControl);
const bool expectedDcFlush = ::renderCoreFamily == IGFX_GEN8_CORE;
EXPECT_EQ(expectedDcFlush, pipeControl->getDcFlushEnable());

View File

@ -5,6 +5,8 @@
*
*/
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
#include "opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl"
#include "opencl/test/unit_test/helpers/hw_helper_tests.h"
@ -53,6 +55,8 @@ GEN11TEST_F(MemorySynchronizatiopCommandsTestsGen11, WhenProgrammingCacheFlushTh
std::unique_ptr<uint8_t> buffer(new uint8_t[128]);
LinearStream stream(buffer.get(), 128);
PIPE_CONTROL *pipeControl = MemorySynchronizationCommands<FamilyType>::addFullCacheFlush(stream);
MemorySynchronizationCommands<FamilyType>::addFullCacheFlush(stream);
PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
ASSERT_NE(nullptr, pipeControl);
EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable());
}

View File

@ -93,17 +93,20 @@ ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceDisabledWhenP
auto expectedPipeControlCmd = FamilyType::cmdInitPipeControl;
expectedPipeControlCmd.setCommandStreamerStallEnable(0x1);
setFlushAllCaches(expectedPipeControlCmd);
auto pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(stream->getCpuBase());
auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(stream->getCpuBase());
ASSERT_NE(nullptr, pipeControlCmd);
EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL)));
size_t cmdOffset = sizeof(PIPE_CONTROL);
auto miLrCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(stream->getCpuBase(), cmdOffset));
auto miLrCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(ptrOffset(stream->getCpuBase(), cmdOffset));
ASSERT_NE(nullptr, miLrCmd);
EXPECT_EQ(0, memcmp(&expectedMiLrCmd, miLrCmd, sizeof(MI_LOAD_REGISTER_IMM)));
cmdOffset += sizeof(MI_LOAD_REGISTER_IMM);
expectedPipeControlCmd = FamilyType::cmdInitPipeControl;
expectedPipeControlCmd.setCommandStreamerStallEnable(0x1);
pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(ptrOffset(stream->getCpuBase(), cmdOffset));
pipeControlCmd = genCmdCast<PIPE_CONTROL *>(ptrOffset(stream->getCpuBase(), cmdOffset));
ASSERT_NE(nullptr, pipeControlCmd);
EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL)));
}
@ -132,21 +135,25 @@ ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceEnabledWhenPo
expectedPipeControlCmd.setCommandStreamerStallEnable(0x1);
setFlushAllCaches(expectedPipeControlCmd);
expectedPipeControlCmd.setGenericMediaStateClear(true);
auto pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(stream->getCpuBase());
auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(stream->getCpuBase());
ASSERT_NE(nullptr, pipeControlCmd);
EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL)));
size_t cmdOffset = sizeof(PIPE_CONTROL);
pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(ptrOffset(stream->getCpuBase(), cmdOffset));
pipeControlCmd = genCmdCast<PIPE_CONTROL *>(ptrOffset(stream->getCpuBase(), cmdOffset));
ASSERT_NE(nullptr, pipeControlCmd);
expectedPipeControlCmd = FamilyType::cmdInitPipeControl;
expectedPipeControlCmd.setCommandStreamerStallEnable(0x1);
EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL)));
cmdOffset += sizeof(PIPE_CONTROL);
auto miLrCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(stream->getCpuBase(), cmdOffset));
auto miLrCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(ptrOffset(stream->getCpuBase(), cmdOffset));
ASSERT_NE(nullptr, miLrCmd);
EXPECT_EQ(0, memcmp(&expectedMiLrCmd, miLrCmd, sizeof(MI_LOAD_REGISTER_IMM)));
cmdOffset += sizeof(MI_LOAD_REGISTER_IMM);
pipeControlCmd = reinterpret_cast<PIPE_CONTROL *>(ptrOffset(stream->getCpuBase(), cmdOffset));
pipeControlCmd = genCmdCast<PIPE_CONTROL *>(ptrOffset(stream->getCpuBase(), cmdOffset));
ASSERT_NE(nullptr, pipeControlCmd);
EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL)));
}

View File

@ -238,7 +238,7 @@ GEN12LPTEST_F(MemorySynchronizatiopCommandsTests, whenSettingCacheFlushExtraFiel
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
PIPE_CONTROL pipeControl = FamilyType::cmdInitPipeControl;
pipeControl.setConstantCacheInvalidationEnable(true);
MemorySynchronizationCommands<FamilyType>::setExtraCacheFlushFields(pipeControl);
MemorySynchronizationCommands<FamilyType>::setCacheFlushExtraProperties(pipeControl);
EXPECT_TRUE(pipeControl.getHdcPipelineFlush());
EXPECT_FALSE(pipeControl.getConstantCacheInvalidationEnable());
}

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/helpers/constants.h"
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
#include "opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl"
#include "opencl/test/unit_test/helpers/hw_helper_tests.h"
@ -60,6 +61,8 @@ GEN8TEST_F(MemorySynchronizatiopCommandsTestsGen8, WhenProgrammingCacheFlushThen
std::unique_ptr<uint8_t> buffer(new uint8_t[128]);
LinearStream stream(buffer.get(), 128);
PIPE_CONTROL *pipeControl = MemorySynchronizationCommands<FamilyType>::addFullCacheFlush(stream);
MemorySynchronizationCommands<FamilyType>::addFullCacheFlush(stream);
PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
ASSERT_NE(nullptr, pipeControl);
EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable());
}

View File

@ -5,6 +5,8 @@
*
*/
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
#include "opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl"
#include "opencl/test/unit_test/helpers/hw_helper_tests.h"
@ -60,6 +62,8 @@ GEN9TEST_F(MemorySynchronizatiopCommandsTestsGen9, WhenProgrammingCacheFlushThen
std::unique_ptr<uint8_t> buffer(new uint8_t[128]);
LinearStream stream(buffer.get(), 128);
PIPE_CONTROL *pipeControl = MemorySynchronizationCommands<FamilyType>::addFullCacheFlush(stream);
MemorySynchronizationCommands<FamilyType>::addFullCacheFlush(stream);
PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
ASSERT_NE(nullptr, pipeControl);
EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable());
}

View File

@ -15,6 +15,7 @@
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
#include "shared/test/unit_test/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/helpers/variable_backup.h"
@ -25,6 +26,8 @@
#include "opencl/test/unit_test/mocks/mock_buffer.h"
#include "opencl/test/unit_test/mocks/mock_context.h"
#include "pipe_control_args.h"
#include <chrono>
#include <iostream>
#include <numeric>
@ -212,13 +215,15 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsed
expectedPipeControl.setAddressHigh(static_cast<uint32_t>(address >> 32));
HardwareInfo hardwareInfo = *defaultHwInfo;
auto pipeControl = MemorySynchronizationCommands<FamilyType>::obtainPipeControlAndProgramPostSyncOperation(
stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, false, hardwareInfo);
PipeControlArgs args;
MemorySynchronizationCommands<FamilyType>::addPipeControlAndProgramPostSyncOperation(
stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, hardwareInfo, args);
auto additionalPcSize = MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL);
auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands<FamilyType>::getSizeForSingleSynchronization(hardwareInfo);
auto pipeControl = genCmdCast<PIPE_CONTROL *>(ptrOffset(stream.getCpuBase(), pipeControlLocationSize));
ASSERT_NE(nullptr, pipeControl);
EXPECT_EQ(sizeof(PIPE_CONTROL) + additionalPcSize, stream.getUsed());
EXPECT_EQ(pipeControl, ptrOffset(stream.getCpuBase(), pipeControlLocationSize));
EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0);
}
@ -238,13 +243,15 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteImmediateDataModeWhenHelperIs
expectedPipeControl.setImmediateData(immediateData);
HardwareInfo hardwareInfo = *defaultHwInfo;
auto pipeControl = MemorySynchronizationCommands<FamilyType>::obtainPipeControlAndProgramPostSyncOperation(
stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, false, hardwareInfo);
PipeControlArgs args;
MemorySynchronizationCommands<FamilyType>::addPipeControlAndProgramPostSyncOperation(
stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, hardwareInfo, args);
auto additionalPcSize = MemorySynchronizationCommands<FamilyType>::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL);
auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands<FamilyType>::getSizeForSingleSynchronization(hardwareInfo);
auto pipeControl = genCmdCast<PIPE_CONTROL *>(ptrOffset(stream.getCpuBase(), pipeControlLocationSize));
ASSERT_NE(nullptr, pipeControl);
EXPECT_EQ(sizeof(PIPE_CONTROL) + additionalPcSize, stream.getUsed());
EXPECT_EQ(pipeControl, ptrOffset(stream.getCpuBase(), pipeControlLocationSize));
EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0);
}
@ -850,7 +857,10 @@ HWTEST_F(PipeControlHelperTests, WhenProgrammingCacheFlushThenExpectBasicFieldsS
LinearStream stream(buffer.get(), 128);
PIPE_CONTROL *pipeControl = MemorySynchronizationCommands<FamilyType>::addFullCacheFlush(stream);
MemorySynchronizationCommands<FamilyType>::addFullCacheFlush(stream);
PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(buffer.get());
ASSERT_NE(nullptr, pipeControl);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
EXPECT_TRUE(pipeControl->getDcFlushEnable());

View File

@ -17,6 +17,8 @@
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "pipe_control_args.h"
#include <algorithm>
namespace NEO {
@ -147,7 +149,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
bool flush = container.slmSize != slmSizeNew || container.isAnyHeapDirty();
if (flush) {
MemorySynchronizationCommands<Family>::addPipeControl(*container.getCommandStream(), true);
PipeControlArgs args(true);
MemorySynchronizationCommands<Family>::addPipeControl(*container.getCommandStream(), args);
if (container.slmSize != slmSizeNew) {
EncodeL3State<Family>::encode(container, slmSizeNew != 0u);

View File

@ -18,6 +18,7 @@
namespace NEO {
template <typename GfxFamily>
class DeviceCommandStreamReceiver;
struct PipeControlArgs;
template <typename GfxFamily>
class CommandStreamReceiverHw : public CommandStreamReceiver {
@ -111,8 +112,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
size_t getCmdSizeForPrologue(const DispatchFlags &dispatchFlags) const;
void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd);
PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream);
PIPE_CONTROL *addPipeControlBeforeStateBaseAddress(LinearStream &commandStream);
void addPipeControlCmd(LinearStream &commandStream, PipeControlArgs &args);
void addPipeControlBeforeStateBaseAddress(LinearStream &commandStream);
size_t getSshHeapSize();
uint64_t getScratchPatchAddress();

View File

@ -31,6 +31,7 @@
#include "shared/source/utilities/tag_allocator.h"
#include "command_stream_receiver_hw_ext.inl"
#include "pipe_control_args.h"
namespace NEO {
@ -63,9 +64,9 @@ bool CommandStreamReceiverHw<GfxFamily>::flush(BatchBuffer &batchBuffer, Residen
template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::addBatchBufferEnd(LinearStream &commandStream, void **patchLocation) {
typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END;
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
auto pCmd = (MI_BATCH_BUFFER_END *)commandStream.getSpace(sizeof(MI_BATCH_BUFFER_END));
auto pCmd = commandStream.getSpaceForCmd<MI_BATCH_BUFFER_END>();
*pCmd = GfxFamily::cmdInitBatchBufferEnd;
if (patchLocation) {
*patchLocation = pCmd;
@ -131,12 +132,10 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getRequiredCmdSizeForPreamble(
}
template <typename GfxFamily>
inline typename GfxFamily::PIPE_CONTROL *CommandStreamReceiverHw<GfxFamily>::addPipeControlCmd(LinearStream &commandStream) {
typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL;
auto pCmd = reinterpret_cast<PIPE_CONTROL *>(commandStream.getSpace(sizeof(PIPE_CONTROL)));
*pCmd = GfxFamily::cmdInitPipeControl;
pCmd->setCommandStreamerStallEnable(true);
return pCmd;
inline void CommandStreamReceiverHw<GfxFamily>::addPipeControlCmd(
LinearStream &commandStream,
PipeControlArgs &args) {
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStream, args);
}
template <typename GfxFamily>
@ -190,9 +189,15 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
}
auto address = getTagAllocation()->getGpuAddress();
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
commandStreamTask, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
address, taskCount + 1, dispatchFlags.dcFlush, peekHwInfo());
PipeControlArgs args(dispatchFlags.dcFlush);
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
commandStreamTask,
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
address,
taskCount + 1,
peekHwInfo(),
args);
this->latestSentTaskCount = taskCount + 1;
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", taskCount);
@ -358,8 +363,9 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads) {
if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) {
auto pCmd = addPipeControlCmd(commandStreamCSR);
pCmd->setTextureCacheInvalidationEnable(true);
PipeControlArgs args;
args.textureCacheInvalidationEnable = true;
addPipeControlCmd(commandStreamCSR, args);
if (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushBefore) {
this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushAfter;
} else {
@ -374,15 +380,17 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
}
if (requiresInstructionCacheFlush) {
auto pipeControl = MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStreamCSR, false);
pipeControl->setInstructionCacheInvalidateEnable(true);
PipeControlArgs args;
args.instructionCacheInvalidateEnable = true;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStreamCSR, args);
requiresInstructionCacheFlush = false;
}
// Add a PC if we have a dependency on a previous walker to avoid concurrency issues.
if (taskLevel > this->taskLevel) {
if (!timestampPacketWriteEnabled) {
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStreamCSR, false);
PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(commandStreamCSR, args);
}
this->taskLevel = taskLevel;
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", this->taskCount);
@ -522,23 +530,26 @@ template <typename GfxFamily>
inline void CommandStreamReceiverHw<GfxFamily>::programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) {
stallingPipeControlOnNextFlushRequired = false;
PIPE_CONTROL *stallingPipeControlCmd;
auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes;
if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() != 0) {
auto barrierTimestampPacketGpuAddress = dispatchFlags.barrierTimestampPacketNodes->peekNodes()[0]->getGpuAddress() +
offsetof(TimestampPacketStorage, packets[0].contextEnd);
stallingPipeControlCmd = MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
barrierTimestampPacketGpuAddress, 0, true, peekHwInfo());
PipeControlArgs args(true);
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
cmdStream,
PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
barrierTimestampPacketGpuAddress,
0,
peekHwInfo(),
args);
dispatchFlags.barrierTimestampPacketNodes->makeResident(*this);
} else {
stallingPipeControlCmd = MemorySynchronizationCommands<GfxFamily>::addPipeControl(cmdStream, false);
PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControl(cmdStream, args);
}
stallingPipeControlCmd->setCommandStreamerStallEnable(true);
}
template <typename GfxFamily>

View File

@ -87,11 +87,10 @@ bool CommandStreamReceiverHw<GfxFamily>::isMultiOsContextCapable() const {
}
template <typename GfxFamily>
inline typename GfxFamily::PIPE_CONTROL *CommandStreamReceiverHw<GfxFamily>::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) {
auto pCmd = addPipeControlCmd(commandStream);
pCmd->setTextureCacheInvalidationEnable(true);
pCmd->setDcFlushEnable(true);
return pCmd;
inline void CommandStreamReceiverHw<GfxFamily>::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) {
PipeControlArgs args(true);
args.textureCacheInvalidationEnable = true;
addPipeControlCmd(commandStream, args);
}
} // namespace NEO

View File

@ -9,6 +9,8 @@
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/helpers/state_compute_mode_helper.h"
#include "pipe_control_args.h"
namespace NEO {
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) {
@ -16,6 +18,7 @@ void CommandStreamReceiverHw<GfxFamily>::programComputeMode(LinearStream &stream
if (isComputeModeNeeded()) {
programAdditionalPipelineSelect(stream, dispatchFlags.pipelineSelectArgs, true);
this->lastSentCoherencyRequest = static_cast<int8_t>(dispatchFlags.requiresCoherency);
auto stateComputeMode = GfxFamily::cmdInitStateComputeMode;
adjustThreadArbitionPolicy(&stateComputeMode);
EncodeStates<GfxFamily>::adjustStateComputeMode(stream, dispatchFlags.numGrfRequired, &stateComputeMode, isMultiOsContextCapable(), dispatchFlags.requiresCoherency);
@ -35,11 +38,10 @@ inline bool CommandStreamReceiverHw<Family>::isComputeModeNeeded() const {
}
template <>
inline typename Family::PIPE_CONTROL *CommandStreamReceiverHw<Family>::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) {
auto pCmd = addPipeControlCmd(commandStream);
pCmd->setTextureCacheInvalidationEnable(true);
pCmd->setDcFlushEnable(true);
pCmd->setHdcPipelineFlush(true);
return pCmd;
inline void CommandStreamReceiverHw<Family>::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) {
PipeControlArgs args(true);
args.textureCacheInvalidationEnable = true;
args.hdcPipelineFlush = true;
addPipeControlCmd(commandStream, args);
}
} // namespace NEO

View File

@ -12,6 +12,8 @@
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "pipe_control_args.h"
namespace NEO {
template <typename GfxFamily>
@ -73,10 +75,14 @@ void ExperimentalCommandBuffer::addTimeStampPipeControl() {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
uint64_t timeStampAddress = timestamps->getGpuAddress() + timestampsOffset;
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
*currentStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu,
false, *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo());
PipeControlArgs args;
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
*currentStream,
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
timeStampAddress,
0llu,
*commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo(),
args);
//moving to next chunk
timestampsOffset += sizeof(uint64_t);

View File

@ -10,6 +10,8 @@
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
#include "shared/source/helpers/hw_helper.h"
#include "pipe_control_args.h"
namespace NEO {
template <typename GfxFamily>
@ -29,13 +31,14 @@ inline void RenderDispatcher<GfxFamily>::dispatchMonitorFence(LinearStream &cmdB
uint64_t immediateData,
const HardwareInfo &hwInfo) {
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
PipeControlArgs args(true);
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
cmdBuffer,
POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
gpuAddress,
immediateData,
true,
hwInfo);
hwInfo,
args);
}
template <typename GfxFamily>

View File

@ -39,15 +39,16 @@ void CommandStreamReceiverHw<Family>::programMediaSampler(LinearStream &stream,
if (peekHwInfo().platform.eProductFamily == IGFX_ICELAKE_LP) {
if (dispatchFlags.pipelineSelectArgs.mediaSamplerRequired) {
if (!lastVmeSubslicesConfig) {
auto pc = addPipeControlCmd(stream);
pc->setDcFlushEnable(true);
pc->setRenderTargetCacheFlushEnable(true);
pc->setInstructionCacheInvalidateEnable(true);
pc->setTextureCacheInvalidationEnable(true);
pc->setPipeControlFlushEnable(true);
pc->setVfCacheInvalidationEnable(true);
pc->setConstantCacheInvalidationEnable(true);
pc->setStateCacheInvalidationEnable(true);
PipeControlArgs args;
args.dcFlushEnable = true;
args.renderTargetCacheFlushEnable = true;
args.instructionCacheInvalidateEnable = true;
args.textureCacheInvalidationEnable = true;
args.pipeControlFlushEnable = true;
args.vfCacheInvalidationEnable = true;
args.constantCacheInvalidationEnable = true;
args.stateCacheInvalidationEnable = true;
addPipeControlCmd(stream, args);
uint32_t numSubslices = peekHwInfo().gtSystemInfo.SubSliceCount;
uint32_t numSubslicesWithVme = numSubslices / 2; // 1 VME unit per DSS
@ -62,24 +63,27 @@ void CommandStreamReceiverHw<Family>::programMediaSampler(LinearStream &stream,
reg.TheStructure.Common.SliceCountRequest = numSlicesForPowerGating;
LriHelper<Family>::program(&stream, PWR_CLK_STATE_REGISTER::REG_ADDRESS, reg.TheStructure.RawData[0]);
addPipeControlCmd(stream);
args = {};
addPipeControlCmd(stream, args);
lastVmeSubslicesConfig = true;
}
} else {
if (lastVmeSubslicesConfig) {
auto pc = addPipeControlCmd(stream);
pc->setDcFlushEnable(true);
pc->setRenderTargetCacheFlushEnable(true);
pc->setInstructionCacheInvalidateEnable(true);
pc->setTextureCacheInvalidationEnable(true);
pc->setPipeControlFlushEnable(true);
pc->setVfCacheInvalidationEnable(true);
pc->setConstantCacheInvalidationEnable(true);
pc->setStateCacheInvalidationEnable(true);
pc->setGenericMediaStateClear(true);
PipeControlArgs args;
args.dcFlushEnable = true;
args.renderTargetCacheFlushEnable = true;
args.instructionCacheInvalidateEnable = true;
args.textureCacheInvalidationEnable = true;
args.pipeControlFlushEnable = true;
args.vfCacheInvalidationEnable = true;
args.constantCacheInvalidationEnable = true;
args.stateCacheInvalidationEnable = true;
args.genericMediaStateClear = true;
addPipeControlCmd(stream, args);
addPipeControlCmd(stream);
args = {};
addPipeControlCmd(stream, args);
// In Gen11-LP, software programs this register as if GT consists of
// 2 slices with 4 subslices in each slice. Hardware maps this to the
@ -98,7 +102,7 @@ void CommandStreamReceiverHw<Family>::programMediaSampler(LinearStream &stream,
LriHelper<Family>::program(&stream, PWR_CLK_STATE_REGISTER::REG_ADDRESS, reg.TheStructure.RawData[0]);
addPipeControlCmd(stream);
addPipeControlCmd(stream, args);
}
}
}

View File

@ -12,6 +12,7 @@
#include "opencl/source/gen12lp/helpers_gen12lp.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "pipe_control_args.h"
#include "reg_configs_common.h"
namespace NEO {
@ -38,8 +39,9 @@ void PreambleHelper<TGLLPFamily>::programPipelineSelect(LinearStream *pCommandSt
using PIPELINE_SELECT = typename TGLLPFamily::PIPELINE_SELECT;
if (HardwareCommandsHelper<TGLLPFamily>::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) {
auto pipeControl = MemorySynchronizationCommands<TGLLPFamily>::addPipeControl(*pCommandStream, false);
pipeControl->setRenderTargetCacheFlushEnable(true);
PipeControlArgs args;
args.renderTargetCacheFlushEnable = true;
MemorySynchronizationCommands<TGLLPFamily>::addPipeControl(*pCommandStream, args);
}
auto pCmd = pCommandStream->getSpaceForCmd<PIPELINE_SELECT>();

View File

@ -80,6 +80,8 @@ set(NEO_CORE_HELPERS
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.cpp
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h
${CMAKE_CURRENT_SOURCE_DIR}/vec.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions/pipe_control_args_base.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/pipe_control_args.h
)
set_property(GLOBAL PROPERTY NEO_CORE_HELPERS ${NEO_CORE_HELPERS})

View File

@ -0,0 +1,16 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/helpers/definitions/pipe_control_args_base.h"
namespace NEO {
// Argument set handed to the pipe-control helpers (e.g.
// MemorySynchronizationCommands<GfxFamily>::addPipeControl). It adds no fields of
// its own; every flag comes from PipeControlArgsBase.
// NOTE(review): this header is picked from a BRANCH_DIR_SUFFIX-dependent directory,
// so it presumably exists as a per-branch extension point - confirm.
struct PipeControlArgs : PipeControlArgsBase {
    // All flags cleared.
    PipeControlArgs() = default;

    // Presets only the DC-flush flag; the remaining flags stay cleared.
    // explicit: a bare bool (or anything convertible to bool, such as an int or a
    // pointer) must not silently turn into a whole argument set.
    explicit PipeControlArgs(bool dcFlush) : PipeControlArgsBase(dcFlush) {}
};
} // namespace NEO

View File

@ -0,0 +1,25 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace NEO {
// Bundle of boolean PIPE_CONTROL options. Callers fill in the flags they need and
// pass the struct to the pipe-control programming helpers instead of patching the
// command after it has been emitted. Every flag defaults to false.
struct PipeControlArgsBase {
    // All flags cleared.
    PipeControlArgsBase() = default;

    // Presets only dcFlushEnable; every other flag keeps its false default.
    // explicit: prevents accidental implicit conversions from bool-convertible
    // values (integers, pointers) into an argument set.
    explicit PipeControlArgsBase(bool dcFlush) : dcFlushEnable(dcFlush) {}

    bool dcFlushEnable = false;
    bool renderTargetCacheFlushEnable = false;
    bool instructionCacheInvalidateEnable = false;
    bool textureCacheInvalidationEnable = false;
    bool pipeControlFlushEnable = false;
    bool vfCacheInvalidationEnable = false;
    bool constantCacheInvalidationEnable = false;
    bool stateCacheInvalidationEnable = false;
    bool genericMediaStateClear = false;
    // NOTE(review): presumably only meaningful on hardware families whose
    // PIPE_CONTROL has an HDC pipeline flush field - confirm where it is consumed.
    bool hdcPipelineFlush = false;
};
} // namespace NEO

View File

@ -21,10 +21,11 @@
#include <type_traits>
namespace NEO {
class GraphicsAllocation;
struct RootDeviceEnvironment;
struct HardwareCapabilities;
class GmmHelper;
class GraphicsAllocation;
struct HardwareCapabilities;
struct RootDeviceEnvironment;
struct PipeControlArgs;
class HwHelper {
public:
@ -260,26 +261,32 @@ template <typename GfxFamily>
struct MemorySynchronizationCommands {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
static PIPE_CONTROL *obtainPipeControlAndProgramPostSyncOperation(LinearStream &commandStream,
POST_SYNC_OPERATION operation,
uint64_t gpuAddress,
uint64_t immediateData,
bool dcFlush, const HardwareInfo &hwInfo);
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
static void addPipeControlAndProgramPostSyncOperation(LinearStream &commandStream,
POST_SYNC_OPERATION operation,
uint64_t gpuAddress,
uint64_t immediateData,
const HardwareInfo &hwInfo,
PipeControlArgs &args);
static void setPostSyncExtraProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo);
static void addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
static void setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo);
static PIPE_CONTROL *addPipeControl(LinearStream &commandStream, bool dcFlush);
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo);
static void addPipeControl(LinearStream &commandStream, PipeControlArgs &args);
static void addFullCacheFlush(LinearStream &commandStream);
static void setCacheFlushExtraProperties(PIPE_CONTROL &pipeControl);
static size_t getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo);
static size_t getSizeForSinglePipeControl();
static size_t getSizeForSingleSynchronization(const HardwareInfo &hwInfo);
static size_t getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo);
static PIPE_CONTROL *addFullCacheFlush(LinearStream &commandStream);
static size_t getSizeForFullCacheFlush();
static void setExtraCacheFlushFields(PIPE_CONTROL &pipeControl);
protected:
static void setPipeControl(PIPE_CONTROL &pipeControl, bool dcFlush);
static void setPipeControl(PIPE_CONTROL &pipeControl, PipeControlArgs &args);
static void setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args);
};
union SURFACE_STATE_BUFFER_LENGTH {

View File

@ -20,6 +20,8 @@
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "pipe_control_args.h"
namespace NEO {
template <typename Family>
@ -185,35 +187,48 @@ bool HwHelperHw<Family>::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo
(multiDispatchInfo.getMemObjsForAuxTranslation()->size() > 0);
}
template <typename Family>
typename Family::PIPE_CONTROL *MemorySynchronizationCommands<Family>::obtainPipeControlAndProgramPostSyncOperation(
LinearStream &commandStream, POST_SYNC_OPERATION operation, uint64_t gpuAddress, uint64_t immediateData, bool dcFlush, const HardwareInfo &hwInfo) {
using PIPE_CONTROL = typename Family::PIPE_CONTROL;
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
LinearStream &commandStream,
POST_SYNC_OPERATION operation,
uint64_t gpuAddress,
uint64_t immediateData,
const HardwareInfo &hwInfo,
PipeControlArgs &args) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
addPipeControlWA(commandStream, gpuAddress, hwInfo);
PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
PIPE_CONTROL cmd = Family::cmdInitPipeControl;
setPipeControl(cmd, dcFlush);
PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
setPipeControl(cmd, args);
cmd.setPostSyncOperation(operation);
cmd.setAddress(static_cast<uint32_t>(gpuAddress & 0x0000FFFFFFFFULL));
cmd.setAddressHigh(static_cast<uint32_t>(gpuAddress >> 32));
cmd.setDcFlushEnable(dcFlush);
if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
cmd.setImmediateData(immediateData);
}
setExtraPipeControlProperties(cmd, hwInfo);
setPostSyncExtraProperties(cmd, hwInfo);
*pipeControl = cmd;
MemorySynchronizationCommands<Family>::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo);
return pipeControl;
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo);
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, bool dcFlush) {
void MemorySynchronizationCommands<GfxFamily>::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, PipeControlArgs &args) {
pipeControl.setCommandStreamerStallEnable(true);
pipeControl.setDcFlushEnable(dcFlush);
pipeControl.setDcFlushEnable(args.dcFlushEnable);
pipeControl.setConstantCacheInvalidationEnable(args.constantCacheInvalidationEnable);
pipeControl.setInstructionCacheInvalidateEnable(args.instructionCacheInvalidateEnable);
pipeControl.setPipeControlFlushEnable(args.pipeControlFlushEnable);
pipeControl.setRenderTargetCacheFlushEnable(args.renderTargetCacheFlushEnable);
pipeControl.setStateCacheInvalidationEnable(args.stateCacheInvalidationEnable);
pipeControl.setTextureCacheInvalidationEnable(args.textureCacheInvalidationEnable);
pipeControl.setVfCacheInvalidationEnable(args.vfCacheInvalidationEnable);
pipeControl.setVfCacheInvalidationEnable(args.vfCacheInvalidationEnable);
pipeControl.setGenericMediaStateClear(args.genericMediaStateClear);
setPipeControlExtraProperties(pipeControl, args);
if (DebugManager.flags.FlushAllCaches.get()) {
pipeControl.setDcFlushEnable(true);
@ -228,14 +243,12 @@ void MemorySynchronizationCommands<GfxFamily>::setPipeControl(typename GfxFamily
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, bool dcFlush) {
void MemorySynchronizationCommands<GfxFamily>::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, dcFlush);
MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, args);
auto pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
*pipeControl = cmd;
return pipeControl;
}
template <typename GfxFamily>
@ -332,24 +345,22 @@ size_t MemorySynchronizationCommands<GfxFamily>::getSizeForFullCacheFlush() {
}
template <typename GfxFamily>
typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &commandStream) {
void MemorySynchronizationCommands<GfxFamily>::addFullCacheFlush(LinearStream &commandStream) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd<PIPE_CONTROL>();
PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, true);
cmd.setRenderTargetCacheFlushEnable(true);
cmd.setInstructionCacheInvalidateEnable(true);
cmd.setTextureCacheInvalidationEnable(true);
cmd.setPipeControlFlushEnable(true);
cmd.setConstantCacheInvalidationEnable(true);
cmd.setStateCacheInvalidationEnable(true);
MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(cmd);
PipeControlArgs args(true);
args.renderTargetCacheFlushEnable = true;
args.instructionCacheInvalidateEnable = true;
args.textureCacheInvalidationEnable = true;
args.pipeControlFlushEnable = true;
args.constantCacheInvalidationEnable = true;
args.stateCacheInvalidationEnable = true;
MemorySynchronizationCommands<GfxFamily>::setPipeControl(cmd, args);
MemorySynchronizationCommands<GfxFamily>::setCacheFlushExtraProperties(cmd);
*pipeControl = cmd;
return pipeControl;
}
template <typename GfxFamily>

View File

@ -76,15 +76,18 @@ uint64_t HwHelperHw<GfxFamily>::getGpuTimeStampInNS(uint64_t timeStamp, double f
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
inline void MemorySynchronizationCommands<GfxFamily>::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo) {
inline void MemorySynchronizationCommands<GfxFamily>::setPostSyncExtraProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo) {
}
template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setExtraCacheFlushFields(PIPE_CONTROL &pipeControl) {
inline void MemorySynchronizationCommands<GfxFamily>::setCacheFlushExtraProperties(PIPE_CONTROL &pipeControl) {
}
// Hook called by setPipeControl() once the common PIPE_CONTROL fields have been
// programmed from args. This generic definition intentionally does nothing.
// NOTE(review): presumably specialized for families that must program additional
// fields from args (e.g. hdcPipelineFlush) - confirm against the per-family sources.
template <typename GfxFamily>
inline void MemorySynchronizationCommands<GfxFamily>::setPipeControlExtraProperties(typename GfxFamily::PIPE_CONTROL &pipeControl, PipeControlArgs &args) {
}
} // namespace NEO

View File

@ -14,6 +14,8 @@
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/utilities/tag_allocator.h"
#include "pipe_control_args.h"
#include <atomic>
#include <cstdint>
#include <vector>
@ -160,9 +162,10 @@ struct TimestampPacketHelper {
auto cacheFlushTimestampPacketGpuAddress = timestampPacketDependencies->cacheFlushNodes.peekNodes()[0]->getGpuAddress() +
offsetof(TimestampPacketStorage, packets[0].contextEnd);
MemorySynchronizationCommands<GfxFamily>::obtainPipeControlAndProgramPostSyncOperation(
PipeControlArgs args(true);
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
cmdStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
cacheFlushTimestampPacketGpuAddress, 0, true, hwInfo);
cacheFlushTimestampPacketGpuAddress, 0, hwInfo, args);
}
for (auto &node : container.peekNodes()) {