Enable task count update from wait
Signed-off-by: Jobczyk, Lukasz <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
022eb054e6
commit
d77a6cbe4b
|
@ -771,7 +771,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
blocking, //blocking
|
||||
shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, //dcFlush
|
||||
multiDispatchInfo.usesSlm(), //useSLM
|
||||
true, //guardCommandBufferWithPipeControl
|
||||
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl
|
||||
commandType == CL_COMMAND_NDRANGE_KERNEL, //GSBA32BitRequired
|
||||
requiresCoherency, //requiresCoherency
|
||||
(QueuePriority::LOW == priority), //lowPriority
|
||||
|
@ -1008,7 +1008,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
|||
blocking, //blocking
|
||||
false, //dcFlush
|
||||
false, //useSLM
|
||||
true, //guardCommandBufferWithPipeControl
|
||||
!getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl
|
||||
false, //GSBA32BitRequired
|
||||
false, //requiresCoherency
|
||||
false, //lowPriority
|
||||
|
|
|
@ -70,7 +70,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
|||
true, //blocking
|
||||
true, //dcFlush
|
||||
false, //useSLM
|
||||
true, //guardCommandBufferWithPipeControl
|
||||
!commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl
|
||||
false, //GSBA32BitRequired
|
||||
false, //requiresCoherency
|
||||
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
|
||||
|
@ -199,7 +199,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||
true, //blocking
|
||||
flushDC, //dcFlush
|
||||
slmUsed, //useSLM
|
||||
true, //guardCommandBufferWithPipeControl
|
||||
!commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl
|
||||
commandType == CL_COMMAND_NDRANGE_KERNEL, //GSBA32BitRequired
|
||||
requiresCoherency, //requiresCoherency
|
||||
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
|
||||
|
@ -357,7 +357,7 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
|||
true, //blocking
|
||||
false, //dcFlush
|
||||
false, //useSLM
|
||||
true, //guardCommandBufferWithPipeControl
|
||||
!commandStreamReceiver.isUpdateTagFromWaitEnabled(), //guardCommandBufferWithPipeControl
|
||||
false, //GSBA32BitRequired
|
||||
false, //requiresCoherency
|
||||
commandQueue.getPriority() == QueuePriority::LOW, //lowPriority
|
||||
|
|
|
@ -102,24 +102,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnableUpdateTaskFromWaitWhenN
|
|||
// Parse command list
|
||||
parseCommands<FamilyType>(commandStreamTask, 0);
|
||||
|
||||
auto pipeControlExpected = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
|
||||
|
||||
auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
if (pipeControlExpected) {
|
||||
EXPECT_NE(cmdList.end(), itorPC);
|
||||
if (UnitTestHelper<FamilyType>::isPipeControlWArequired(pDevice->getHardwareInfo())) {
|
||||
itorPC++;
|
||||
itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itorPC);
|
||||
}
|
||||
|
||||
// Verify that the dcFlushEnabled bit is set in PC
|
||||
auto pCmdWA = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
|
||||
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pCmdWA->getDcFlushEnable());
|
||||
} else {
|
||||
EXPECT_EQ(cmdList.end(), itorPC);
|
||||
}
|
||||
EXPECT_EQ(cmdList.end(), itorPC);
|
||||
|
||||
buffer->release();
|
||||
}
|
||||
|
|
|
@ -1032,7 +1032,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWithOutOfOrd
|
|||
EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pipeControl->getDcFlushEnable());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhenFlushTaskThenThereIsNoPipeControlForUpdateTaskCount) {
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetAndGuardCommandBufferWithPipeControlWhenFlushTaskThenThereIsPipeControlForUpdateTaskCount) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
|
||||
|
@ -1061,6 +1061,37 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe
|
|||
parseCommands<FamilyType>(commandStream);
|
||||
auto itorPipeControl = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
EXPECT_NE(itorPipeControl, cmdList.end());
|
||||
}
|
||||
|
||||
// Checks that, with the UpdateTaskCountFromWait debug flag set to 3, a flushTask issued with
// default dispatch flags on a CSR in BatchedDispatch mode leaves no PIPE_CONTROL command in the
// command stream obtained from the queue.
HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhenFlushTaskThenThereIsNoPipeControlForUpdateTaskCount) {
|
||||
// presumably restores the debug flags when the test scope ends — confirm
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.UpdateTaskCountFromWait.set(3);
|
||||
|
||||
CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
|
||||
auto &commandStream = commandQueue.getCS(4096u);
|
||||
|
||||
// Install a mock CSR on the device, turn off both implicit-flush options on it and
// switch it to batched dispatch.
auto mockCsr = new MockCsrHw2<FamilyType>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
pDevice->resetCommandStreamReceiver(mockCsr);
|
||||
mockCsr->useNewResourceImplicitFlush = false;
|
||||
mockCsr->useGpuIdleImplicitFlush = false;
|
||||
mockCsr->overrideDispatchPolicy(DispatchMode::BatchedDispatch);
|
||||
|
||||
// Default dispatch flags; only the preemption mode is overridden with the device default.
// NOTE(review): presumably the defaults do not request a guarding pipe control — confirm.
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||
dispatchFlags.preemptionMode = PreemptionHelper::getDefaultPreemptionMode(pDevice->getHardwareInfo());
|
||||
|
||||
mockCsr->flushTask(commandStream,
|
||||
0,
|
||||
dsh,
|
||||
ioh,
|
||||
ssh,
|
||||
taskLevel,
|
||||
dispatchFlags,
|
||||
*pDevice);
|
||||
|
||||
// Parse what was written to the command stream and search it for any PIPE_CONTROL.
parseCommands<FamilyType>(commandStream);
|
||||
auto itorPipeControl = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
|
||||
// The search must reach the end of the list, i.e. no PIPE_CONTROL was found.
EXPECT_EQ(itorPipeControl, cmdList.end());
|
||||
}
|
||||
|
||||
|
|
|
@ -86,6 +86,12 @@ HWTEST2_F(HwHelperTestPvcAndLater, GivenVariousValuesWhenCallingGetBarriersCount
|
|||
EXPECT_EQ(32u, hwHelper.getBarriersCountFromHasBarriers(7u));
|
||||
}
|
||||
|
||||
// Checks that the HwHelper obtained for this platform's render core family reports
// isUpdateTaskCountFromWaitSupported() as true. The test is registered with the
// IsAtLeastXeHpcCore matcher.
HWTEST2_F(HwHelperTestPvcAndLater, givenHwHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) {
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
EXPECT_TRUE(hwHelper.isUpdateTaskCountFromWaitSupported());
|
||||
}
|
||||
|
||||
HWTEST2_F(HwHelperTestPvcAndLater, givenCooperativeContextSupportedWhenGetEngineInstancesThenReturnCorrectAmountOfCooperativeCcs, IsAtLeastXeHpcCore) {
|
||||
HardwareInfo hwInfo = *defaultHwInfo;
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 2;
|
||||
|
|
|
@ -178,6 +178,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
|||
bool checkPlatformSupportsNewResourceImplicitFlush() const;
|
||||
bool checkPlatformSupportsGpuIdleImplicitFlush() const;
|
||||
void configurePostSyncWriteOffset();
|
||||
void unregisterDirectSubmissionFromController();
|
||||
|
||||
HeapDirtyState dshState;
|
||||
HeapDirtyState iohState;
|
||||
|
|
|
@ -43,10 +43,7 @@ namespace NEO {
|
|||
|
||||
// Destructor: unregisters this CSR from the direct submission controller.
// NOTE(review): this is a diff hunk (-43,10 +43,7); the inline lookup/unregister statements
// appear to be the removed text and the unregisterDirectSubmissionFromController() call the
// added replacement — confirm against the actual file before treating both as live code.
template <typename GfxFamily>
|
||||
CommandStreamReceiverHw<GfxFamily>::~CommandStreamReceiverHw() {
|
||||
auto directSubmissionController = executionEnvironment.directSubmissionController.get();
|
||||
if (directSubmissionController) {
|
||||
directSubmissionController->unregisterDirectSubmission(this);
|
||||
}
|
||||
this->unregisterDirectSubmissionFromController();
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -197,7 +194,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
const auto &hwInfo = peekHwInfo();
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
bool updateTag = false;
|
||||
if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) {
|
||||
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
|
||||
//for ImmediateDispatch we will send this right away, therefore this pipe control will close the level
|
||||
|
@ -221,29 +217,20 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
|
||||
auto address = getTagAllocation()->getGpuAddress();
|
||||
|
||||
updateTag = !isUpdateTagFromWaitEnabled();
|
||||
updateTag |= dispatchFlags.blocking;
|
||||
updateTag |= dispatchFlags.dcFlush;
|
||||
PipeControlArgs args;
|
||||
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(dispatchFlags.dcFlush, hwInfo);
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
|
||||
args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush;
|
||||
args.workloadPartitionOffset = isMultiTileOperationEnabled();
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
commandStreamTask,
|
||||
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
address,
|
||||
taskCount + 1,
|
||||
hwInfo,
|
||||
args);
|
||||
|
||||
if (updateTag) {
|
||||
PipeControlArgs args;
|
||||
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(dispatchFlags.dcFlush, hwInfo);
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
|
||||
args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush;
|
||||
args.workloadPartitionOffset = isMultiTileOperationEnabled();
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
commandStreamTask,
|
||||
PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA,
|
||||
address,
|
||||
taskCount + 1,
|
||||
hwInfo,
|
||||
args);
|
||||
} else {
|
||||
currentPipeControlForNooping = nullptr;
|
||||
}
|
||||
|
||||
this->latestSentTaskCount = taskCount + 1;
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", peekTaskCount());
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
flatBatchBufferHelper->setPatchInfoData(PatchInfoData(address, 0u,
|
||||
|
@ -258,6 +245,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
PatchInfoAllocationType::Default));
|
||||
}
|
||||
}
|
||||
this->latestSentTaskCount = taskCount + 1;
|
||||
|
||||
if (DebugManager.flags.ForceSLML3Config.get()) {
|
||||
dispatchFlags.useSLM = true;
|
||||
|
@ -585,7 +573,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
if (submitCSR | submitTask) {
|
||||
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
|
||||
flushHandler(batchBuffer, this->getResidencyAllocations());
|
||||
if (updateTag) {
|
||||
if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) {
|
||||
this->latestFlushedTaskCount = this->taskCount + 1;
|
||||
}
|
||||
} else {
|
||||
|
@ -1004,6 +992,14 @@ bool CommandStreamReceiverHw<GfxFamily>::detectInitProgrammingFlagsRequired(cons
|
|||
return DebugManager.flags.ForceCsrReprogramming.get();
|
||||
}
|
||||
|
||||
// Unregisters this command stream receiver from the execution environment's direct
// submission controller. Does nothing when the execution environment holds no controller.
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::unregisterDirectSubmissionFromController() {
|
||||
auto directSubmissionController = executionEnvironment.directSubmissionController.get();
|
||||
// The controller pointer may be null; only unregister when it is set.
if (directSubmissionController) {
|
||||
directSubmissionController->unregisterDirectSubmission(this);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) {
|
||||
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
|
||||
|
@ -1324,7 +1320,9 @@ inline void CommandStreamReceiverHw<GfxFamily>::flushHandler(BatchBuffer &batchB
|
|||
|
||||
template <typename GfxFamily>
|
||||
inline bool CommandStreamReceiverHw<GfxFamily>::isUpdateTagFromWaitEnabled() {
|
||||
bool enabled = false;
|
||||
auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily);
|
||||
auto enabled = hwHelper.isUpdateTaskCountFromWaitSupported();
|
||||
enabled &= this->isAnyDirectSubmissionEnabled();
|
||||
|
||||
switch (DebugManager.flags.UpdateTaskCountFromWait.get()) {
|
||||
case 0:
|
||||
|
@ -1430,6 +1428,9 @@ inline bool CommandStreamReceiverHw<GfxFamily>::initDirectSubmission(Device &dev
|
|||
if (directSubmissionController) {
|
||||
directSubmissionController->registerDirectSubmission(this);
|
||||
}
|
||||
if (this->isUpdateTagFromWaitEnabled()) {
|
||||
this->overrideDispatchPolicy(DispatchMode::ImmediateDispatch);
|
||||
}
|
||||
}
|
||||
osContext.setDirectSubmissionActive();
|
||||
}
|
||||
|
|
|
@ -64,6 +64,7 @@ class HwHelper {
|
|||
static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
|
||||
virtual bool timestampPacketWriteSupported() const = 0;
|
||||
virtual bool isTimestampWaitSupported() const = 0;
|
||||
virtual bool isUpdateTaskCountFromWaitSupported() const = 0;
|
||||
virtual size_t getRenderSurfaceStateSize() const = 0;
|
||||
virtual void setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment,
|
||||
void *surfaceStateBuffer,
|
||||
|
@ -248,6 +249,8 @@ class HwHelperHw : public HwHelper {
|
|||
|
||||
bool isTimestampWaitSupported() const override;
|
||||
|
||||
bool isUpdateTaskCountFromWaitSupported() const override;
|
||||
|
||||
bool is1MbAlignmentSupported(const HardwareInfo &hwInfo, bool isCompressionEnabled) const override;
|
||||
|
||||
bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override;
|
||||
|
|
|
@ -45,6 +45,11 @@ bool HwHelperHw<GfxFamily>::isTimestampWaitSupported() const {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Generic template definition: always reports that updating the task count from wait
// is not supported.
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isUpdateTaskCountFromWaitSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isAssignEngineRoundRobinSupported() const {
|
||||
return false;
|
||||
|
|
|
@ -41,6 +41,11 @@ bool HwHelperHw<Family>::isTimestampWaitSupported() const {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Explicit specialization for `Family`: always reports that updating the task count from
// wait is supported.
// NOTE(review): which hardware family `Family` names is not visible in this hunk — confirm.
template <>
|
||||
bool HwHelperHw<Family>::isUpdateTaskCountFromWaitSupported() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||
const HardwareInfo &hwInfo, bool isEngineInstanced) const {
|
||||
|
|
|
@ -60,6 +60,11 @@ bool HwHelperHw<GfxFamily>::isTimestampWaitSupported() const {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Generic template definition: always reports that updating the task count from wait
// is not supported.
template <typename GfxFamily>
|
||||
bool HwHelperHw<GfxFamily>::isUpdateTaskCountFromWaitSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
const EngineInstancesContainer HwHelperHw<GfxFamily>::getGpgpuEngineInstances(const HardwareInfo &hwInfo) const {
|
||||
auto defaultEngine = getChosenEngineType(hwInfo);
|
||||
|
|
|
@ -43,6 +43,7 @@ class DrmCommandStreamReceiver : public DeviceCommandStreamReceiver<GfxFamily> {
|
|||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield,
|
||||
gemCloseWorkerMode mode = gemCloseWorkerMode::gemCloseWorkerActive);
|
||||
~DrmCommandStreamReceiver();
|
||||
|
||||
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
|
||||
MOCKABLE_VIRTUAL void processResidency(const ResidencyContainer &allocationsForResidency, uint32_t handleId) override;
|
||||
|
|
|
@ -80,6 +80,13 @@ DrmCommandStreamReceiver<GfxFamily>::DrmCommandStreamReceiver(ExecutionEnvironme
|
|||
kmdWaitTimeout = DebugManager.flags.SetKmdWaitTimeout.get();
|
||||
}
|
||||
|
||||
// Destructor: when update-tag-from-wait is enabled, calls waitForCompletionWithTimeout for the
// value returned by peekTaskCount() before the object is destroyed.
// NOTE(review): presumably this lets outstanding GPU work finish before teardown; the meaning
// of the WaitParams{false, false, 0} fields is not visible here — confirm.
template <typename GfxFamily>
|
||||
inline DrmCommandStreamReceiver<GfxFamily>::~DrmCommandStreamReceiver() {
|
||||
if (this->isUpdateTagFromWaitEnabled()) {
|
||||
this->waitForCompletionWithTimeout(WaitParams{false, false, 0}, this->peekTaskCount());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
SubmissionStatus DrmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) {
|
||||
this->printDeviceIndex();
|
||||
|
|
|
@ -561,6 +561,21 @@ HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenCheckTaskCou
|
|||
}
|
||||
}
|
||||
|
||||
// Checks how isUpdateTagFromWaitEnabled() depends on the CSR's directSubmissionAvailable flag:
// with the flag set it must match the HwHelper's isUpdateTaskCountFromWaitSupported() result,
// and with the flag cleared it must be false.
HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenCheckIfEnabledThenCanBeEnabledOnlyWithDirectSubmission) {
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto &hwHelper = HwHelper::get(csr.peekHwInfo().platform.eRenderCoreFamily);
|
||||
|
||||
{
|
||||
// Direct submission available: the result follows the platform's support.
csr.directSubmissionAvailable = true;
|
||||
EXPECT_EQ(csr.isUpdateTagFromWaitEnabled(), hwHelper.isUpdateTaskCountFromWaitSupported());
|
||||
}
|
||||
|
||||
{
|
||||
// Direct submission unavailable: the feature must be reported as disabled.
csr.directSubmissionAvailable = false;
|
||||
EXPECT_FALSE(csr.isUpdateTagFromWaitEnabled());
|
||||
}
|
||||
}
|
||||
|
||||
struct InitDirectSubmissionFixture {
|
||||
void SetUp() {
|
||||
DebugManager.flags.EnableDirectSubmission.set(1);
|
||||
|
|
Loading…
Reference in New Issue