Split wait for timestamps to queue and event

On PVC, both are enabled.
On DG2, it is enabled only for events.

Related-To: NEO-6948

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek 2022-05-05 14:26:03 +00:00 committed by Compute-Runtime-Automation
parent 0b4ea8d2eb
commit 6e8cabdce5
17 changed files with 129 additions and 31 deletions

View File

@ -1089,10 +1089,10 @@ bool CommandQueue::isWaitForTimestampsEnabled() const {
const auto &hwHelper = HwHelper::get(getDevice().getHardwareInfo().platform.eRenderCoreFamily);
const auto &hwInfoConfig = *HwInfoConfig::get(getDevice().getHardwareInfo().platform.eProductFamily);
auto enabled = CommandQueue::isTimestampWaitEnabled();
enabled &= hwHelper.isTimestampWaitSupported();
enabled &= hwHelper.isTimestampWaitSupportedForQueues();
enabled &= !hwInfoConfig.isDcFlushAllowed();
switch (DebugManager.flags.EnableTimestampWait.get()) {
switch (DebugManager.flags.EnableTimestampWaitForQueues.get()) {
case 0:
enabled = false;
break;

View File

@ -680,9 +680,35 @@ bool Event::isCompleted() {
return cmdQueue->isCompleted(getCompletionStamp(), this->bcsState) || this->areTimestampsCompleted();
}
// Reports whether completion of this event may be checked through timestamp packets.
// The baseline answer combines the queue-level isTimestampWaitEnabled() result with the
// HwHelper's per-event support. When the EnableTimestampWaitForEvents debug flag is set
// to one of 0..4, the flag's rule replaces that baseline entirely; any other value
// (including the default -1) keeps the baseline.
bool Event::isWaitForTimestampsEnabled() const {
    const auto &coreHelper = HwHelper::get(cmdQueue->getDevice().getHardwareInfo().platform.eRenderCoreFamily);

    auto baseline = cmdQueue->isTimestampWaitEnabled();
    baseline &= coreHelper.isTimestampWaitSupportedForEvents();

    switch (DebugManager.flags.EnableTimestampWaitForEvents.get()) {
    case 0:
        // Forced off.
        return false;
    case 1:
        // Follow the command stream receiver's "update tag from wait" setting.
        return cmdQueue->getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled();
    case 2:
        // Follow direct submission on the GPGPU command stream receiver.
        return cmdQueue->getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled();
    case 3:
        // Follow the receiver's "any direct submission" query.
        return cmdQueue->getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled();
    case 4:
        // Forced on.
        return true;
    default:
        return baseline;
    }
}
bool Event::areTimestampsCompleted() {
if (this->timestampPacketContainer.get()) {
if (this->cmdQueue->isWaitForTimestampsEnabled()) {
if (this->isWaitForTimestampsEnabled()) {
for (const auto &timestamp : this->timestampPacketContainer->peekNodes()) {
for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) {
this->cmdQueue->getGpgpuCommandStreamReceiver().downloadAllocation(*timestamp->getBaseGraphicsAllocation()->getGraphicsAllocation(this->cmdQueue->getGpgpuCommandStreamReceiver().getRootDeviceIndex()));

View File

@ -356,6 +356,7 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
static void setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_event *last);
bool isWaitForTimestampsEnabled() const;
bool areTimestampsCompleted();
bool currentCmdQVirtualEvent;

View File

@ -42,7 +42,7 @@ HWTEST_F(CommandQueueHwTest, WhenConstructingTwoCommandQueuesThenOnlyOneDebugSur
HWTEST_F(CommandQueueHwTest, givenNoTimestampPacketsWhenWaitForTimestampsThenNoWaitAndTagIsNotUpdated) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableTimestampPacket.set(0);
DebugManager.flags.EnableTimestampWait.set(4);
DebugManager.flags.EnableTimestampWaitForQueues.set(4);
ExecutionEnvironment *executionEnvironment = platform()->peekExecutionEnvironment();
auto device = std::make_unique<MockClDevice>(MockDevice::create<MockDeviceWithDebuggerActive>(executionEnvironment, 0u));
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = false;

View File

@ -146,34 +146,34 @@ TEST(CommandQueue, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabledThenRe
MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false);
{
DebugManager.flags.EnableTimestampWait.set(-1);
DebugManager.flags.EnableTimestampWaitForQueues.set(-1);
const auto &hwHelper = HwHelper::get(mockDevice->getHardwareInfo().platform.eRenderCoreFamily);
const auto &hwInfoConfig = *HwInfoConfig::get(mockDevice->getHardwareInfo().platform.eProductFamily);
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), hwHelper.isTimestampWaitSupported() && !hwInfoConfig.isDcFlushAllowed());
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), hwHelper.isTimestampWaitSupportedForQueues() && !hwInfoConfig.isDcFlushAllowed());
}
{
DebugManager.flags.EnableTimestampWait.set(0);
DebugManager.flags.EnableTimestampWaitForQueues.set(0);
EXPECT_FALSE(cmdQ.isWaitForTimestampsEnabled());
}
{
DebugManager.flags.EnableTimestampWait.set(1);
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled());
}
{
DebugManager.flags.EnableTimestampWait.set(2);
DebugManager.flags.EnableTimestampWaitForQueues.set(2);
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled());
}
{
DebugManager.flags.EnableTimestampWait.set(3);
DebugManager.flags.EnableTimestampWaitForQueues.set(3);
EXPECT_EQ(cmdQ.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled());
}
{
DebugManager.flags.EnableTimestampWait.set(4);
DebugManager.flags.EnableTimestampWaitForQueues.set(4);
EXPECT_TRUE(cmdQ.isWaitForTimestampsEnabled());
}
}

View File

@ -1708,7 +1708,7 @@ TEST(EventsDebug, givenEventWhenTrackingOfParentsIsOffThenDoNotTrackParents) {
event.setStatus(CL_COMPLETE);
}
TEST(CommandQueue, givenTimestampPacketWritesDisabledAndQueueHasTimestampPacketContainerThenCreateTheContainerForEvent) {
TEST(EventTimestampTest, givenTimestampPacketWritesDisabledAndQueueHasTimestampPacketContainerThenCreateTheContainerForEvent) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.EnableTimestampPacket.set(0);
@ -1721,3 +1721,45 @@ TEST(CommandQueue, givenTimestampPacketWritesDisabledAndQueueHasTimestampPacketC
MockEvent<Event> event{&queue, CL_COMMAND_MARKER, 0, 0};
EXPECT_NE(nullptr, event.timestampPacketContainer);
}
// Walks EnableTimestampWaitForEvents through -1 and 0..4 and checks that
// Event::isWaitForTimestampsEnabled() matches the rule selected by each value.
TEST(EventTimestampTest, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabledThenReturnProperValue) {
    DebugManagerStateRestore flagsRestorer;
    VariableBackup<UltHwConfig> ultConfigBackup(&ultHwConfig);
    ultHwConfig.useWaitForTimestamps = true;

    MockContext context{};
    auto clDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));
    MockCommandQueue cmdQ(&context, clDevice.get(), 0, false);
    MockEvent<Event> event{&cmdQ, CL_COMMAND_MARKER, 0, 0};
    const auto &coreHelper = HwHelper::get(clDevice->getHardwareInfo().platform.eRenderCoreFamily);

    // -1: default, result follows the hardware helper's per-event support.
    DebugManager.flags.EnableTimestampWaitForEvents.set(-1);
    EXPECT_EQ(event.isWaitForTimestampsEnabled(), coreHelper.isTimestampWaitSupportedForEvents());

    // 0: always disabled.
    DebugManager.flags.EnableTimestampWaitForEvents.set(0);
    EXPECT_FALSE(event.isWaitForTimestampsEnabled());

    // 1: mirrors the "update tag from wait" setting of the command stream receiver.
    DebugManager.flags.EnableTimestampWaitForEvents.set(1);
    EXPECT_EQ(event.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled());

    // 2: mirrors direct submission on the GPGPU command stream receiver.
    DebugManager.flags.EnableTimestampWaitForEvents.set(2);
    EXPECT_EQ(event.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled());

    // 3: mirrors the receiver's "any direct submission" query.
    DebugManager.flags.EnableTimestampWaitForEvents.set(3);
    EXPECT_EQ(event.isWaitForTimestampsEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled());

    // 4: always enabled.
    DebugManager.flags.EnableTimestampWaitForEvents.set(4);
    EXPECT_TRUE(event.isWaitForTimestampsEnabled());
}

View File

@ -819,7 +819,8 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
HWTEST_F(TimestampPacketTests, givenTimestampWaitEnabledWhenEnqueueWithEventThenEventHasCorrectTimestampsToCheckForCompletion) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWait.set(1);
DebugManager.flags.EnableTimestampWaitForEvents.set(1);
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
@ -889,10 +890,10 @@ HWTEST_F(TimestampPacketTests, givenTimestampWaitEnabledWhenEnqueueWithEventThen
*csr.getTagAddress() = csr.peekTaskCount();
}
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueThenDoNotWaitOnTimestamp) {
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishWithoutEnqueueThenDoNotWaitOnTimestamp) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWait.set(1);
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
@ -910,10 +911,10 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueT
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 1u);
}
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimestamp) {
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenWaitOnTimestamp) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWait.set(1);
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
@ -940,10 +941,10 @@ HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimes
EXPECT_EQ(csr.waitForCompletionWithTimeoutTaskCountCalled, 0u);
}
HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitWhenFinishThenWaitOnTimestamp) {
HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitForQueuesWhenFinishThenWaitOnTimestamp) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWait.set(1);
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
csr.timestampPacketWriteEnabled = true;
@ -982,10 +983,10 @@ extern uint32_t pauseOffset;
extern std::function<void()> setupPauseAddress;
} // namespace CpuIntrinsicsTests
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenCallWaitUtils) {
HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitForQueuesWhenFinishThenCallWaitUtils) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);
DebugManager.flags.EnableTimestampWait.set(1);
DebugManager.flags.EnableTimestampWaitForQueues.set(1);
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0};

View File

@ -37,6 +37,7 @@ struct MockEvent : public BaseEventType {
using BaseEventType::timeStampNode;
using Event::calcProfilingData;
using Event::calculateSubmitTimestampData;
using Event::isWaitForTimestampsEnabled;
using Event::magic;
using Event::queueTimeStamp;
using Event::submitTimeStamp;

View File

@ -234,7 +234,7 @@ PrintIoctlTimes = 0
PrintIoctlEntries = 0
PrintUmdSharedMigration = 0
UpdateTaskCountFromWait = -1
EnableTimestampWait = -1
EnableTimestampWaitForQueues = -1
PreferCopyEngineForCopyBufferToBuffer = -1
EnableStaticPartitioning = -1
DisableDeepBind = 0
@ -416,3 +416,4 @@ PrintImageBlitBlockCopyCmdDetails = 0
UseContextEndOffsetForEventCompletion = -1
DirectSubmissionInsertExtraMiMemFenceCommands = -1
DirectSubmissionInsertSfenceInstructionPriorToSubmission = -1
EnableTimestampWaitForEvents = -1

View File

@ -417,7 +417,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, UseVmBind, -1, "Use new residency model on Linux
DECLARE_DEBUG_VARIABLE(int32_t, PassBoundBOToExec, -1, "Pass bound BOs to exec call to keep dependencies")
DECLARE_DEBUG_VARIABLE(int32_t, EnableStaticPartitioning, -1, "Divide workload into partitions during dispatch, -1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, UpdateTaskCountFromWait, -1, " Do not update task count after each enqueue, but send update request while wait, -1: default(disabled), 0: disabled, 1: enabled on gpgpue engine with direct submission, 2: enabled on any direct submission, 3: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, EnableTimestampWait, -1, "Wait using timestamps, -1: default(disabled), 0: disabled, 1: enabled where UpdateTaskCountFromWait enabled, 2: enabled on gpgpue engine with direct submission, 3: enabled on any direct submission, 4: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, EnableTimestampWaitForQueues, -1, "Wait on queues using timestamps, -1: default(disabled), 0: disabled, 1: enabled where UpdateTaskCountFromWait enabled, 2: enabled on gpgpue engine with direct submission, 3: enabled on any direct submission, 4: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, EnableTimestampWaitForEvents, -1, "Wait on events using timestamps, -1: default(disabled), 0: disabled, 1: enabled where UpdateTaskCountFromWait enabled, 2: enabled on gpgpue engine with direct submission, 3: enabled on any direct submission, 4: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, DeferOsContextInitialization, -1, "-1: default, 0: create all contexts immediately, 1: defer, if possible")
DECLARE_DEBUG_VARIABLE(int32_t, UsmInitialPlacement, -1, "-1: default, 0: optimize for first CPU access, 1: optimize for first GPU access")
DECLARE_DEBUG_VARIABLE(int32_t, ForceHostPointerImport, -1, "-1: default, 0: disable, 1: enable, Forces the driver to import every host pointer coming into driver, WARNING this is not spec complaint.")

View File

@ -63,7 +63,8 @@ class HwHelper {
static bool compressedImagesSupported(const HardwareInfo &hwInfo);
static bool cacheFlushAfterWalkerSupported(const HardwareInfo &hwInfo);
virtual bool timestampPacketWriteSupported() const = 0;
virtual bool isTimestampWaitSupported() const = 0;
virtual bool isTimestampWaitSupportedForQueues() const = 0;
virtual bool isTimestampWaitSupportedForEvents() const = 0;
virtual bool isUpdateTaskCountFromWaitSupported() const = 0;
virtual size_t getRenderSurfaceStateSize() const = 0;
virtual void setRenderSurfaceStateForBuffer(const RootDeviceEnvironment &rootDeviceEnvironment,
@ -248,7 +249,9 @@ class HwHelperHw : public HwHelper {
bool timestampPacketWriteSupported() const override;
bool isTimestampWaitSupported() const override;
bool isTimestampWaitSupportedForQueues() const override;
bool isTimestampWaitSupportedForEvents() const override;
bool isUpdateTaskCountFromWaitSupported() const override;

View File

@ -41,7 +41,12 @@ bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isTimestampWaitSupported() const {
bool HwHelperHw<GfxFamily>::isTimestampWaitSupportedForQueues() const {
return false;
}
// Generic HwHelperHw implementation: unconditionally reports that waiting on events
// via timestamps is not supported. Event::isWaitForTimestampsEnabled() ANDs this value
// into its default (no debug-flag override) decision.
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isTimestampWaitSupportedForEvents() const {
return false;
}

View File

@ -56,7 +56,12 @@ bool HwHelperHw<GfxFamily>::timestampPacketWriteSupported() const {
}
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isTimestampWaitSupported() const {
bool HwHelperHw<GfxFamily>::isTimestampWaitSupportedForQueues() const {
return false;
}
// Generic HwHelperHw implementation: unconditionally reports that waiting on events
// via timestamps is not supported. Event::isWaitForTimestampsEnabled() ANDs this value
// into its default (no debug-flag override) decision.
template <typename GfxFamily>
bool HwHelperHw<GfxFamily>::isTimestampWaitSupportedForEvents() const {
return false;
}

View File

@ -446,7 +446,12 @@ size_t HwHelperHw<Family>::getSipKernelMaxDbgSurfaceSize(const HardwareInfo &hwI
}
template <>
bool HwHelperHw<Family>::isTimestampWaitSupported() const {
bool HwHelperHw<Family>::isTimestampWaitSupportedForQueues() const {
return true;
}
// Explicit specialization for this file's Family (defined outside this view):
// unconditionally reports that waiting on events via timestamps is supported.
template <>
bool HwHelperHw<Family>::isTimestampWaitSupportedForEvents() const {
return true;
}

View File

@ -130,6 +130,11 @@ inline bool HwHelperHw<Family>::isLinuxCompletionFenceSupported() const {
return true;
}
// Explicit specialization for this file's Family (defined outside this view):
// unconditionally reports that waiting on events via timestamps is supported.
// NOTE(review): no isTimestampWaitSupportedForQueues() specialization is visible in this
// hunk, so the queue variant presumably comes from the generic template — confirm.
template <>
bool HwHelperHw<Family>::isTimestampWaitSupportedForEvents() const {
return true;
}
template class HwHelperHw<Family>;
template class FlatBatchBufferHelperHw<Family>;
template struct MemorySynchronizationCommands<Family>;

View File

@ -61,7 +61,8 @@ XE_HPC_CORETEST_F(HwHelperXeHpcCoreTest, givenHwHelperWhenGettingIfRevisionSpeci
XE_HPC_CORETEST_F(HwHelperXeHpcCoreTest, givenHwHelperWhenCheckTimestampWaitSupportThenReturnTrue) {
auto &helper = HwHelper::get(renderCoreFamily);
EXPECT_TRUE(helper.isTimestampWaitSupported());
EXPECT_TRUE(helper.isTimestampWaitSupportedForQueues());
EXPECT_TRUE(helper.isTimestampWaitSupportedForEvents());
}
XE_HPC_CORETEST_F(HwHelperXeHpcCoreTest, givenXeHPCPlatformWhenCheckAssignEngineRoundRobinSupportedThenReturnTrue) {

View File

@ -127,9 +127,10 @@ XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenXeHPAndLaterPlatformWhenCheckAssig
EXPECT_FALSE(hwHelper.isAssignEngineRoundRobinSupported());
}
XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenHwHelperWhenCheckTimestampWaitSupportThenReturnFalse) {
XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenHwHelperWhenCheckTimestampWaitSupportThenReturnFalseForQueuesButTrueForEvents) {
auto &helper = HwHelper::get(renderCoreFamily);
EXPECT_FALSE(helper.isTimestampWaitSupported());
EXPECT_FALSE(helper.isTimestampWaitSupportedForQueues());
EXPECT_TRUE(helper.isTimestampWaitSupportedForEvents());
}
XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenDisablePipeControlFlagIsEnabledWhenLocalMemoryIsEnabledThenReturnTrueAndProgramPipeControl) {