Add minor fixes to Direct Submission

Change-Id: Idfa23e6663e4b188fe1e54ab89b7e11c97e9550c
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2020-03-23 10:14:50 +01:00
committed by sys_ocldev
parent 1eb3bd5c0a
commit f423679de9
4 changed files with 94 additions and 63 deletions

View File

@@ -43,10 +43,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, CFEFusedEUDispatch, -1, "Set Fused EU dispatch i
DECLARE_DEBUG_VARIABLE(int32_t, ForceAuxTranslationMode, -1, "-1: Default, 0: Builtin, 1: Blit")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideGpuAddressSpace, -1, "-1: Default, !=-1: GPU address space range in bits")
// Overrides the maximum workgroup size; -1 keeps the default (see description string).
DECLARE_DEBUG_VARIABLE(int32_t, OverrideMaxWorkgroupSize, -1, "-1: Default, !=-1: Overrides max workgroup size to this value")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionBufferPlacement, -1, "-1: do not override, 0: non-system, 1: system")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSemaphorePlacement, -1, "-1: do not override, 0: non-system, 1: system")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCpuCacheFlush, -1, "-1: do not override, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionEnableDebugBuffer, 0, "0: diagnostic feature disabled, 1: dispatch simple self-buffer mode 1, 2: dispatch simple self-buffer mode 2")
DECLARE_DEBUG_VARIABLE(int32_t, DoCpuCopyOnReadBuffer, -1, "-1: default 0: do not use CPU copy, 1: triggers CPU copy path for Read Buffer calls, only supported for some basic use cases (no blocked user events in dependencies tree)")
DECLARE_DEBUG_VARIABLE(int32_t, DoCpuCopyOnWriteBuffer, -1, "-1: default 0: do not use CPU copy, 1: triggers CPU copy path for Write Buffer calls, only supported for some basic use cases (no blocked user events in dependencies tree)")
DECLARE_DEBUG_VARIABLE(bool, EnableDebugBreak, true, "Enable DEBUG_BREAKs")
@@ -61,8 +57,6 @@ DECLARE_DEBUG_VARIABLE(bool, DoNotRegisterTrimCallback, false, "When set to true
DECLARE_DEBUG_VARIABLE(bool, OverrideInvalidEngineWithDefault, false, "When set to true driver chooses engine 0 if no engine is found.")
DECLARE_DEBUG_VARIABLE(bool, DisableAuxTranslation, false, "Disable aux translation when required by Kernel.")
DECLARE_DEBUG_VARIABLE(bool, DisableTimestampPacketOptimizations, false, "Allocate new allocation per node + dont reuse old nodes")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionDisableCacheFlush, false, "Disable dispatching cache flush commands")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionDisableMonitorFence, false, "Disable dispatching monitor fence commands")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, PrintDebugSettings, false, "Dump all debug variables settings to text file. Print to stdout if value is different than default.")
@@ -84,6 +78,16 @@ DECLARE_DEBUG_VARIABLE(bool, PrintDispatchParameters, false, "prints dispatch pa
DECLARE_DEBUG_VARIABLE(bool, PrintProgramBinaryProcessingTime, false, "prints execution time of Program::processGenBinary() method during program building")
DECLARE_DEBUG_VARIABLE(bool, WddmResidencyLogger, false, "gather Wddm residency statistics to file")
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
/*DIRECT SUBMISSION FLAGS*/
// Debug keys grouped for the direct submission path.
// Per the description strings below: the first four int32_t keys use -1 as
// "default / do not override", while DirectSubmissionEnableDebugBuffer uses 0
// to select the regular workload and 1 or 2 to select a diagnostic buffer mode.
// Note the two similarly named keys: DirectSubmissionDisableCpuCacheFlush is a
// tri-state int32_t override, DirectSubmissionDisableCacheFlush is a bool that
// disables dispatching cache flush commands.
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionBufferPlacement, -1, "-1: do not override, 0: non-system, 1: system")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSemaphorePlacement, -1, "-1: do not override, 0: non-system, 1: system")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCpuCacheFlush, -1, "-1: do not override, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionEnableDebugBuffer, 0, "0: diagnostic feature disabled - dispatch regular workload, 1: dispatch diagnostic buffer - mode 1 - single SDI command, 2: dispatch diagnostic buffer - mode 2 - no command")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionDisableCacheFlush, false, "Disable dispatching cache flush commands")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionDisableMonitorFence, false, "Disable dispatching monitor fence commands")
/*PERFORMANCE FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, EnableNullHardware, false, "works on Windows only, sets the Null Hardware flag that makes all Command buffers completed while GPU does nothing")
DECLARE_DEBUG_VARIABLE(bool, ForceLinearImages, false, "Force linear images. Default is Y-tiled.")
@@ -151,7 +155,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, NodeOrdinal, -1, "-1: default do not override, 0
DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont override) or any valid config (0: Age Based, 1: Round Robin)")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideAubDeviceId, -1, "-1 dont override, any other: use this value for AUB generation device id")
DECLARE_DEBUG_VARIABLE(int32_t, EnableTimestampPacket, -1, "-1: default, 0: disable, 1:enable. Write Timestamp Packet for each set of gpu walkers")
DECLARE_DEBUG_VARIABLE(int32_t, EnableDirectSubmission, -1, "-1: default (disabled), 0: disable, 1:enable. Enables direct submission of command buffers bypassing KMD")
DECLARE_DEBUG_VARIABLE(int32_t, AllocateSharedAllocationsWithCpuAndGpuStorage, -1, "When enabled driver creates cpu & gpu storage for shared unified memory allocations. (-1 - devices default mode, 0 - disable, 1 - enable)")
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
// When set, raw GPU timestamps are returned instead of calculated ones (see description string).
DECLARE_DEBUG_VARIABLE(bool, ReturnRawGpuTimestamps, false, "Driver returns raw GPU timestamps instead of calculated ones.")

View File

@@ -104,30 +104,34 @@ class DirectSubmissionHw {
uint64_t getCommandBufferPositionGpuAddress(void *position);
Device &device;
OsContext &osContext;
std::unique_ptr<Dispatcher> cmdDispatcher;
const HardwareInfo *hwInfo = nullptr;
enum RingBufferUse : uint32_t {
FirstBuffer,
SecondBuffer,
MaxBuffers
};
LinearStream ringCommandStream;
std::unique_ptr<Dispatcher> cmdDispatcher;
FlushStamp completionRingBuffers[RingBufferUse::MaxBuffers] = {0ull, 0ull};
uint64_t semaphoreGpuVa = 0u;
Device &device;
OsContext &osContext;
const HardwareInfo *hwInfo = nullptr;
GraphicsAllocation *ringBuffer = nullptr;
GraphicsAllocation *ringBuffer2 = nullptr;
FlushStamp completionRingBuffers[RingBufferUse::MaxBuffers] = {0ull, 0ull};
RingBufferUse currentRingBuffer = RingBufferUse::FirstBuffer;
LinearStream ringCommandStream;
GraphicsAllocation *semaphores = nullptr;
void *semaphorePtr = nullptr;
uint64_t semaphoreGpuVa = 0u;
volatile RingSemaphoreData *semaphoreData = nullptr;
uint32_t currentQueueWorkCount = 1u;
RingBufferUse currentRingBuffer = RingBufferUse::FirstBuffer;
uint32_t workloadMode = 0;
bool ringStart = false;
bool disableCpuCacheFlush = false;
bool disableCacheFlush = false;
bool disableMonitorFence = false;
};
} // namespace NEO

View File

@@ -36,6 +36,9 @@ DirectSubmissionHw<GfxFamily>::DirectSubmissionHw(Device &device,
if (disableCacheFlushKey != -1) {
disableCpuCacheFlush = disableCacheFlushKey == 1 ? true : false;
}
workloadMode = DebugManager.flags.DirectSubmissionEnableDebugBuffer.get();
disableCacheFlush = DebugManager.flags.DirectSubmissionDisableCacheFlush.get();
disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get();
hwInfo = &device.getHardwareInfo();
}
@@ -146,7 +149,7 @@ template <typename GfxFamily>
bool DirectSubmissionHw<GfxFamily>::stopRingBuffer() {
void *flushPtr = ringCommandStream.getSpace(0);
dispatchFlushSection();
if (DebugManager.flags.DirectSubmissionDisableMonitorFence.get()) {
if (disableMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
dispatchTagUpdateSection(currentTagData.tagAddress, currentTagData.tagValue);
@@ -275,7 +278,7 @@ template <typename GfxFamily>
inline size_t DirectSubmissionHw<GfxFamily>::getSizeEnd() {
size_t size = getSizeEndingSection() +
getSizeFlushSection();
if (DebugManager.flags.DirectSubmissionDisableMonitorFence.get()) {
if (disableMonitorFence) {
size += getSizeTagUpdateSection();
}
return size;
@@ -292,18 +295,17 @@ inline uint64_t DirectSubmissionHw<GfxFamily>::getCommandBufferPositionGpuAddres
template <typename GfxFamily>
inline size_t DirectSubmissionHw<GfxFamily>::getSizeDispatch() {
size_t size = getSizeSemaphoreSection();
int32_t dispatchMode = DebugManager.flags.DirectSubmissionEnableDebugBuffer.get();
if (dispatchMode == 0) {
if (workloadMode == 0) {
size += getSizeStartSection();
} else if (dispatchMode == 1) {
} else if (workloadMode == 1) {
size += getSizeStoraDataSection();
}
//mode 2 does not dispatch any commands
if (!DebugManager.flags.DirectSubmissionDisableCacheFlush.get()) {
if (!disableCacheFlush) {
size += getSizeFlushSection();
}
if (!DebugManager.flags.DirectSubmissionDisableMonitorFence.get()) {
if (!disableMonitorFence) {
size += getSizeTagUpdateSection();
}
@@ -313,8 +315,8 @@ inline size_t DirectSubmissionHw<GfxFamily>::getSizeDispatch() {
template <typename GfxFamily>
void *DirectSubmissionHw<GfxFamily>::dispatchWorkloadSection(BatchBuffer &batchBuffer) {
void *currentPosition = ringCommandStream.getSpace(0);
int32_t dispatchMode = DebugManager.flags.DirectSubmissionEnableDebugBuffer.get();
if (dispatchMode == 0) {
if (workloadMode == 0) {
auto commandStreamAddress = ptrOffset(batchBuffer.commandBufferAllocation->getGpuAddress(), batchBuffer.startOffset);
void *returnCmd = batchBuffer.endCmdPtr;
@@ -322,18 +324,18 @@ void *DirectSubmissionHw<GfxFamily>::dispatchWorkloadSection(BatchBuffer &batchB
void *returnPosition = ringCommandStream.getSpace(0);
setReturnAddress(returnCmd, getCommandBufferPositionGpuAddress(returnPosition));
} else if (dispatchMode == 1) {
} else if (workloadMode == 1) {
uint64_t storeAddress = semaphoreGpuVa;
storeAddress += ptrDiff(&static_cast<RingSemaphoreData *>(semaphorePtr)->Reserved1Uint32, semaphorePtr);
dispatchStoreDataSection(storeAddress, currentQueueWorkCount);
}
//mode 2 does not dispatch any commands
if (!DebugManager.flags.DirectSubmissionDisableCacheFlush.get()) {
if (!disableCacheFlush) {
dispatchFlushSection();
}
if (!DebugManager.flags.DirectSubmissionDisableMonitorFence.get()) {
if (!disableMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
dispatchTagUpdateSection(currentTagData.tagAddress, currentTagData.tagValue);

View File

@@ -287,6 +287,14 @@ HWTEST_F(DirectSubmissionTest,
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
MockDirectSubmissionHw<FamilyType> regularDirectSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
size_t regularSizeEnd = regularDirectSubmission.getSizeEnd();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionDisableMonitorFence.set(true);
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
@@ -296,12 +304,8 @@ HWTEST_F(DirectSubmissionTest,
size_t alreadyDispatchedSize = directSubmission.ringCommandStream.getUsed();
uint32_t oldQueueCount = directSubmission.semaphoreData->QueueWorkCount;
size_t regularSizeEnd = directSubmission.getSizeEnd();
size_t tagUpdateSize = directSubmission.getSizeTagUpdateSection();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionDisableMonitorFence.set(true);
size_t disabledSizeEnd = directSubmission.getSizeEnd();
EXPECT_EQ(disabledSizeEnd, regularSizeEnd + tagUpdateSize);
@@ -326,7 +330,7 @@ HWTEST_F(DirectSubmissionTest,
uint32_t address = pipeControl->getAddress();
uint64_t actualAddress = (static_cast<uint64_t>(addressHigh) << 32ull) | address;
uint64_t data = pipeControl->getImmediateData();
if ((directSubmission.tagAddressSetValue == actualAddress) ||
if ((directSubmission.tagAddressSetValue == actualAddress) &&
(directSubmission.tagValueSetValue == data)) {
foundFenceUpdate = true;
break;
@@ -340,6 +344,14 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
MockDirectSubmissionHw<FamilyType> regularDirectSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionDisableMonitorFence.set(true);
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
@@ -348,12 +360,8 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
EXPECT_TRUE(ret);
size_t alreadyDispatchedSize = directSubmission.ringCommandStream.getUsed();
size_t regularSizeDispatch = directSubmission.getSizeDispatch();
size_t tagUpdateSize = directSubmission.getSizeTagUpdateSection();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionDisableMonitorFence.set(true);
size_t disabledSizeDispatch = directSubmission.getSizeDispatch();
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - tagUpdateSize));
@@ -377,7 +385,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
uint32_t address = pipeControl->getAddress();
uint64_t actualAddress = (static_cast<uint64_t>(addressHigh) << 32ull) | address;
uint64_t data = pipeControl->getImmediateData();
if ((directSubmission.tagAddressSetValue == actualAddress) ||
if ((directSubmission.tagAddressSetValue == actualAddress) &&
(directSubmission.tagValueSetValue == data)) {
foundFenceUpdate = true;
break;
@@ -391,6 +399,14 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
MockDirectSubmissionHw<FamilyType> regularDirectSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(true);
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
@@ -399,12 +415,8 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
EXPECT_TRUE(ret);
size_t alreadyDispatchedSize = directSubmission.ringCommandStream.getUsed();
size_t regularSizeDispatch = directSubmission.getSizeDispatch();
size_t flushSize = directSubmission.getSizeFlushSection();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(true);
size_t disabledSizeDispatch = directSubmission.getSizeDispatch();
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - flushSize));
@@ -440,6 +452,15 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
MockDirectSubmissionHw<FamilyType> regularDirectSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionEnableDebugBuffer.set(1);
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
@@ -448,13 +469,9 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
EXPECT_TRUE(ret);
size_t alreadyDispatchedSize = directSubmission.ringCommandStream.getUsed();
size_t regularSizeDispatch = directSubmission.getSizeDispatch();
size_t startSize = directSubmission.getSizeStartSection();
size_t loadDataSize = directSubmission.getSizeStoraDataSection();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionEnableDebugBuffer.set(1);
size_t debugSizeDispatch = directSubmission.getSizeDispatch();
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize + loadDataSize));
@@ -483,6 +500,14 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
MockDirectSubmissionHw<FamilyType> regularDirectSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionEnableDebugBuffer.set(2);
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
@@ -491,12 +516,8 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
EXPECT_TRUE(ret);
size_t alreadyDispatchedSize = directSubmission.ringCommandStream.getUsed();
size_t regularSizeDispatch = directSubmission.getSizeDispatch();
size_t startSize = directSubmission.getSizeStartSection();
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionEnableDebugBuffer.set(2);
size_t debugSizeDispatch = directSubmission.getSizeDispatch();
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize));
@@ -603,12 +624,12 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetDispatchSizeThenExpec
HWTEST_F(DirectSubmissionTest,
givenDirectSubmissionEnableDebugBufferModeOneWhenGetDispatchSizeThenExpectCorrectSizeReturned) {
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionEnableDebugBuffer.set(1);
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
size_t expectedSize = directSubmission.getSizeStoraDataSection() +
directSubmission.getSizeFlushSection() +
directSubmission.getSizeTagUpdateSection() +
@@ -619,12 +640,12 @@ HWTEST_F(DirectSubmissionTest,
HWTEST_F(DirectSubmissionTest,
givenDirectSubmissionEnableDebugBufferModeTwoWhenGetDispatchSizeThenExpectCorrectSizeReturned) {
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionEnableDebugBuffer.set(2);
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
size_t expectedSize = directSubmission.getSizeFlushSection() +
directSubmission.getSizeTagUpdateSection() +
directSubmission.getSizeSemaphoreSection();
@@ -634,12 +655,12 @@ HWTEST_F(DirectSubmissionTest,
HWTEST_F(DirectSubmissionTest,
givenDirectSubmissionDisableCacheFlushWhenGetDispatchSizeThenExpectCorrectSizeReturned) {
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(true);
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
size_t expectedSize = directSubmission.getSizeStartSection() +
directSubmission.getSizeTagUpdateSection() +
directSubmission.getSizeSemaphoreSection();
@@ -649,11 +670,12 @@ HWTEST_F(DirectSubmissionTest,
HWTEST_F(DirectSubmissionTest,
givenDirectSubmissionDisableMonitorFenceWhenGetDispatchSizeThenExpectCorrectSizeReturned) {
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionDisableMonitorFence.set(true);
MockDirectSubmissionHw<FamilyType> directSubmission(*pDevice,
std::make_unique<RenderDispatcher<FamilyType>>(),
*osContext.get());
DebugManagerStateRestore restore;
DebugManager.flags.DirectSubmissionDisableMonitorFence.set(true);
size_t expectedSize = directSubmission.getSizeStartSection() +
directSubmission.getSizeFlushSection() +