Pass copy engines to waitUntilComplete in OpenCL command queue

Related-To: NEO-6057
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
Maciej Dziuban
2021-09-09 16:57:09 +00:00
committed by Compute-Runtime-Automation
parent 9bb1ef45dd
commit c04f8e5e5b
14 changed files with 48 additions and 36 deletions

View File

@@ -188,7 +188,7 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState
return false;
}
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) {
WAIT_ENTER()
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait);
@@ -206,10 +206,10 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcs
gtpinNotifyTaskCompletion(gpgpuTaskCountToWait);
}
if (bcsEngine) {
auto bcsCsr = getBcsCommandStreamReceiver(bcsEngine->getEngineType());
bcsCsr->waitForTaskCountWithKmdNotifyFallback(bcsTaskCountToWait, 0, false, false);
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(bcsTaskCountToWait);
for (const CopyEngineState &copyEngine : copyEnginesToWait) {
auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType);
bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false);
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount);
}
getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait);
@@ -919,7 +919,8 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
deferredTimestampPackets->swapNodes(nodesToRelease);
}
waitUntilComplete(taskCount, this->bcsState.taskCount, flushStamp->peekStamp(), false);
Range<CopyEngineState> states{&bcsState, bcsState.isValid() ? 1u : 0u};
waitUntilComplete(taskCount, states, flushStamp->peekStamp(), false);
if (printfHandler) {
printfHandler->printEnqueueOutput();

View File

@@ -7,6 +7,7 @@
#pragma once
#include "shared/source/helpers/engine_control.h"
#include "shared/source/utilities/range.h"
#include "opencl/source/command_queue/copy_engine_state.h"
#include "opencl/source/command_queue/csr_selection_args.h"
@@ -216,7 +217,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
MOCKABLE_VIRTUAL bool isQueueBlocked();
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep);
MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler);
static uint32_t getTaskLevelFromWaitList(uint32_t taskLevel,

View File

@@ -331,7 +331,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
getGpgpuCommandStreamReceiver().setMediaVFEStateDirty(true);
if (devQueueHw->getSchedulerReturnInstance() > 0) {
waitUntilComplete(completionStamp.taskCount, this->bcsState.taskCount, completionStamp.flushStamp, false);
waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
this->runSchedulerSimulation(*devQueueHw, *parentKernel);
}
}

View File

@@ -408,7 +408,8 @@ inline bool Event::wait(bool blocking, bool useQuickKmdSleep) {
}
}
cmdQueue->waitUntilComplete(taskCount.load(), this->bcsState.taskCount, flushStamp->peekStamp(), useQuickKmdSleep);
Range<CopyEngineState> states{&bcsState, bcsState.isValid() ? 1u : 0u};
cmdQueue->waitUntilComplete(taskCount.load(), states, flushStamp->peekStamp(), useQuickKmdSleep);
updateExecutionStatus();
DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0);

View File

@@ -97,7 +97,7 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::DependencyResolveOnGpu);
if (!memObj.isMemObjZeroCopy()) {
commandQueue.waitUntilComplete(completionStamp.taskCount, 0u, completionStamp.flushStamp, false);
commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
if (operationType == MAP) {
memObj.transferDataToHostPtr(copySize, copyOffset);
} else if (!readOnly) {
@@ -287,10 +287,9 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
dispatchFlags,
commandQueue.getDevice());
uint32_t bcsTaskCount = 0u;
if (kernelOperation->blitPropertiesContainer.size() > 0) {
bcsTaskCount = bcsCsrForAuxTranslation->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), bcsTaskCount);
const auto newTaskCount = bcsCsrForAuxTranslation->blitBuffer(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice());
commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount);
}
commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::GpuKernel);
@@ -299,7 +298,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
if (printfHandler) {
commandQueue.waitUntilComplete(completionStamp.taskCount, bcsTaskCount, completionStamp.flushStamp, false);
commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
printfHandler.get()->printEnqueueOutput();
}

View File

@@ -1275,7 +1275,8 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenW
uint32_t gpgpuTaskCount = 123;
uint32_t bcsTaskCount = 123;
commandQueue->waitUntilComplete(gpgpuTaskCount, bcsTaskCount, 0, false);
CopyEngineState bcsState{bcsCsr->getOsContext().getEngineType(), bcsTaskCount};
commandQueue->waitUntilComplete(gpgpuTaskCount, Range{&bcsState}, 0, false);
EXPECT_EQ(gpgpuTaskCount, static_cast<UltCommandStreamReceiver<FamilyType> *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());
EXPECT_EQ(bcsTaskCount, static_cast<UltCommandStreamReceiver<FamilyType> *>(bcsCsr)->latestWaitForCompletionWithTimeoutTaskCount.load());

View File

@@ -807,7 +807,7 @@ struct WaitForQueueCompletionTests : public ::testing::Test {
template <typename Family>
struct MyCmdQueue : public CommandQueueHw<Family> {
MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw<Family>(context, device, nullptr, false){};
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
requestedUseQuickKmdSleep = useQuickKmdSleep;
waitUntilCompleteCounter++;
}

View File

@@ -109,9 +109,9 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest {
public:
MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw<FamilyType>(context, device, nullptr, false) {}
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
waitUntilCompleteCalled = true;
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep);
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
}
void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr) override {

View File

@@ -801,9 +801,9 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest {
auxTranslationDirection);
}
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
waitCalled++;
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep);
CommandQueueHw<FamilyType>::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
}
std::vector<AuxTranslationDirection> auxTranslationDirections;

View File

@@ -762,13 +762,13 @@ HWTEST_F(EnqueueReadImageTest, GivenImage1DThatIsZeroCopyWhenReadImageWithTheSam
HWTEST_F(EnqueueReadImageTest, givenDeviceWithBlitterSupportWhenEnqueueReadImageThenBlitEnqueueImageAllowedReturnsCorrectResult) {
DebugManagerStateRestore restorer;
DebugManager.flags.OverrideInvalidEngineWithDefault.set(1);
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
DebugManager.flags.EnableBlitterForEnqueueImageOperations.set(1);
auto hwInfo = pClDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
hwInfo->capabilityTable.blitterOperationsSupported = true;
REQUIRE_BLITTER_OR_SKIP(hwInfo);
size_t origin[] = {0, 0, 0};
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
std::unique_ptr<Image> image(Image2dHelper<>::create(context));

View File

@@ -99,7 +99,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTr
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false);
}
HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompletionCalledThenTryCpuPollingWithoutTimeout) {
@@ -109,7 +109,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompleti
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false);
}
HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndKmdWait) {
@@ -122,7 +122,7 @@ HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenWaitUntilCompletionCalledThen
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, taskCountToWait)).Times(1).WillOnce(::testing::Return(false));
//this case is unrecoverable, so the wait is expected to throw.
EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false), std::exception);
EXPECT_THROW(cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false), std::exception);
}
HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTryCpuPollingAndDontCallKmdWait) {
@@ -132,7 +132,7 @@ HWTEST_F(KmdNotifyTests, givenReadyTaskCountWhenWaitUntilCompletionCalledThenTry
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 2, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
EXPECT_CALL(*csr, waitForFlushStamp(::testing::_)).Times(0);
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false);
}
HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDisableQuickKmdSleep) {
@@ -141,7 +141,7 @@ HWTEST_F(KmdNotifyTests, givenDefaultArgumentWhenWaitUntilCompleteIsCalledThenDi
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, false);
cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false);
}
HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThenChangeDelayValue) {
@@ -150,7 +150,7 @@ HWTEST_F(KmdNotifyTests, givenEnabledQuickSleepWhenWaitUntilCompleteIsCalledThen
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, true);
cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, true);
}
HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSleepRequestIsCalledThenUseBaseDelayValue) {
@@ -160,7 +160,7 @@ HWTEST_F(KmdNotifyTests, givenDisabledQuickSleepWhenWaitUntilCompleteWithQuickSl
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, expectedTimeout, taskCountToWait)).Times(1).WillOnce(::testing::Return(true));
cmdQ->waitUntilComplete(taskCountToWait, 0, flushStampToWait, true);
cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, true);
}
HWTEST_F(KmdNotifyTests, givenNotReadyTaskCountWhenPollForCompletionCalledThenTimeout) {
@@ -214,7 +214,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCal
auto csr = createMockCsr<FamilyType>();
EXPECT_CALL(*csr, waitForCompletionWithTimeout(true, 1, ::testing::_)).Times(1).WillOnce(::testing::Return(true));
cmdQ->throttle = QueueThrottle::LOW;
cmdQ->waitUntilComplete(1, 0, 1, false);
cmdQ->waitUntilComplete(1, {}, 1, false);
}
HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButThereIsNoFlushStampeAndCallWaitThenTimeoutIsDisabled) {
@@ -223,7 +223,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButTher
EXPECT_CALL(*csr, waitForCompletionWithTimeout(false, 0, ::testing::_)).Times(1).WillOnce(::testing::Return(true));
cmdQ->throttle = QueueThrottle::LOW;
cmdQ->waitUntilComplete(1, 0, 0, false);
cmdQ->waitUntilComplete(1, {}, 0, false);
}
HWTEST_F(KmdNotifyTests, givenQuickSleepRequestWhenItsSporadicWaitOptimizationIsDisabledThenDontOverrideQuickSleepRequest) {

View File

@@ -176,6 +176,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);
mockCmdQueue->bcsEngine = nullptr;
mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES;
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
@@ -191,6 +192,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
mockCmdQueue->bcsEngine = nullptr;
mockCmdQueue->bcsState.engineType = aub_stream::EngineType::NUM_ENGINES;
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
@@ -206,6 +208,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(0);
mockCmdQueue->bcsEngine = bcsEngine;
mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType();
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
@@ -222,6 +225,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(-1);
mockCmdQueue->bcsEngine = bcsEngine;
mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType();
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr);
@@ -240,6 +244,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs
DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
mockCmdQueue->bcsEngine = bcsEngine;
mockCmdQueue->bcsState.engineType = bcsEngine->getEngineType();
commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(8u, bcsCsr->blitBufferCalled);
commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);

View File

@@ -70,9 +70,9 @@ class MockCommandQueue : public CommandQueue {
return writeBufferRetValue;
}
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
latestTaskCountWaited = gpgpuTaskCountToWait;
return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep);
return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
}
cl_int enqueueCopyImage(Image *srcImage, Image *dstImage, const size_t *srcOrigin,
@@ -300,9 +300,9 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
useBcsCsrOnNotifyEnabled = notifyBcsCsr;
}
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, uint32_t bcsTaskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override {
latestTaskCountWaited = gpgpuTaskCountToWait;
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, bcsTaskCountToWait, flushStampToWait, useQuickKmdSleep);
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep);
}
bool isCacheFlushForBcsRequired() const override {