Add missing cache flush

Resolves: NEO-6505

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2021-12-06 10:01:46 +00:00 committed by Compute-Runtime-Automation
parent 0346a5679f
commit 09d2ffb9ed
11 changed files with 85 additions and 45 deletions

View File

@ -54,7 +54,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
false, //useSingleSubdevice
false, //useGlobalAtomics
this->device->getNEODevice()->getNumGenericSubDevices() > 1, //areMultipleSubDevicesInContext
false //memoryMigrationRequired
false, //memoryMigrationRequired
false //textureCacheFlush
);
this->commandContainer.removeDuplicatesFromResidencyContainer();

View File

@ -685,6 +685,10 @@ uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) co
return state.taskCount;
}
// Reports whether the given command type needs a texture cache flush.
// Only CL_COMMAND_COPY_IMAGE can qualify, and only when the GPGPU command
// stream receiver reports that direct submission is enabled; every other
// command type yields false without querying the receiver.
bool CommandQueue::isTextureCacheFlushNeeded(uint32_t commandType) const {
    if (commandType != CL_COMMAND_COPY_IMAGE) {
        return false;
    }
    return getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled();
}
// Forwards the request to the GPGPU command stream receiver and returns the
// indirect heap it provides for the given type and minimum required size.
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) {
    auto &&gpgpuCsr = getGpgpuCommandStreamReceiver();
    return gpgpuCsr.getIndirectHeap(heapType, minRequiredSize);
}

View File

@ -45,14 +45,6 @@ enum class QueuePriority {
HIGH
};
// Returns true when the command is one of the read/map commands listed in the
// switch, or when a printf handler is supplied (non-null); false otherwise.
inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) {
    switch (commandType) {
    case CL_COMMAND_READ_BUFFER:
    case CL_COMMAND_READ_BUFFER_RECT:
    case CL_COMMAND_READ_IMAGE:
    case CL_COMMAND_SVM_MAP:
        return true;
    default:
        return printfHandler != nullptr;
    }
}
template <>
struct OpenCLObjectMapper<_cl_command_queue> {
typedef class CommandQueue DerivedType;
@ -371,6 +363,17 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
void providePerformanceHint(TransferProperties &transferProperties);
bool queueDependenciesClearRequired() const;
bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;
bool isTextureCacheFlushNeeded(uint32_t commandType) const;
// Returns true when the command is one of the read/map commands listed in the
// switch, when a printf handler is supplied (non-null), or when
// isTextureCacheFlushNeeded() reports true for this command type.
// The texture-cache query is only reached if none of the earlier conditions hold.
inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) const {
    switch (commandType) {
    case CL_COMMAND_READ_BUFFER:
    case CL_COMMAND_READ_BUFFER_RECT:
    case CL_COMMAND_READ_IMAGE:
    case CL_COMMAND_SVM_MAP:
        return true;
    default:
        break;
    }
    if (printfHandler != nullptr) {
        return true;
    }
    return isTextureCacheFlushNeeded(commandType);
}
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;

View File

@ -892,8 +892,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
useGlobalAtomics, //useGlobalAtomics
kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext
kernel->requiresMemoryMigration() //memoryMigrationRequired
);
kernel->requiresMemoryMigration(), //memoryMigrationRequired
isTextureCacheFlushNeeded(commandType)); //textureCacheFlush
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
@ -1116,7 +1116,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
false, //useSingleSubdevice
false, //useGlobalAtomics
context->containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
false); //memoryMigrationRequired
false, //memoryMigrationRequired
false); //textureCacheFlush
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);

View File

@ -79,7 +79,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
false, //useSingleSubdevice
false, //useGlobalAtomics
false, //areMultipleSubDevicesInContext
false); //memoryMigrationRequired
false, //memoryMigrationRequired
false); //textureCacheFlush
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
@ -246,7 +247,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext
kernel->requiresMemoryMigration()); //memoryMigrationRequired
kernel->requiresMemoryMigration(), //memoryMigrationRequired
false); //textureCacheFlush
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
@ -388,7 +390,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
false, //useSingleSubdevice
false, //useGlobalAtomics
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
false); //memoryMigrationRequired
false, //memoryMigrationRequired
false); //textureCacheFlush
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);

View File

@ -379,6 +379,29 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCommandQueueThatWaitsOnAbortedUserE
EXPECT_EQ(100u, cmdQ.taskLevel);
}
// Verifies isTextureCacheFlushNeeded(): across the iterated command types only
// CL_COMMAND_COPY_IMAGE may report true, and only while the mock receiver's
// directSubmissionAvailable flag is set.
HWTEST_F(CommandQueueCommandStreamTest, WhenCheckIsTextureCacheFlushNeededThenReturnProperValue) {
    MockContext context;
    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false);
    auto &ultCsr = mockDevice->getUltCommandStreamReceiver<FamilyType>();

    EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(CL_COMMAND_COPY_BUFFER_RECT));

    for (auto commandType = CL_COMMAND_NDRANGE_KERNEL; commandType < CL_COMMAND_RELEASE_GL_OBJECTS; commandType++) {
        // With the flag set, the result must be true exactly for the image-copy command.
        const bool isImageCopy = (commandType == CL_COMMAND_COPY_IMAGE);

        ultCsr.directSubmissionAvailable = true;
        EXPECT_EQ(isImageCopy, cmdQ.isTextureCacheFlushNeeded(commandType));

        // With the flag cleared, no command type may request the flush.
        ultCsr.directSubmissionAvailable = false;
        EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(commandType));
    }
}
TEST_F(CommandQueueCommandStreamTest, GivenValidCommandQueueWhenGettingCommandStreamThenValidObjectIsReturned) {
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
MockCommandQueue commandQueue(context.get(), pClDevice, props, false);

View File

@ -83,6 +83,6 @@ struct ComputeModeRequirements : public ::testing::Test {
CommandStreamReceiver *csr = nullptr;
std::unique_ptr<MockDevice> device;
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
GraphicsAllocation *alloc = nullptr;
};

View File

@ -26,6 +26,7 @@ class MockCommandQueue : public CommandQueue {
using CommandQueue::device;
using CommandQueue::gpgpuEngine;
using CommandQueue::isCopyOnly;
using CommandQueue::isTextureCacheFlushNeeded;
using CommandQueue::obtainNewTimestampPacketNodes;
using CommandQueue::overrideEngine;
using CommandQueue::queueCapabilities;

View File

@ -217,6 +217,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
PipeControlArgs args(dispatchFlags.dcFlush);
args.notifyEnable = isUsedNotifyEnableForPostSync();
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush;
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
commandStreamTask,

View File

@ -55,34 +55,35 @@ struct DispatchFlags {
KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP,
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP) : csrDependencies(csrDependenciesP),
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
pipelineSelectArgs(pipelineSelectArgsP),
flushStampReference(flushStampReferenceP),
throttle(throttleP),
preemptionMode(preemptionModeP),
numGrfRequired(numGrfRequiredP),
l3CacheSettings(l3CacheSettingsP),
threadArbitrationPolicy(threadArbitrationPolicyP),
additionalKernelExecInfo(additionalKernelExecInfoP),
kernelExecutionType(kernelExecutionTypeP),
memoryCompressionState(memoryCompressionStateP),
sliceCount(sliceCountP),
blocking(blockingP),
dcFlush(dcFlushP),
useSLM(useSLMP),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
gsba32BitRequired(gsba32BitRequiredP),
requiresCoherency(requiresCoherencyP),
lowPriority(lowPriorityP),
implicitFlush(implicitFlushP),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
epilogueRequired(epilogueRequiredP),
usePerDssBackedBuffer(usePerDSSbackedBufferP),
useSingleSubdevice(useSingleSubdeviceP),
useGlobalAtomics(useGlobalAtomicsP),
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
memoryMigrationRequired(memoryMigrationRequiredP){};
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush) : csrDependencies(csrDependenciesP),
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
pipelineSelectArgs(pipelineSelectArgsP),
flushStampReference(flushStampReferenceP),
throttle(throttleP),
preemptionMode(preemptionModeP),
numGrfRequired(numGrfRequiredP),
l3CacheSettings(l3CacheSettingsP),
threadArbitrationPolicy(threadArbitrationPolicyP),
additionalKernelExecInfo(additionalKernelExecInfoP),
kernelExecutionType(kernelExecutionTypeP),
memoryCompressionState(memoryCompressionStateP),
sliceCount(sliceCountP),
blocking(blockingP),
dcFlush(dcFlushP),
useSLM(useSLMP),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
gsba32BitRequired(gsba32BitRequiredP),
requiresCoherency(requiresCoherencyP),
lowPriority(lowPriorityP),
implicitFlush(implicitFlushP),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
epilogueRequired(epilogueRequiredP),
usePerDssBackedBuffer(usePerDSSbackedBufferP),
useSingleSubdevice(useSingleSubdeviceP),
useGlobalAtomics(useGlobalAtomicsP),
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
memoryMigrationRequired(memoryMigrationRequiredP),
textureCacheFlush(textureCacheFlush){};
CsrDependencies csrDependencies;
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
@ -113,6 +114,7 @@ struct DispatchFlags {
bool useGlobalAtomics = false;
bool areMultipleSubDevicesInContext = false;
bool memoryMigrationRequired = false;
bool textureCacheFlush = false;
};
struct CsrSizeRequestFlags {

View File

@ -41,7 +41,8 @@ struct DispatchFlagsHelper {
false, //useSingleSubdevice
false, //useGlobalAtomics
false, //areMultipleSubDevicesInContext
false //memoryMigrationRequired
false, //memoryMigrationRequired
false //textureCacheFlush
);
}
};