Add missing cache flush
Resolves: NEO-6505
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
0346a5679f
commit
09d2ffb9ed
|
@ -54,7 +54,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
|
|||
false, //useSingleSubdevice
|
||||
false, //useGlobalAtomics
|
||||
this->device->getNEODevice()->getNumGenericSubDevices() > 1, //areMultipleSubDevicesInContext
|
||||
false //memoryMigrationRequired
|
||||
false, //memoryMigrationRequired
|
||||
false //textureCacheFlush
|
||||
);
|
||||
|
||||
this->commandContainer.removeDuplicatesFromResidencyContainer();
|
||||
|
|
|
@ -685,6 +685,10 @@ uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) co
|
|||
return state.taskCount;
|
||||
}
|
||||
|
||||
bool CommandQueue::isTextureCacheFlushNeeded(uint32_t commandType) const {
|
||||
return commandType == CL_COMMAND_COPY_IMAGE && getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled();
|
||||
}
|
||||
|
||||
// Returns the indirect heap obtained from the GPGPU command stream receiver,
// forwarding heapType and minRequiredSize to it unchanged.
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) {
    auto &&gpgpuCsr = getGpgpuCommandStreamReceiver();
    return gpgpuCsr.getIndirectHeap(heapType, minRequiredSize);
}
|
||||
|
|
|
@ -45,14 +45,6 @@ enum class QueuePriority {
|
|||
HIGH
|
||||
};
|
||||
|
||||
inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) {
|
||||
return (commandType == CL_COMMAND_READ_BUFFER ||
|
||||
commandType == CL_COMMAND_READ_BUFFER_RECT ||
|
||||
commandType == CL_COMMAND_READ_IMAGE ||
|
||||
commandType == CL_COMMAND_SVM_MAP ||
|
||||
printfHandler);
|
||||
}
|
||||
|
||||
template <>
|
||||
struct OpenCLObjectMapper<_cl_command_queue> {
|
||||
typedef class CommandQueue DerivedType;
|
||||
|
@ -371,6 +363,17 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||
void providePerformanceHint(TransferProperties &transferProperties);
|
||||
bool queueDependenciesClearRequired() const;
|
||||
bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;
|
||||
|
||||
// Returns true only when commandType is CL_COMMAND_COPY_IMAGE and the GPGPU
// command stream receiver reports direct submission as enabled.
bool isTextureCacheFlushNeeded(uint32_t commandType) const;
|
||||
inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) const {
|
||||
return (commandType == CL_COMMAND_READ_BUFFER ||
|
||||
commandType == CL_COMMAND_READ_BUFFER_RECT ||
|
||||
commandType == CL_COMMAND_READ_IMAGE ||
|
||||
commandType == CL_COMMAND_SVM_MAP ||
|
||||
printfHandler ||
|
||||
isTextureCacheFlushNeeded(commandType));
|
||||
}
|
||||
|
||||
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
|
||||
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
|
||||
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
|
||||
|
|
|
@ -892,8 +892,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
||||
useGlobalAtomics, //useGlobalAtomics
|
||||
kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext
|
||||
kernel->requiresMemoryMigration() //memoryMigrationRequired
|
||||
);
|
||||
kernel->requiresMemoryMigration(), //memoryMigrationRequired
|
||||
isTextureCacheFlushNeeded(commandType)); //textureCacheFlush
|
||||
|
||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
|
||||
|
@ -1116,7 +1116,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
|||
false, //useSingleSubdevice
|
||||
false, //useGlobalAtomics
|
||||
context->containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
|
||||
false); //memoryMigrationRequired
|
||||
false, //memoryMigrationRequired
|
||||
false); //textureCacheFlush
|
||||
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||
|
|
|
@ -79,7 +79,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
|||
false, //useSingleSubdevice
|
||||
false, //useGlobalAtomics
|
||||
false, //areMultipleSubDevicesInContext
|
||||
false); //memoryMigrationRequired
|
||||
false, //memoryMigrationRequired
|
||||
false); //textureCacheFlush
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
|
||||
|
||||
|
@ -246,7 +247,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
||||
kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
|
||||
kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext
|
||||
kernel->requiresMemoryMigration()); //memoryMigrationRequired
|
||||
kernel->requiresMemoryMigration(), //memoryMigrationRequired
|
||||
false); //textureCacheFlush
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
|
@ -388,7 +390,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
|||
false, //useSingleSubdevice
|
||||
false, //useGlobalAtomics
|
||||
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
|
||||
false); //memoryMigrationRequired
|
||||
false, //memoryMigrationRequired
|
||||
false); //textureCacheFlush
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
|
|
|
@ -379,6 +379,29 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCommandQueueThatWaitsOnAbortedUserE
|
|||
EXPECT_EQ(100u, cmdQ.taskLevel);
|
||||
}
|
||||
|
||||
// Checks isTextureCacheFlushNeeded() over the command types in
// [CL_COMMAND_NDRANGE_KERNEL, CL_COMMAND_RELEASE_GL_OBJECTS): the result must be
// true only for CL_COMMAND_COPY_IMAGE while the CSR's directSubmissionAvailable
// flag is set, and false in every other combination.
HWTEST_F(CommandQueueCommandStreamTest, WhenCheckIsTextureCacheFlushNeededThenReturnProperValue) {
    MockContext context;
    auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
    MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false);
    auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver<FamilyType>();

    EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(CL_COMMAND_COPY_BUFFER_RECT));

    for (auto commandType = CL_COMMAND_NDRANGE_KERNEL; commandType < CL_COMMAND_RELEASE_GL_OBJECTS; commandType++) {
        const bool isImageCopy = (commandType == CL_COMMAND_COPY_IMAGE);

        // With direct submission available, only the image copy needs the flush.
        commandStreamReceiver.directSubmissionAvailable = true;
        if (isImageCopy) {
            EXPECT_TRUE(cmdQ.isTextureCacheFlushNeeded(commandType));
        } else {
            EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(commandType));
        }

        // Without direct submission, no command type needs it.
        commandStreamReceiver.directSubmissionAvailable = false;
        EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(commandType));
    }
}
|
||||
|
||||
TEST_F(CommandQueueCommandStreamTest, GivenValidCommandQueueWhenGettingCommandStreamThenValidObjectIsReturned) {
|
||||
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||
MockCommandQueue commandQueue(context.get(), pClDevice, props, false);
|
||||
|
|
|
@ -83,6 +83,6 @@ struct ComputeModeRequirements : public ::testing::Test {
|
|||
|
||||
CommandStreamReceiver *csr = nullptr;
|
||||
std::unique_ptr<MockDevice> device;
|
||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
|
||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
|
||||
GraphicsAllocation *alloc = nullptr;
|
||||
};
|
||||
|
|
|
@ -26,6 +26,7 @@ class MockCommandQueue : public CommandQueue {
|
|||
using CommandQueue::device;
|
||||
using CommandQueue::gpgpuEngine;
|
||||
using CommandQueue::isCopyOnly;
|
||||
using CommandQueue::isTextureCacheFlushNeeded;
|
||||
using CommandQueue::obtainNewTimestampPacketNodes;
|
||||
using CommandQueue::overrideEngine;
|
||||
using CommandQueue::queueCapabilities;
|
||||
|
|
|
@ -217,6 +217,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
PipeControlArgs args(dispatchFlags.dcFlush);
|
||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
|
||||
args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush;
|
||||
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||
commandStreamTask,
|
||||
|
|
|
@ -55,34 +55,35 @@ struct DispatchFlags {
|
|||
KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP,
|
||||
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
|
||||
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
|
||||
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP) : csrDependencies(csrDependenciesP),
|
||||
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
|
||||
pipelineSelectArgs(pipelineSelectArgsP),
|
||||
flushStampReference(flushStampReferenceP),
|
||||
throttle(throttleP),
|
||||
preemptionMode(preemptionModeP),
|
||||
numGrfRequired(numGrfRequiredP),
|
||||
l3CacheSettings(l3CacheSettingsP),
|
||||
threadArbitrationPolicy(threadArbitrationPolicyP),
|
||||
additionalKernelExecInfo(additionalKernelExecInfoP),
|
||||
kernelExecutionType(kernelExecutionTypeP),
|
||||
memoryCompressionState(memoryCompressionStateP),
|
||||
sliceCount(sliceCountP),
|
||||
blocking(blockingP),
|
||||
dcFlush(dcFlushP),
|
||||
useSLM(useSLMP),
|
||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
|
||||
gsba32BitRequired(gsba32BitRequiredP),
|
||||
requiresCoherency(requiresCoherencyP),
|
||||
lowPriority(lowPriorityP),
|
||||
implicitFlush(implicitFlushP),
|
||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
|
||||
epilogueRequired(epilogueRequiredP),
|
||||
usePerDssBackedBuffer(usePerDSSbackedBufferP),
|
||||
useSingleSubdevice(useSingleSubdeviceP),
|
||||
useGlobalAtomics(useGlobalAtomicsP),
|
||||
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
|
||||
memoryMigrationRequired(memoryMigrationRequiredP){};
|
||||
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush) : csrDependencies(csrDependenciesP),
|
||||
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
|
||||
pipelineSelectArgs(pipelineSelectArgsP),
|
||||
flushStampReference(flushStampReferenceP),
|
||||
throttle(throttleP),
|
||||
preemptionMode(preemptionModeP),
|
||||
numGrfRequired(numGrfRequiredP),
|
||||
l3CacheSettings(l3CacheSettingsP),
|
||||
threadArbitrationPolicy(threadArbitrationPolicyP),
|
||||
additionalKernelExecInfo(additionalKernelExecInfoP),
|
||||
kernelExecutionType(kernelExecutionTypeP),
|
||||
memoryCompressionState(memoryCompressionStateP),
|
||||
sliceCount(sliceCountP),
|
||||
blocking(blockingP),
|
||||
dcFlush(dcFlushP),
|
||||
useSLM(useSLMP),
|
||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
|
||||
gsba32BitRequired(gsba32BitRequiredP),
|
||||
requiresCoherency(requiresCoherencyP),
|
||||
lowPriority(lowPriorityP),
|
||||
implicitFlush(implicitFlushP),
|
||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
|
||||
epilogueRequired(epilogueRequiredP),
|
||||
usePerDssBackedBuffer(usePerDSSbackedBufferP),
|
||||
useSingleSubdevice(useSingleSubdeviceP),
|
||||
useGlobalAtomics(useGlobalAtomicsP),
|
||||
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
|
||||
memoryMigrationRequired(memoryMigrationRequiredP),
|
||||
textureCacheFlush(textureCacheFlush){};
|
||||
|
||||
CsrDependencies csrDependencies;
|
||||
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
|
||||
|
@ -113,6 +114,7 @@ struct DispatchFlags {
|
|||
bool useGlobalAtomics = false;
|
||||
bool areMultipleSubDevicesInContext = false;
|
||||
bool memoryMigrationRequired = false;
|
||||
bool textureCacheFlush = false;
|
||||
};
|
||||
|
||||
struct CsrSizeRequestFlags {
|
||||
|
|
|
@ -41,7 +41,8 @@ struct DispatchFlagsHelper {
|
|||
false, //useSingleSubdevice
|
||||
false, //useGlobalAtomics
|
||||
false, //areMultipleSubDevicesInContext
|
||||
false //memoryMigrationRequired
|
||||
false, //memoryMigrationRequired
|
||||
false //textureCacheFlush
|
||||
);
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue