mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add missing cache flush
Resolves: NEO-6505 Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
0346a5679f
commit
09d2ffb9ed
@ -54,7 +54,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
|
|||||||
false, //useSingleSubdevice
|
false, //useSingleSubdevice
|
||||||
false, //useGlobalAtomics
|
false, //useGlobalAtomics
|
||||||
this->device->getNEODevice()->getNumGenericSubDevices() > 1, //areMultipleSubDevicesInContext
|
this->device->getNEODevice()->getNumGenericSubDevices() > 1, //areMultipleSubDevicesInContext
|
||||||
false //memoryMigrationRequired
|
false, //memoryMigrationRequired
|
||||||
|
false //textureCacheFlush
|
||||||
);
|
);
|
||||||
|
|
||||||
this->commandContainer.removeDuplicatesFromResidencyContainer();
|
this->commandContainer.removeDuplicatesFromResidencyContainer();
|
||||||
|
@ -685,6 +685,10 @@ uint32_t CommandQueue::peekBcsTaskCount(aub_stream::EngineType bcsEngineType) co
|
|||||||
return state.taskCount;
|
return state.taskCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether a texture cache flush should be requested for a command.
// True only when commandType is CL_COMMAND_COPY_IMAGE and the GPGPU command
// stream receiver reports direct submission as enabled; false otherwise.
bool CommandQueue::isTextureCacheFlushNeeded(uint32_t commandType) const {
|
||||||
|
return commandType == CL_COMMAND_COPY_IMAGE && getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled();
|
||||||
|
}
|
||||||
|
|
||||||
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) {
|
IndirectHeap &CommandQueue::getIndirectHeap(IndirectHeap::Type heapType, size_t minRequiredSize) {
|
||||||
return getGpgpuCommandStreamReceiver().getIndirectHeap(heapType, minRequiredSize);
|
return getGpgpuCommandStreamReceiver().getIndirectHeap(heapType, minRequiredSize);
|
||||||
}
|
}
|
||||||
|
@ -45,14 +45,6 @@ enum class QueuePriority {
|
|||||||
HIGH
|
HIGH
|
||||||
};
|
};
|
||||||
|
|
||||||
// Old-side (pre-change) form of the check, shown in the left diff column.
// Returns true when commandType is one of READ_BUFFER, READ_BUFFER_RECT,
// READ_IMAGE or SVM_MAP, or when printfHandler is non-null.
inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) {
|
|
||||||
return (commandType == CL_COMMAND_READ_BUFFER ||
|
|
||||||
commandType == CL_COMMAND_READ_BUFFER_RECT ||
|
|
||||||
commandType == CL_COMMAND_READ_IMAGE ||
|
|
||||||
commandType == CL_COMMAND_SVM_MAP ||
|
|
||||||
printfHandler);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct OpenCLObjectMapper<_cl_command_queue> {
|
struct OpenCLObjectMapper<_cl_command_queue> {
|
||||||
typedef class CommandQueue DerivedType;
|
typedef class CommandQueue DerivedType;
|
||||||
@ -371,6 +363,17 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||||||
void providePerformanceHint(TransferProperties &transferProperties);
|
void providePerformanceHint(TransferProperties &transferProperties);
|
||||||
bool queueDependenciesClearRequired() const;
|
bool queueDependenciesClearRequired() const;
|
||||||
bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;
|
bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;
|
||||||
|
|
||||||
|
bool isTextureCacheFlushNeeded(uint32_t commandType) const;
|
||||||
|
// Returns true when any of the following holds:
//  - commandType is READ_BUFFER, READ_BUFFER_RECT, READ_IMAGE or SVM_MAP,
//  - printfHandler is non-null,
//  - isTextureCacheFlushNeeded(commandType) returns true.
// Declared const because it only queries the queue via isTextureCacheFlushNeeded().
inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) const {
|
||||||
|
return (commandType == CL_COMMAND_READ_BUFFER ||
|
||||||
|
commandType == CL_COMMAND_READ_BUFFER_RECT ||
|
||||||
|
commandType == CL_COMMAND_READ_IMAGE ||
|
||||||
|
commandType == CL_COMMAND_SVM_MAP ||
|
||||||
|
printfHandler ||
|
||||||
|
isTextureCacheFlushNeeded(commandType));
|
||||||
|
}
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
|
MOCKABLE_VIRTUAL bool blitEnqueueImageAllowed(const size_t *origin, const size_t *region, const Image &image) const;
|
||||||
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
|
void aubCaptureHook(bool &blocking, bool &clearAllDependencies, const MultiDispatchInfo &multiDispatchInfo);
|
||||||
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
|
virtual bool obtainTimestampPacketForCacheFlush(bool isCacheFlushRequired) const = 0;
|
||||||
|
@ -892,8 +892,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||||||
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
||||||
useGlobalAtomics, //useGlobalAtomics
|
useGlobalAtomics, //useGlobalAtomics
|
||||||
kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext
|
kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext
|
||||||
kernel->requiresMemoryMigration() //memoryMigrationRequired
|
kernel->requiresMemoryMigration(), //memoryMigrationRequired
|
||||||
);
|
isTextureCacheFlushNeeded(commandType)); //textureCacheFlush
|
||||||
|
|
||||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||||
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
|
dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode = specialPipelineSelectMode;
|
||||||
@ -1116,7 +1116,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
|||||||
false, //useSingleSubdevice
|
false, //useSingleSubdevice
|
||||||
false, //useGlobalAtomics
|
false, //useGlobalAtomics
|
||||||
context->containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
|
context->containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
|
||||||
false); //memoryMigrationRequired
|
false, //memoryMigrationRequired
|
||||||
|
false); //textureCacheFlush
|
||||||
|
|
||||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
|
||||||
|
@ -79,7 +79,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
|||||||
false, //useSingleSubdevice
|
false, //useSingleSubdevice
|
||||||
false, //useGlobalAtomics
|
false, //useGlobalAtomics
|
||||||
false, //areMultipleSubDevicesInContext
|
false, //areMultipleSubDevicesInContext
|
||||||
false); //memoryMigrationRequired
|
false, //memoryMigrationRequired
|
||||||
|
false); //textureCacheFlush
|
||||||
|
|
||||||
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
|
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
|
||||||
|
|
||||||
@ -246,7 +247,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
kernel->isSingleSubdevicePreferred(), //useSingleSubdevice
|
||||||
kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
|
kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
|
||||||
kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext
|
kernel->areMultipleSubDevicesInContext(), //areMultipleSubDevicesInContext
|
||||||
kernel->requiresMemoryMigration()); //memoryMigrationRequired
|
kernel->requiresMemoryMigration(), //memoryMigrationRequired
|
||||||
|
false); //textureCacheFlush
|
||||||
|
|
||||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||||
@ -388,7 +390,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
false, //useSingleSubdevice
|
false, //useSingleSubdevice
|
||||||
false, //useGlobalAtomics
|
false, //useGlobalAtomics
|
||||||
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
|
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), //areMultipleSubDevicesInContext
|
||||||
false); //memoryMigrationRequired
|
false, //memoryMigrationRequired
|
||||||
|
false); //textureCacheFlush
|
||||||
|
|
||||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||||
|
@ -379,6 +379,29 @@ HWTEST_F(CommandQueueCommandStreamTest, givenCommandQueueThatWaitsOnAbortedUserE
|
|||||||
EXPECT_EQ(100u, cmdQ.taskLevel);
|
EXPECT_EQ(100u, cmdQ.taskLevel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies isTextureCacheFlushNeeded() for a range of command types: the result
// is expected to be true only for CL_COMMAND_COPY_IMAGE while the mock command
// stream receiver has directSubmissionAvailable set, and false in every other case.
HWTEST_F(CommandQueueCommandStreamTest, WhenCheckIsTextureCacheFlushNeededThenReturnProperValue) {
|
||||||
|
MockContext context;
|
||||||
|
auto mockDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||||
|
MockCommandQueue cmdQ(&context, mockDevice.get(), 0, false);
|
||||||
|
// NOTE(review): presumably directSubmissionAvailable on this mock CSR drives
// isDirectSubmissionEnabled() — confirm against the mock's definition.
auto &commandStreamReceiver = mockDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
||||||
|
// Checked before directSubmissionAvailable is explicitly set by this test.
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(CL_COMMAND_COPY_BUFFER_RECT));
|
||||||
|
|
||||||
|
// Iterates from CL_COMMAND_NDRANGE_KERNEL up to, but not including,
// CL_COMMAND_RELEASE_GL_OBJECTS.
for (auto i = CL_COMMAND_NDRANGE_KERNEL; i < CL_COMMAND_RELEASE_GL_OBJECTS; i++) {
|
||||||
|
if (i == CL_COMMAND_COPY_IMAGE) {
|
||||||
|
// Copy-image: result is expected to follow the direct-submission flag.
commandStreamReceiver.directSubmissionAvailable = true;
|
||||||
|
EXPECT_TRUE(cmdQ.isTextureCacheFlushNeeded(i));
|
||||||
|
commandStreamReceiver.directSubmissionAvailable = false;
|
||||||
|
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(i));
|
||||||
|
} else {
|
||||||
|
// Any other command type: expected false regardless of the flag.
commandStreamReceiver.directSubmissionAvailable = true;
|
||||||
|
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(i));
|
||||||
|
commandStreamReceiver.directSubmissionAvailable = false;
|
||||||
|
EXPECT_FALSE(cmdQ.isTextureCacheFlushNeeded(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(CommandQueueCommandStreamTest, GivenValidCommandQueueWhenGettingCommandStreamThenValidObjectIsReturned) {
|
TEST_F(CommandQueueCommandStreamTest, GivenValidCommandQueueWhenGettingCommandStreamThenValidObjectIsReturned) {
|
||||||
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
|
const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, 0, 0};
|
||||||
MockCommandQueue commandQueue(context.get(), pClDevice, props, false);
|
MockCommandQueue commandQueue(context.get(), pClDevice, props, false);
|
||||||
|
@ -83,6 +83,6 @@ struct ComputeModeRequirements : public ::testing::Test {
|
|||||||
|
|
||||||
CommandStreamReceiver *csr = nullptr;
|
CommandStreamReceiver *csr = nullptr;
|
||||||
std::unique_ptr<MockDevice> device;
|
std::unique_ptr<MockDevice> device;
|
||||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
|
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
|
||||||
GraphicsAllocation *alloc = nullptr;
|
GraphicsAllocation *alloc = nullptr;
|
||||||
};
|
};
|
||||||
|
@ -26,6 +26,7 @@ class MockCommandQueue : public CommandQueue {
|
|||||||
using CommandQueue::device;
|
using CommandQueue::device;
|
||||||
using CommandQueue::gpgpuEngine;
|
using CommandQueue::gpgpuEngine;
|
||||||
using CommandQueue::isCopyOnly;
|
using CommandQueue::isCopyOnly;
|
||||||
|
using CommandQueue::isTextureCacheFlushNeeded;
|
||||||
using CommandQueue::obtainNewTimestampPacketNodes;
|
using CommandQueue::obtainNewTimestampPacketNodes;
|
||||||
using CommandQueue::overrideEngine;
|
using CommandQueue::overrideEngine;
|
||||||
using CommandQueue::queueCapabilities;
|
using CommandQueue::queueCapabilities;
|
||||||
|
@ -217,6 +217,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||||||
PipeControlArgs args(dispatchFlags.dcFlush);
|
PipeControlArgs args(dispatchFlags.dcFlush);
|
||||||
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
args.notifyEnable = isUsedNotifyEnableForPostSync();
|
||||||
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
|
args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired;
|
||||||
|
args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush;
|
||||||
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
|
args.workloadPartitionOffset = this->activePartitions > 1 && this->staticWorkPartitioningEnabled;
|
||||||
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
|
||||||
commandStreamTask,
|
commandStreamTask,
|
||||||
|
@ -55,34 +55,35 @@ struct DispatchFlags {
|
|||||||
KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP,
|
KernelExecutionType kernelExecutionTypeP, MemoryCompressionState memoryCompressionStateP,
|
||||||
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
|
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
|
||||||
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
|
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
|
||||||
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP) : csrDependencies(csrDependenciesP),
|
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush) : csrDependencies(csrDependenciesP),
|
||||||
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
|
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
|
||||||
pipelineSelectArgs(pipelineSelectArgsP),
|
pipelineSelectArgs(pipelineSelectArgsP),
|
||||||
flushStampReference(flushStampReferenceP),
|
flushStampReference(flushStampReferenceP),
|
||||||
throttle(throttleP),
|
throttle(throttleP),
|
||||||
preemptionMode(preemptionModeP),
|
preemptionMode(preemptionModeP),
|
||||||
numGrfRequired(numGrfRequiredP),
|
numGrfRequired(numGrfRequiredP),
|
||||||
l3CacheSettings(l3CacheSettingsP),
|
l3CacheSettings(l3CacheSettingsP),
|
||||||
threadArbitrationPolicy(threadArbitrationPolicyP),
|
threadArbitrationPolicy(threadArbitrationPolicyP),
|
||||||
additionalKernelExecInfo(additionalKernelExecInfoP),
|
additionalKernelExecInfo(additionalKernelExecInfoP),
|
||||||
kernelExecutionType(kernelExecutionTypeP),
|
kernelExecutionType(kernelExecutionTypeP),
|
||||||
memoryCompressionState(memoryCompressionStateP),
|
memoryCompressionState(memoryCompressionStateP),
|
||||||
sliceCount(sliceCountP),
|
sliceCount(sliceCountP),
|
||||||
blocking(blockingP),
|
blocking(blockingP),
|
||||||
dcFlush(dcFlushP),
|
dcFlush(dcFlushP),
|
||||||
useSLM(useSLMP),
|
useSLM(useSLMP),
|
||||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
|
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
|
||||||
gsba32BitRequired(gsba32BitRequiredP),
|
gsba32BitRequired(gsba32BitRequiredP),
|
||||||
requiresCoherency(requiresCoherencyP),
|
requiresCoherency(requiresCoherencyP),
|
||||||
lowPriority(lowPriorityP),
|
lowPriority(lowPriorityP),
|
||||||
implicitFlush(implicitFlushP),
|
implicitFlush(implicitFlushP),
|
||||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
|
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
|
||||||
epilogueRequired(epilogueRequiredP),
|
epilogueRequired(epilogueRequiredP),
|
||||||
usePerDssBackedBuffer(usePerDSSbackedBufferP),
|
usePerDssBackedBuffer(usePerDSSbackedBufferP),
|
||||||
useSingleSubdevice(useSingleSubdeviceP),
|
useSingleSubdevice(useSingleSubdeviceP),
|
||||||
useGlobalAtomics(useGlobalAtomicsP),
|
useGlobalAtomics(useGlobalAtomicsP),
|
||||||
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
|
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
|
||||||
memoryMigrationRequired(memoryMigrationRequiredP){};
|
memoryMigrationRequired(memoryMigrationRequiredP),
|
||||||
|
textureCacheFlush(textureCacheFlush){};
|
||||||
|
|
||||||
CsrDependencies csrDependencies;
|
CsrDependencies csrDependencies;
|
||||||
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
|
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
|
||||||
@ -113,6 +114,7 @@ struct DispatchFlags {
|
|||||||
bool useGlobalAtomics = false;
|
bool useGlobalAtomics = false;
|
||||||
bool areMultipleSubDevicesInContext = false;
|
bool areMultipleSubDevicesInContext = false;
|
||||||
bool memoryMigrationRequired = false;
|
bool memoryMigrationRequired = false;
|
||||||
|
bool textureCacheFlush = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CsrSizeRequestFlags {
|
struct CsrSizeRequestFlags {
|
||||||
|
@ -41,7 +41,8 @@ struct DispatchFlagsHelper {
|
|||||||
false, //useSingleSubdevice
|
false, //useSingleSubdevice
|
||||||
false, //useGlobalAtomics
|
false, //useGlobalAtomics
|
||||||
false, //areMultipleSubDevicesInContext
|
false, //areMultipleSubDevicesInContext
|
||||||
false //memoryMigrationRequired
|
false, //memoryMigrationRequired
|
||||||
|
false //textureCacheFlush
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
Reference in New Issue
Block a user