Add missing texture cache flush for blocked enqueue path

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
2026-01-05 09:09:04 +08:00 · 2021-12-07 08:40:35 +00:00
parent fca0d03c90
commit 0c49fa8203
5 changed files with 58 additions and 8 deletions
--- a/opencl/source/command_queue/command_queue.h
+++ b/opencl/source/command_queue/command_queue.h
@@ -350,6 +350,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {

    uint64_t dispatchHints = 0;

+    bool isTextureCacheFlushNeeded(uint32_t commandType) const;
+
  protected:
    void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
    cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
@@ -370,7 +372,6 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
    bool queueDependenciesClearRequired() const;
    bool blitEnqueueAllowed(const CsrSelectionArgs &args) const;

-    bool isTextureCacheFlushNeeded(uint32_t commandType) const;
    inline bool shouldFlushDC(uint32_t commandType, PrintfHandler *printfHandler) const {
        return (commandType == CL_COMMAND_READ_BUFFER ||
                commandType == CL_COMMAND_READ_BUFFER_RECT ||
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@@ -1032,7 +1032,7 @@ void CommandQueueHw<GfxFamily>::enqueueBlocked(
                                                         allSurfaces,
                                                         shouldFlushDC(commandType, printfHandler.get()),
                                                         slmUsed,
-                                                         commandType == CL_COMMAND_NDRANGE_KERNEL,
+                                                         commandType,
                                                         std::move(printfHandler),
                                                         preemptionMode,
                                                         multiDispatchInfo.peekMainKernel(),
--- a/opencl/source/helpers/task_information.cpp
+++ b/opencl/source/helpers/task_information.cpp
@@ -113,10 +113,10 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
 }

 CommandComputeKernel::CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces,
-                                           bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> &&printfHandler,
+                                           bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
                                           PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount)
    : Command(commandQueue, kernelOperation), flushDC(flushDC), slmUsed(usesSLM),
-      NDRangeKernel(ndRangeKernel), printfHandler(std::move(printfHandler)), kernel(kernel),
+      commandType(commandType), printfHandler(std::move(printfHandler)), kernel(kernel),
      kernelCount(kernelCount), preemptionMode(preemptionMode) {
    for (auto surface : surfaces) {
        this->surfaces.push_back(surface);
@@ -241,7 +241,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
        flushDC,                                                                          //dcFlush
        slmUsed,                                                                          //useSLM
        true,                                                                             //guardCommandBufferWithPipeControl
-        NDRangeKernel,                                                                    //GSBA32BitRequired
+        commandType == CL_COMMAND_NDRANGE_KERNEL,                                         //GSBA32BitRequired
        requiresCoherency,                                                                //requiresCoherency
        commandQueue.getPriority() == QueuePriority::LOW,                                 //lowPriority
        false,                                                                            //implicitFlush
@@ -252,7 +252,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
        kernel->getKernelInfo().kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, //useGlobalAtomics
        kernel->areMultipleSubDevicesInContext(),                                         //areMultipleSubDevicesInContext
        kernel->requiresMemoryMigration(),                                                //memoryMigrationRequired
-        false);                                                                           //textureCacheFlush
+        commandQueue.isTextureCacheFlushNeeded(this->commandType));                       //textureCacheFlush

    if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
        eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
--- a/opencl/source/helpers/task_information.h
+++ b/opencl/source/helpers/task_information.h
@@ -130,7 +130,7 @@ class CommandMapUnmap : public Command {
 class CommandComputeKernel : public Command {
  public:
    CommandComputeKernel(CommandQueue &commandQueue, std::unique_ptr<KernelOperation> &kernelOperation, std::vector<Surface *> &surfaces,
-                         bool flushDC, bool usesSLM, bool ndRangeKernel, std::unique_ptr<PrintfHandler> &&printfHandler,
+                         bool flushDC, bool usesSLM, uint32_t commandType, std::unique_ptr<PrintfHandler> &&printfHandler,
                         PreemptionMode preemptionMode, Kernel *kernel, uint32_t kernelCount);

    ~CommandComputeKernel() override;
@@ -145,7 +145,7 @@ class CommandComputeKernel : public Command {
    std::vector<Surface *> surfaces;
    bool flushDC;
    bool slmUsed;
-    bool NDRangeKernel;
+    uint32_t commandType;
    std::unique_ptr<PrintfHandler> printfHandler;
    Kernel *kernel;
    uint32_t kernelCount;