From 7166dfdb051b2065ec1d85f75eab0373c5eee7f4 Mon Sep 17 00:00:00 2001
From: Konstanty Misiak <konstanty.misiak@intel.com>
Date: Fri, 7 Aug 2020 11:21:04 +0200
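Subject: [PATCH] Fix debugSurface allocation for non-debug kernels

CommandQueue::setupDebugSurface() no longer allocates the debug surface
on demand. CommandQueueHw now allocates it up front, but only when the
device reports an attached debugger. Tests are updated so that a kernel
from a program without debug enabled leaves no debug surface allocated,
while a queue on a device with an active debugger does allocate one.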

Change-Id: I253968f7ea617bc68275bec0d6279f1fcf948967
Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
---
 opencl/source/command_queue/command_queue.cpp     |  4 ----
 opencl/source/command_queue/command_queue_hw.h    |  4 ++++
 .../command_queue/command_queue_hw_tests.cpp      |  4 ++--
 .../command_queue/command_queue_tests.cpp         |  1 +
 .../command_queue/enqueue_debug_kernel_tests.cpp  | 15 +++++++++++++++
 5 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp
index 8ddbdecb12..5ee4103f15 100644
--- a/opencl/source/command_queue/command_queue.cpp
+++ b/opencl/source/command_queue/command_queue.cpp
@@ -521,10 +521,6 @@ void CommandQueue::enqueueBlockedMapUnmapOperation(const cl_event *eventWaitList
 bool CommandQueue::setupDebugSurface(Kernel *kernel) {
     auto debugSurface = getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation();
 
-    if (!debugSurface) {
-        debugSurface = getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize);
-    }
-
     DEBUG_BREAK_IF(!kernel->requiresSshForBuffers());
 
     auto surfaceState = ptrOffset(reinterpret_cast<uintptr_t *>(kernel->getSurfaceStateHeap()),
diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h
index 36ed4b1975..b15f61fe65 100644
--- a/opencl/source/command_queue/command_queue_hw.h
+++ b/opencl/source/command_queue/command_queue_hw.h
@@ -71,6 +71,10 @@ class CommandQueueHw : public CommandQueue {
             getGpgpuCommandStreamReceiver().enableNTo1SubmissionModel();
         }
 
+        if (device->getDevice().getDebugger()) {
+            getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize);
+        }
+
         uint64_t requestedSliceCount = getCmdQueueProperties<cl_command_queue_properties>(properties, CL_QUEUE_SLICE_COUNT_INTEL);
         if (requestedSliceCount > 0) {
             sliceCount = requestedSliceCount;
diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp
index 7325f403a1..91247df6f7 100644
--- a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp
+++ b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp
@@ -1297,7 +1297,7 @@ struct MockCommandQueueHwWithOverwrittenCsr : public CommandQueueHw<GfxFamily> {
 
 HWTEST_F(CommandQueueHwTest, givenFlushWhenFlushBatchedSubmissionsFailsThenErrorIsRetured) {
 
-    MockCommandQueueHwWithOverwrittenCsr<FamilyType> cmdQueue(context, device, nullptr, false);
+    MockCommandQueueHwWithOverwrittenCsr<FamilyType> cmdQueue(context, pClDevice, nullptr, false);
     MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->executionEnvironment, 0);
     cmdQueue.csr = &csr;
     cl_int errorCode = cmdQueue.flush();
@@ -1305,7 +1305,7 @@ HWTEST_F(CommandQueueHwTest, givenFlushWhenFlushBatchedSubmissionsFailsThenError
 }
 
 HWTEST_F(CommandQueueHwTest, givenFinishWhenFlushBatchedSubmissionsFailsThenErrorIsRetured) {
-    MockCommandQueueHwWithOverwrittenCsr<FamilyType> cmdQueue(context, device, nullptr, false);
+    MockCommandQueueHwWithOverwrittenCsr<FamilyType> cmdQueue(context, pClDevice, nullptr, false);
     MockCommandStreamReceiverWithFailingFlushBatchedSubmission csr(*pDevice->executionEnvironment, 0);
     cmdQueue.csr = &csr;
     cl_int errorCode = cmdQueue.finish();
diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp
index fba01e2aab..99e1548be9 100644
--- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp
+++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp
@@ -1035,6 +1035,7 @@ HWTEST_F(CommandQueueCommandStreamTest, givenDebugKernelWhenSetupDebugSurfaceIsC
     kernel->getAllocatedKernelInfo()->usesSsh = true;
     auto &commandStreamReceiver = cmdQ.getGpgpuCommandStreamReceiver();
 
+    cmdQ.getGpgpuCommandStreamReceiver().allocateDebugSurface(SipKernel::maxDbgSurfaceSize);
     cmdQ.setupDebugSurface(kernel.get());
 
     auto debugSurface = commandStreamReceiver.getDebugSurfaceAllocation();
diff --git a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp
index 872457fb86..bfece2293d 100644
--- a/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_debug_kernel_tests.cpp
@@ -202,4 +202,19 @@ HWTEST_F(EnqueueDebugKernelSimpleTest, givenKernelFromProgramWithoutDebugEnabled
     mockCmdQ->enqueueKernel(kernel.get(), 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
 
     ::testing::Mock::VerifyAndClearExpectations(mockCmdQ.get());
+    EXPECT_EQ(nullptr, mockCmdQ->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation());
+}
+
+using ActiveDebuggerTest = EnqueueDebugKernelTest;
+
+HWTEST_F(ActiveDebuggerTest, givenKernelFromProgramWithoutDebugEnabledAndActiveDebuggerWhenEnqueuedThenDebugSurfaceIsSetup) {
+    MockProgram mockProgram(*pDevice->getExecutionEnvironment());
+    std::unique_ptr<MockDebugKernel> debugKernel(MockKernel::create<MockDebugKernel>(*pDevice, &mockProgram));
+    debugKernel->setContext(&context);
+    auto queue = std::make_unique<CommandQueueHw<FamilyType>>(&context, pClDevice, nullptr, false);
+
+    size_t globalWorkSize[] = {1, 1, 1};
+    queue->enqueueKernel(debugKernel.get(), 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
+
+    EXPECT_NE(nullptr, queue->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation()); // surface must exist after the enqueue
 }