refactor: remove unneeded code

Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
2023-09-11 17:12:15 +00:00 · 2023-09-11 17:12:15 +00:00 · cac547946a
parent b5e9c10f64
commit cac547946a
7 changed files with 2 additions and 150 deletions
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@ -796,10 +796,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
    }

    bool anyUncacheableArgs = false;
-    auto requiresCoherency = false;
    for (auto surface : createRange(surfaces, surfaceCount)) {
        surface->makeResident(getGpgpuCommandStreamReceiver());
-        requiresCoherency |= surface->isCoherent;
        if (!surface->allowsL3Caching()) {
            anyUncacheableArgs = true;
        }
@ -819,7 +817,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
            continue;
        }
        kernel->makeResident(getGpgpuCommandStreamReceiver());
-        requiresCoherency |= kernel->requiresCoherency();
        mediaSamplerRequired |= kernel->isVmeKernel();
        auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired);
        numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
@ -886,7 +883,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
        multiDispatchInfo.usesSlm(),                                                                            // useSLM
        !getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl
        commandType == CL_COMMAND_NDRANGE_KERNEL,                                                               // GSBA32BitRequired
-        requiresCoherency,                                                                                      // requiresCoherency
+        false,                                                                                                  // requiresCoherency
        (QueuePriority::LOW == priority),                                                                       // lowPriority
        implicitFlush,                                                                                          // implicitFlush
        !eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(),             // outOfOrderExecutionAllowed
--- a/opencl/source/command_queue/enqueue_kernel.h
+++ b/opencl/source/command_queue/enqueue_kernel.h
@ -107,9 +107,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
        if (kernel.hasPrintfOutput()) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
        }
-        if (kernel.requiresCoherency()) {
-            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
-        }
    }

    if (kernelInfo.builtinDispatchBuilder != nullptr) {
--- a/opencl/source/helpers/task_information.cpp
+++ b/opencl/source/helpers/task_information.cpp
@ -154,12 +154,10 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
    IndirectHeap *ioh = kernelOperation->ioh.get();
    IndirectHeap *ssh = kernelOperation->ssh.get();

-    auto requiresCoherency = false;
    auto anyUncacheableArgs = false;
    for (auto &surface : surfaces) {
        DEBUG_BREAK_IF(!surface);
        surface->makeResident(commandStreamReceiver);
-        requiresCoherency |= surface->isCoherent;
        if (!surface->allowsL3Caching()) {
            anyUncacheableArgs = true;
        }
@ -211,7 +209,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
        slmUsed,                                                                          // useSLM
        !commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(),       // guardCommandBufferWithPipeControl
        commandType == CL_COMMAND_NDRANGE_KERNEL,                                         // GSBA32BitRequired
-        requiresCoherency,                                                                // requiresCoherency
+        false,                                                                            // requiresCoherency
        commandQueue.getPriority() == QueuePriority::LOW,                                 // lowPriority
        false,                                                                            // implicitFlush
        commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(),      // outOfOrderExecutionAllowed
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@ -1398,29 +1398,6 @@ void Kernel::getResidency(std::vector<Surface *> &dst) {
    gtpinNotifyUpdateResidencyList(this, &dst);
 }

-bool Kernel::requiresCoherency() {
-    auto numArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size();
-    for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) {
-        if (kernelArguments[argIndex].object) {
-            if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) {
-                auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object;
-                if (pSVMAlloc->isCoherent()) {
-                    return true;
-                }
-            }
-
-            if (Kernel::isMemObj(kernelArguments[argIndex].type)) {
-                auto clMem = const_cast<cl_mem>(static_cast<const _cl_mem *>(kernelArguments[argIndex].object));
-                auto memObj = castToObjectOrAbort<MemObj>(clMem);
-                if (memObj->getMultiGraphicsAllocation().isCoherent()) {
-                    return true;
-                }
-            }
-        }
-    }
-    return false;
-}
-
 cl_int Kernel::setArgLocal(uint32_t argIndexIn,
                           size_t argSize,
                           const void *argVal) {
--- a/opencl/source/kernel/kernel.h
+++ b/opencl/source/kernel/kernel.h
@ -289,7 +289,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
    // residency for kernel surfaces
    MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
    MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
-    bool requiresCoherency();
    void resetSharedObjectsPatchAddresses();
    bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
    bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; }
--- a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp
+++ b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp
@ -813,42 +813,6 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK
    EXPECT_TRUE(containsHint(expectedHint, userData));
 }

-TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) {
-    size_t preferredWorkGroupSize[3];
-    size_t globalWorkGroupSize[3] = {1, 1, 1};
-    auto maxWorkGroupSize = static_cast<uint32_t>(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);
-    MockKernelWithInternals mockKernel(*pPlatform->getClDevice(0), context);
-    Kernel::SimpleKernelArgInfo kernelArgInfo;
-
-    if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
-        auto &rootDeviceEnvironment = pPlatform->getClDevice(0)->getRootDeviceEnvironment();
-        WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false);
-        computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
-    } else
-        computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
-
-    auto buffer = new MockBuffer();
-    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);
-    auto clBuffer = (cl_mem)buffer;
-
-    kernelArgInfo.object = clBuffer;
-    kernelArgInfo.type = Kernel::kernelArgType::BUFFER_OBJ;
-
-    std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
-    kernelArguments.resize(1);
-    kernelArguments[0] = kernelArgInfo;
-    mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
-    mockKernel.mockKernel->setKernelArguments(kernelArguments);
-
-    retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr);
-
-    EXPECT_EQ(CL_SUCCESS, retVal);
-
-    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str());
-    EXPECT_TRUE(containsHint(expectedHint, userData));
-    delete buffer;
-}
-
 const int validDimensions[] = {0, 1, 2};

 INSTANTIATE_TEST_CASE_P(
--- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
+++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
@ -128,86 +128,6 @@ TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenB
    }
 }

-TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
-    Buffer *buffer = new MockBuffer();
-
-    auto val = (cl_mem)buffer;
-    auto pVal = &val;
-
-    pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
-
-    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
-    EXPECT_EQ(CL_SUCCESS, retVal);
-    EXPECT_FALSE(pKernel->requiresCoherency());
-
-    delete buffer;
-}
-
-HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
-    Buffer *buffer = new MockBuffer();
-
-    auto val = (cl_mem)buffer;
-    auto pVal = &val;
-
-    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
-    EXPECT_EQ(CL_SUCCESS, retVal);
-    EXPECT_FALSE(pKernel->requiresCoherency());
-
-    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
-
-    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
-    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
-        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));
-
-    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
-    EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress);
-
-    delete buffer;
-}
-
-HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
-    cl_mem val = pBuffer.get();
-    auto pVal = &val;
-
-    int32_t retVal = CL_INVALID_VALUE;
-    for (auto &kernelInfo : pKernelInfosStorage) {
-        kernelInfo->argAsPtr(0).bindful = 0;
-    }
-    auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, retVal));
-
-    EXPECT_EQ(CL_SUCCESS, retVal);
-
-    retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
-    EXPECT_EQ(CL_SUCCESS, retVal);
-
-    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
-        auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
-        EXPECT_FALSE(pKernel->requiresCoherency());
-        EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
-        typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
-        auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
-            ptrOffset(pKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().bindful));
-
-        auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
-        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress);
-    }
-}
-
-HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
-
-    Buffer *buffer = new MockBuffer();
-    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);
-
-    auto val = (cl_mem)buffer;
-    auto pVal = &val;
-
-    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
-    EXPECT_EQ(CL_SUCCESS, retVal);
-    EXPECT_TRUE(pKernel->requiresCoherency());
-
-    delete buffer;
-}
-
 TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
    char *ptr = new char[sizeof(Buffer)];