refactor: remove unneeded code

Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
2023-09-11 17:12:15 +00:00 · 2023-09-11 17:12:15 +00:00 · cac547946a
parent b5e9c10f64
commit cac547946a
7 changed files with 2 additions and 150 deletions
--- a/opencl/source/command_queue/enqueue_common.h
+++ b/opencl/source/command_queue/enqueue_common.h
@ -796,10 +796,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
    }
    bool anyUncacheableArgs = false;
    auto requiresCoherency = false;
    for (auto surface : createRange(surfaces, surfaceCount)) {
        surface->makeResident(getGpgpuCommandStreamReceiver());
        requiresCoherency |= surface->isCoherent;
        if (!surface->allowsL3Caching()) {
            anyUncacheableArgs = true;
        }
@ -819,7 +817,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
            continue;
        }
        kernel->makeResident(getGpgpuCommandStreamReceiver());
        requiresCoherency |= kernel->requiresCoherency();
        mediaSamplerRequired |= kernel->isVmeKernel();
        auto numGrfRequiredByKernel = static_cast<uint32_t>(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired);
        numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
@ -886,7 +883,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
        multiDispatchInfo.usesSlm(),                                                                            // useSLM
        !getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl
        commandType == CL_COMMAND_NDRANGE_KERNEL,                                                               // GSBA32BitRequired
-        requiresCoherency,                                                                                      // requiresCoherency
+        false,                                                                                                  // requiresCoherency
        (QueuePriority::LOW == priority),                                                                       // lowPriority
        implicitFlush,                                                                                          // implicitFlush
        !eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(),             // outOfOrderExecutionAllowed
--- a/opencl/source/command_queue/enqueue_kernel.h
+++ b/opencl/source/command_queue/enqueue_kernel.h
@ -107,9 +107,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueKernel(
        if (kernel.hasPrintfOutput()) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
        }
        if (kernel.requiresCoherency()) {
            context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str());
        }
    }
    if (kernelInfo.builtinDispatchBuilder != nullptr) {
--- a/opencl/source/helpers/task_information.cpp
+++ b/opencl/source/helpers/task_information.cpp
@ -154,12 +154,10 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
    IndirectHeap *ioh = kernelOperation->ioh.get();
    IndirectHeap *ssh = kernelOperation->ssh.get();
    auto requiresCoherency = false;
    auto anyUncacheableArgs = false;
    for (auto &surface : surfaces) {
        DEBUG_BREAK_IF(!surface);
        surface->makeResident(commandStreamReceiver);
        requiresCoherency |= surface->isCoherent;
        if (!surface->allowsL3Caching()) {
            anyUncacheableArgs = true;
        }
@ -211,7 +209,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
        slmUsed,                                                                          // useSLM
        !commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(),       // guardCommandBufferWithPipeControl
        commandType == CL_COMMAND_NDRANGE_KERNEL,                                         // GSBA32BitRequired
-        requiresCoherency,                                                                // requiresCoherency
+        false,                                                                            // requiresCoherency
        commandQueue.getPriority() == QueuePriority::LOW,                                 // lowPriority
        false,                                                                            // implicitFlush
        commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(),      // outOfOrderExecutionAllowed
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@ -1398,29 +1398,6 @@ void Kernel::getResidency(std::vector<Surface *> &dst) {
    gtpinNotifyUpdateResidencyList(this, &dst);
 }
 // Reports whether any explicit kernel argument is backed by a coherent allocation.
 // Scans one slot per explicit argument of the kernel descriptor and returns true on
 // the first SVM allocation or memory object that reports isCoherent(); returns
 // false when no argument does.
 bool Kernel::requiresCoherency() {
    auto numArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size();
    for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) {
        // Slots whose object pointer is null are skipped.
        if (kernelArguments[argIndex].object) {
            // SVM argument: the stored object pointer is treated as a GraphicsAllocation.
            if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) {
                auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object;
                if (pSVMAlloc->isCoherent()) {
                    return true;
                }
            }
            // Memory-object argument: the stored pointer is treated as a cl_mem handle and
            // resolved to a MemObj (castToObjectOrAbort presumably aborts on a bad handle — confirm).
            if (Kernel::isMemObj(kernelArguments[argIndex].type)) {
                auto clMem = const_cast<cl_mem>(static_cast<const _cl_mem *>(kernelArguments[argIndex].object));
                auto memObj = castToObjectOrAbort<MemObj>(clMem);
                if (memObj->getMultiGraphicsAllocation().isCoherent()) {
                    return true;
                }
            }
        }
    }
    return false;
 }
 cl_int Kernel::setArgLocal(uint32_t argIndexIn,
                           size_t argSize,
                           const void *argVal) {
--- a/opencl/source/kernel/kernel.h
+++ b/opencl/source/kernel/kernel.h
@ -289,7 +289,6 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
    // residency for kernel surfaces
    MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver);
    MOCKABLE_VIRTUAL void getResidency(std::vector<Surface *> &dst);
    bool requiresCoherency();
    void resetSharedObjectsPatchAddresses();
    bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
    bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; }
--- a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp
+++ b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp
@ -813,42 +813,6 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK
    EXPECT_TRUE(containsHint(expectedHint, userData));
 }
 // Enqueues a 2-dimensional kernel whose single argument is a buffer with a coherent
 // graphics allocation, then checks that the KERNEL_REQUIRES_COHERENCY hint text
 // (formatted with the kernel name) is found via containsHint.
 TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) {
    size_t preferredWorkGroupSize[3];
    size_t globalWorkGroupSize[3] = {1, 1, 1};
    auto maxWorkGroupSize = static_cast<uint32_t>(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);
    MockKernelWithInternals mockKernel(*pPlatform->getClDevice(0), context);
    Kernel::SimpleKernelArgInfo kernelArgInfo;
    // The local work-group size comes from the ND helper when the
    // EnableComputeWorkSizeND debug flag is set, otherwise from the 2D helper.
    if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
        auto &rootDeviceEnvironment = pPlatform->getClDevice(0)->getRootDeviceEnvironment();
        WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false);
        computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
    } else
        computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
    // Mark the buffer's allocation as coherent and install it as kernel argument 0.
    auto buffer = new MockBuffer();
    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);
    auto clBuffer = (cl_mem)buffer;
    kernelArgInfo.object = clBuffer;
    kernelArgInfo.type = Kernel::kernelArgType::BUFFER_OBJ;
    std::vector<Kernel::SimpleKernelArgInfo> kernelArguments;
    kernelArguments.resize(1);
    kernelArguments[0] = kernelArgInfo;
    // The descriptor's explicit-argument list is sized to match the single argument set above.
    mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1);
    mockKernel.mockKernel->setKernelArguments(kernelArguments);
    retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Build the expected hint string from the coherency hint format and the kernel name.
    snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str());
    EXPECT_TRUE(containsHint(expectedHint, userData));
    delete buffer;
 }
 const int validDimensions[] = {0, 1, 2};
 INSTANTIATE_TEST_CASE_P(
--- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
+++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp
@ -128,86 +128,6 @@ TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenB
    }
 }
 // With the kernel's buffer addressing mode set to Stateless, sets a MockBuffer as
 // argument 0 and expects CL_SUCCESS and requiresCoherency() == false.
 TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *buffer = new MockBuffer();
    auto val = (cl_mem)buffer;
    auto pVal = &val;
    pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
    // NOTE(review): the size passed is sizeof(cl_mem *) while pVal points at a cl_mem;
    // both are pointer types so the sizes presumably match — confirm.
    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->requiresCoherency());
    delete buffer;
 }
 // Sets a MockBuffer as argument 0 and expects CL_SUCCESS, requiresCoherency() == false,
 // a non-empty surface state heap, and a surface state at argument 0's bindful offset
 // whose base address equals the buffer allocation's GPU address.
 HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *buffer = new MockBuffer();
    auto val = (cl_mem)buffer;
    auto pVal = &val;
    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_FALSE(pKernel->requiresCoherency());
    EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
    typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
    // Locate the surface state for argument 0 inside the kernel's surface state heap.
    auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
        ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful));
    auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
    EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress);
    delete buffer;
 }
 // Multi-device variant: sets pBuffer as argument 0 on a MultiDeviceKernel and, for
 // every root device index of the context, expects requiresCoherency() == false, a
 // non-empty surface state heap, and a surface state base address equal to the GPU
 // address of the buffer's allocation for that root device.
 HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    cl_mem val = pBuffer.get();
    auto pVal = &val;
    // retVal starts as an error value; the check after create() expects CL_SUCCESS.
    int32_t retVal = CL_INVALID_VALUE;
    // Place argument 0's bindful surface state at offset 0 in every kernel info.
    for (auto &kernelInfo : pKernelInfosStorage) {
        kernelInfo->argAsPtr(0).bindful = 0;
    }
    auto pMultiDeviceKernel = std::unique_ptr<MultiDeviceKernel>(MultiDeviceKernel::create<MockKernel>(pProgram.get(), kernelInfos, retVal));
    EXPECT_EQ(CL_SUCCESS, retVal);
    retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Verify the per-device kernel for each root device index.
    for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) {
        auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex);
        EXPECT_FALSE(pKernel->requiresCoherency());
        EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize());
        typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE;
        auto surfaceState = reinterpret_cast<const RENDER_SURFACE_STATE *>(
            ptrOffset(pKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as<ArgDescPointer>().bindful));
        auto surfaceAddress = surfaceState->getSurfaceBaseAddress();
        EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress);
    }
 }
 // Marks the MockBuffer's graphics allocation as coherent, sets it as argument 0,
 // and expects CL_SUCCESS and requiresCoherency() == true.
 HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) {
    Buffer *buffer = new MockBuffer();
    buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true);
    auto val = (cl_mem)buffer;
    auto pVal = &val;
    auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_TRUE(pKernel->requiresCoherency());
    delete buffer;
 }
 TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) {
    char *ptr = new char[sizeof(Buffer)];