diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 1284b5e54f..af9716c56a 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -796,10 +796,8 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( } bool anyUncacheableArgs = false; - auto requiresCoherency = false; for (auto surface : createRange(surfaces, surfaceCount)) { surface->makeResident(getGpgpuCommandStreamReceiver()); - requiresCoherency |= surface->isCoherent; if (!surface->allowsL3Caching()) { anyUncacheableArgs = true; } @@ -819,7 +817,6 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( continue; } kernel->makeResident(getGpgpuCommandStreamReceiver()); - requiresCoherency |= kernel->requiresCoherency(); mediaSamplerRequired |= kernel->isVmeKernel(); auto numGrfRequiredByKernel = static_cast(kernel->getKernelInfo().kernelDescriptor.kernelAttributes.numGrfRequired); numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel); @@ -886,7 +883,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( multiDispatchInfo.usesSlm(), // useSLM !getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired - requiresCoherency, // requiresCoherency + false, // requiresCoherency (QueuePriority::LOW == priority), // lowPriority implicitFlush, // implicitFlush !eventBuilder.getEvent() || getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed diff --git a/opencl/source/command_queue/enqueue_kernel.h b/opencl/source/command_queue/enqueue_kernel.h index 41075e3545..f5f186cba2 100644 --- a/opencl/source/command_queue/enqueue_kernel.h +++ b/opencl/source/command_queue/enqueue_kernel.h @@ -107,9 +107,6 @@ cl_int CommandQueueHw::enqueueKernel( if (kernel.hasPrintfOutput()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, PRINTF_DETECTED_IN_KERNEL, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str()); } - if (kernel.requiresCoherency()) { - context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL, KERNEL_REQUIRES_COHERENCY, kernelInfo.kernelDescriptor.kernelMetadata.kernelName.c_str()); - } } if (kernelInfo.builtinDispatchBuilder != nullptr) { diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 830ef3ade8..827ece4cc1 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -154,12 +154,10 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term IndirectHeap *ioh = kernelOperation->ioh.get(); IndirectHeap *ssh = kernelOperation->ssh.get(); - auto requiresCoherency = false; auto anyUncacheableArgs = false; for (auto &surface : surfaces) { DEBUG_BREAK_IF(!surface); surface->makeResident(commandStreamReceiver); - requiresCoherency |= surface->isCoherent; if (!surface->allowsL3Caching()) { anyUncacheableArgs = true; } @@ -211,7 +209,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term slmUsed, // useSLM !commandQueue.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(), // guardCommandBufferWithPipeControl commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired - requiresCoherency, // requiresCoherency + false, // requiresCoherency commandQueue.getPriority() == QueuePriority::LOW, // lowPriority false, // implicitFlush commandQueue.getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled(), // outOfOrderExecutionAllowed diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index c4e021831f..31ab791ec0 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1398,29 +1398,6 @@ void Kernel::getResidency(std::vector &dst) { gtpinNotifyUpdateResidencyList(this, &dst); } -bool Kernel::requiresCoherency() { - auto numArgs = kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.size(); - for (decltype(numArgs) argIndex = 0; argIndex < numArgs; argIndex++) { - if (kernelArguments[argIndex].object) { - if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { - auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; - if (pSVMAlloc->isCoherent()) { - return true; - } - } - - if (Kernel::isMemObj(kernelArguments[argIndex].type)) { - auto clMem = const_cast(static_cast(kernelArguments[argIndex].object)); - auto memObj = castToObjectOrAbort(clMem); - if (memObj->getMultiGraphicsAllocation().isCoherent()) { - return true; - } - } - } - } - return false; -} - cl_int Kernel::setArgLocal(uint32_t argIndexIn, size_t argSize, const void *argVal) { diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index c1726efbbb..1cd8631ace 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -289,7 +289,6 @@ class Kernel : public ReferenceTrackedObject { // residency for kernel surfaces MOCKABLE_VIRTUAL void makeResident(CommandStreamReceiver &commandStreamReceiver); MOCKABLE_VIRTUAL void getResidency(std::vector &dst); - bool requiresCoherency(); void resetSharedObjectsPatchAddresses(); bool isUsingSharedObjArgs() const { return usingSharedObjArgs; } bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; } diff --git a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp index 63b2ac34ef..4a3d787533 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp @@ -813,42 +813,6 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK EXPECT_TRUE(containsHint(expectedHint, userData)); } -TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIsCalledWithWorkDim2ThenContextProvidesProperHint) { - size_t preferredWorkGroupSize[3]; - size_t globalWorkGroupSize[3] = {1, 1, 1}; - auto maxWorkGroupSize = static_cast(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize); - MockKernelWithInternals mockKernel(*pPlatform->getClDevice(0), context); - Kernel::SimpleKernelArgInfo kernelArgInfo; - - if (DebugManager.flags.EnableComputeWorkSizeND.get()) { - auto &rootDeviceEnvironment = pPlatform->getClDevice(0)->getRootDeviceEnvironment(); - WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); - computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2); - } else - computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32); - - auto buffer = new MockBuffer(); - buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true); - auto clBuffer = (cl_mem)buffer; - - kernelArgInfo.object = clBuffer; - kernelArgInfo.type = Kernel::kernelArgType::BUFFER_OBJ; - - std::vector kernelArguments; - kernelArguments.resize(1); - kernelArguments[0] = kernelArgInfo; - mockKernel.kernelInfo.kernelDescriptor.payloadMappings.explicitArgs.resize(1); - mockKernel.mockKernel->setKernelArguments(kernelArguments); - - retVal = pCmdQ->enqueueKernel(mockKernel.mockKernel, 2, nullptr, globalWorkGroupSize, preferredWorkGroupSize, 0, nullptr, nullptr); - - EXPECT_EQ(CL_SUCCESS, retVal); - - snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[KERNEL_REQUIRES_COHERENCY], mockKernel.mockKernel->getKernelInfo().kernelDescriptor.kernelMetadata.kernelName.c_str()); - EXPECT_TRUE(containsHint(expectedHint, userData)); - delete buffer; -} - const int validDimensions[] = {0, 1, 2}; INSTANTIATE_TEST_CASE_P( diff --git a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp index e2b2e62a4c..55d40a24ba 100644 --- a/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_arg_buffer_tests.cpp @@ -128,86 +128,6 @@ TEST_F(MultiDeviceKernelArgBufferTest, GivenValidBufferWhenSettingKernelArgThenB } } -TEST_F(KernelArgBufferTest, GivenSvmPtrStatelessWhenSettingKernelArgThenArgumentsAreSetCorrectly) { - Buffer *buffer = new MockBuffer(); - - auto val = (cl_mem)buffer; - auto pVal = &val; - - pKernelInfo->kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless; - - auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_FALSE(pKernel->requiresCoherency()); - - delete buffer; -} - -HWTEST_F(KernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { - Buffer *buffer = new MockBuffer(); - - auto val = (cl_mem)buffer; - auto pVal = &val; - - auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_FALSE(pKernel->requiresCoherency()); - - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); - - typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; - auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), pKernelInfo->argAsPtr(0).bindful)); - - auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); - EXPECT_EQ(buffer->getGraphicsAllocation(mockRootDeviceIndex)->getGpuAddress(), surfaceAddress); - - delete buffer; -} - -HWTEST_F(MultiDeviceKernelArgBufferTest, GivenSvmPtrStatefulWhenSettingKernelArgThenArgumentsAreSetCorrectly) { - cl_mem val = pBuffer.get(); - auto pVal = &val; - - int32_t retVal = CL_INVALID_VALUE; - for (auto &kernelInfo : pKernelInfosStorage) { - kernelInfo->argAsPtr(0).bindful = 0; - } - auto pMultiDeviceKernel = std::unique_ptr(MultiDeviceKernel::create(pProgram.get(), kernelInfos, retVal)); - - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = pMultiDeviceKernel->setArg(0, sizeof(cl_mem *), pVal); - EXPECT_EQ(CL_SUCCESS, retVal); - - for (auto &rootDeviceIndex : pContext->getRootDeviceIndices()) { - auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); - EXPECT_FALSE(pKernel->requiresCoherency()); - EXPECT_NE(0u, pKernel->getSurfaceStateHeapSize()); - typedef typename FamilyType::RENDER_SURFACE_STATE RENDER_SURFACE_STATE; - auto surfaceState = reinterpret_cast( - ptrOffset(pKernel->getSurfaceStateHeap(), kernelInfos[rootDeviceIndex]->getArgDescriptorAt(0).as().bindful)); - - auto surfaceAddress = surfaceState->getSurfaceBaseAddress(); - EXPECT_EQ(pBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), surfaceAddress); - } -} - -HWTEST_F(KernelArgBufferTest, GivenBufferFromSvmPtrWhenSettingKernelArgThenArgumentsAreSetCorrectly) { - - Buffer *buffer = new MockBuffer(); - buffer->getGraphicsAllocation(mockRootDeviceIndex)->setCoherent(true); - - auto val = (cl_mem)buffer; - auto pVal = &val; - - auto retVal = this->pKernel->setArg(0, sizeof(cl_mem *), pVal); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_TRUE(pKernel->requiresCoherency()); - - delete buffer; -} - TEST_F(KernelArgBufferTest, GivenInvalidBufferWhenSettingKernelArgThenInvalidMemObjectErrorIsReturned) { char *ptr = new char[sizeof(Buffer)];