diff --git a/opencl/source/gtpin/gtpin_callbacks.cpp b/opencl/source/gtpin/gtpin_callbacks.cpp index 62d5f62a74..e5c0a93ca7 100644 --- a/opencl/source/gtpin/gtpin_callbacks.cpp +++ b/opencl/source/gtpin/gtpin_callbacks.cpp @@ -74,10 +74,7 @@ void gtpinNotifyKernelCreate(cl_kernel kernel) { // Enlarge local copy of SSH by 1 SS auto >pinHelper = device->getGTPinGfxCoreHelper(); - if (!gtpinHelper.addSurfaceState(pKernel)) { - // Kernel with no SSH or Kernel EM, not supported - return; - } + gtpinHelper.addSurfaceState(pKernel); if (pKernel->isKernelHeapSubstituted()) { // ISA for this kernel was already substituted return; @@ -121,10 +118,6 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) { auto rootDeviceIndex = device.getRootDeviceIndex(); auto pMultiDeviceKernel = castToObjectOrAbort(kernel); auto pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); - if (pKernel->getSurfaceStateHeapSize() == 0) { - // Kernel with no SSH, not supported - return; - } Context *pContext = &(pKernel->getContext()); cl_context context = (cl_context)pContext; uint64_t kernelId = pKernel->getKernelId(); diff --git a/opencl/source/gtpin/gtpin_gfx_core_helper.h b/opencl/source/gtpin/gtpin_gfx_core_helper.h index 59749620dc..ed3cd9c778 100644 --- a/opencl/source/gtpin/gtpin_gfx_core_helper.h +++ b/opencl/source/gtpin/gtpin_gfx_core_helper.h @@ -25,7 +25,7 @@ class GTPinGfxCoreHelper { public: static std::unique_ptr create(GFXCORE_FAMILY gfxCore); virtual uint32_t getGenVersion() const = 0; - virtual bool addSurfaceState(Kernel *pKernel) const = 0; + virtual void addSurfaceState(Kernel *pKernel) const = 0; virtual void *getSurfaceState(Kernel *pKernel, size_t bti) const = 0; virtual bool canUseSharedAllocation(const HardwareInfo &hwInfo) const = 0; @@ -43,7 +43,7 @@ class GTPinGfxCoreHelperHw : public GTPinGfxCoreHelper { return gtpinHelper; } uint32_t getGenVersion() const override; - bool addSurfaceState(Kernel *pKernel) const override; + void addSurfaceState(Kernel *pKernel) const override; void *getSurfaceState(Kernel *pKernel, size_t bti) const override; bool canUseSharedAllocation(const HardwareInfo &hwInfo) const override; diff --git a/opencl/source/gtpin/gtpin_gfx_core_helper.inl b/opencl/source/gtpin/gtpin_gfx_core_helper.inl index 6d65e96d9d..2893c80aa5 100644 --- a/opencl/source/gtpin/gtpin_gfx_core_helper.inl +++ b/opencl/source/gtpin/gtpin_gfx_core_helper.inl @@ -14,19 +14,18 @@ namespace NEO { template -bool GTPinGfxCoreHelperHw::addSurfaceState(Kernel *pKernel) const { +void GTPinGfxCoreHelperHw::addSurfaceState(Kernel *pKernel) const { using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE; using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE; size_t sshSize = pKernel->getSurfaceStateHeapSize(); - if (sshSize == 0) { - // Kernels which do not use SSH or use Execution Model are not supported (yet) - return false; - } size_t ssSize = sizeof(RENDER_SURFACE_STATE); size_t btsSize = sizeof(BINDING_TABLE_STATE); size_t sizeToEnlarge = ssSize + btsSize; - size_t currBTOffset = pKernel->getBindingTableOffset(); + size_t currBTOffset = 0u; + if (isValidOffset(static_cast(pKernel->getBindingTableOffset()))) { + currBTOffset = pKernel->getBindingTableOffset(); + } size_t currSurfaceStateSize = currBTOffset; char *pSsh = static_cast(pKernel->getSurfaceStateHeap()); char *pNewSsh = new char[sshSize + sizeToEnlarge]; @@ -40,7 +39,6 @@ bool GTPinGfxCoreHelperHw::addSurfaceState(Kernel *pKernel) const { *pNewBTS = GfxFamily::cmdInitBindingTableState; pNewBTS->setSurfaceStatePointer((uint64_t)currBTOffset); pKernel->resizeSurfaceStateHeap(pNewSsh, sshSize + sizeToEnlarge, currBTCount + 1, newSurfaceStateSize); - return true; } template diff --git a/opencl/source/helpers/hardware_commands_helper.h b/opencl/source/helpers/hardware_commands_helper.h index 37fa19b3c5..8f4f93e7f9 100644 --- a/opencl/source/helpers/hardware_commands_helper.h +++ b/opencl/source/helpers/hardware_commands_helper.h @@ -122,5 +122,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { static bool inlineDataProgrammingRequired(const Kernel &kernel); static bool kernelUsesLocalIds(const Kernel &kernel); + static size_t checkForAdditionalBTAndSetBTPointer(IndirectHeap &ssh, const Kernel &kernel); }; } // namespace NEO diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index 7c4fdedc21..25dadc1d00 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -228,9 +228,7 @@ size_t HardwareCommandsHelper::sendIndirectState( ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); - auto dstBindingTablePointer = EncodeSurfaceState::pushBindingTableAndSurfaceStates(ssh, kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries, - kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(), - kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset()); + size_t dstBindingTablePointer = HardwareCommandsHelper::checkForAdditionalBTAndSetBTPointer(ssh, kernel); // Copy our sampler state if it exists const auto &samplerTable = kernelInfo.kernelDescriptor.payloadMappings.samplerTable; @@ -326,4 +324,20 @@ bool HardwareCommandsHelper::kernelUsesLocalIds(const Kernel &kernel) return kernel.getKernelInfo().kernelDescriptor.kernelAttributes.numLocalIdChannels > 0; } +template +size_t HardwareCommandsHelper::checkForAdditionalBTAndSetBTPointer(IndirectHeap &ssh, const Kernel &kernel) { + size_t dstBindingTablePointer{0u}; + const auto &kernelInfo = kernel.getKernelInfo(); + if (isGTPinInitialized && 0u == kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries) { + dstBindingTablePointer = EncodeSurfaceState::pushBindingTableAndSurfaceStates(ssh, 1u, + kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(), + kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset()); + } else { + dstBindingTablePointer = EncodeSurfaceState::pushBindingTableAndSurfaceStates(ssh, kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries, + kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(), + kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset()); + } + return dstBindingTablePointer; +} + } // namespace NEO diff --git a/opencl/test/unit_test/gtpin/gtpin_tests.cpp b/opencl/test/unit_test/gtpin/gtpin_tests.cpp index 7526e2ede1..3056e003f8 100644 --- a/opencl/test/unit_test/gtpin/gtpin_tests.cpp +++ b/opencl/test/unit_test/gtpin/gtpin_tests.cpp @@ -1038,301 +1038,6 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelINTELIsExecutedThenGT EXPECT_EQ(CL_SUCCESS, retVal); } -TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenKernelCreateCallbacksIsNotCalled) { - gtpinCallbacks.onContextCreate = onContextCreate; - gtpinCallbacks.onContextDestroy = onContextDestroy; - gtpinCallbacks.onKernelCreate = onKernelCreate; - gtpinCallbacks.onKernelSubmit = onKernelSubmit; - gtpinCallbacks.onCommandBufferCreate = onCommandBufferCreate; - gtpinCallbacks.onCommandBufferComplete = onCommandBufferComplete; - retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); - EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); - - cl_device_id device = (cl_device_id)pDevice; - cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(nullptr, context); - auto pContext = castToObject(context); - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); - - char binary[1024] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; - size_t binSize = 10; - MockProgram *pProgram = Program::createBuiltInFromGenBinary(pContext, pContext->getDevices(), &binary[0], binSize, &retVal); - ASSERT_NE(nullptr, pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); - - PatchTokensTestData::ValidProgramWithKernel programTokens; - - pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(reinterpret_cast(programTokens.storage.data()), programTokens.storage.size()); - pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = programTokens.storage.size(); - retVal = pProgram->processGenBinary(*pContext->getDevice(0)); - EXPECT_EQ(CL_SUCCESS, retVal); - - int prevCount = KernelCreateCallbackCount; - cl_kernel kernel = clCreateKernel(pProgram, std::string(programTokens.kernels[0].name.begin(), programTokens.kernels[0].name.size()).c_str(), &retVal); - EXPECT_NE(nullptr, kernel); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(prevCount, KernelCreateCallbackCount); - - retVal = clReleaseKernel(kernel); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseProgram(pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseContext(context); - EXPECT_EQ(CL_SUCCESS, retVal); -} - -TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenKernelWithoutSSHIsUsedThenGTPinSubmitKernelCallbackIsNotCalled) { - - const auto &compilerProductHelper = pDevice->getRootDeviceEnvironment().getHelper(); - if (compilerProductHelper.isForceToStatelessRequired() || !compilerProductHelper.isStatelessToStatefulBufferOffsetSupported()) { - GTEST_SKIP(); - } - - gtpinCallbacks.onContextCreate = onContextCreate; - gtpinCallbacks.onContextDestroy = onContextDestroy; - gtpinCallbacks.onKernelCreate = onKernelCreate; - gtpinCallbacks.onKernelSubmit = onKernelSubmit; - gtpinCallbacks.onCommandBufferCreate = onCommandBufferCreate; - gtpinCallbacks.onCommandBufferComplete = onCommandBufferComplete; - retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); - EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); - - cl_kernel kernel = nullptr; - cl_program pProgram = nullptr; - cl_device_id device = (cl_device_id)pDevice; - size_t sourceSize = 0; - std::string testFile; - cl_command_queue cmdQ = nullptr; - cl_queue_properties properties = 0; - cl_context context = nullptr; - - KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); - testFile.append(clFiles); - testFile.append("CopyBuffer_simd16.cl"); - auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); - EXPECT_NE(0u, sourceSize); - EXPECT_NE(nullptr, pSource); - - context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(nullptr, context); - - cmdQ = clCreateCommandQueue(context, device, properties, &retVal); - ASSERT_NE(nullptr, cmdQ); - EXPECT_EQ(CL_SUCCESS, retVal); - - const char *sources[1] = {pSource.get()}; - pProgram = clCreateProgramWithSource( - context, - 1, - sources, - &sourceSize, - &retVal); - ASSERT_NE(nullptr, pProgram); - - retVal = clBuildProgram( - pProgram, - 1, - &device, - nullptr, - nullptr, - nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); - - int prevCount1 = KernelCreateCallbackCount; - kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); - EXPECT_NE(nullptr, kernel); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); - - MultiDeviceKernel *pMultiDeviceKernel = static_cast(kernel); - Kernel *pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); - const KernelInfo &kInfo = pKernel->getKernelInfo(); - uint64_t gtpinKernelId = pKernel->getKernelId(); - EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId); - - constexpr size_t n = 256; - auto buff0 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); - auto buff1 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); - - retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &buff0); - EXPECT_EQ(CL_SUCCESS, retVal); - retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &buff1); - EXPECT_EQ(CL_SUCCESS, retVal); - - // Verify that when SSH is removed then during kernel execution - // GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called. - pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); - - int prevCount2 = KernelSubmitCallbackCount; - int prevCount3 = CommandBufferCreateCallbackCount; - int prevCount4 = CommandBufferCompleteCallbackCount; - cl_uint workDim = 1; - size_t globalWorkOffset[3] = {0, 0, 0}; - size_t globalWorkSize[3] = {n, 1, 1}; - size_t localWorkSize[3] = {1, 1, 1}; - retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 0, nullptr, nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(prevCount2, KernelSubmitCallbackCount); - EXPECT_EQ(prevCount3, CommandBufferCreateCallbackCount); - - retVal = clFinish(cmdQ); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(prevCount4, CommandBufferCompleteCallbackCount); - - // Cleanup - retVal = clReleaseKernel(kernel); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseProgram(pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); - - pSource.reset(); - - retVal = clReleaseMemObject(buff0); - EXPECT_EQ(CL_SUCCESS, retVal); - retVal = clReleaseMemObject(buff1); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseCommandQueue(cmdQ); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseContext(context); - EXPECT_EQ(CL_SUCCESS, retVal); -} - -TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenBlockedKernelWithoutSSHIsUsedThenGTPinSubmitKernelCallbackIsNotCalled) { - - const auto &compilerProductHelper = pDevice->getRootDeviceEnvironment().getHelper(); - if (compilerProductHelper.isForceToStatelessRequired() || !compilerProductHelper.isStatelessToStatefulBufferOffsetSupported()) { - GTEST_SKIP(); - } - - gtpinCallbacks.onContextCreate = onContextCreate; - gtpinCallbacks.onContextDestroy = onContextDestroy; - gtpinCallbacks.onKernelCreate = onKernelCreate; - gtpinCallbacks.onKernelSubmit = onKernelSubmit; - gtpinCallbacks.onCommandBufferCreate = onCommandBufferCreate; - gtpinCallbacks.onCommandBufferComplete = onCommandBufferComplete; - retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); - EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); - - cl_kernel kernel = nullptr; - cl_program pProgram = nullptr; - cl_device_id device = (cl_device_id)pDevice; - size_t sourceSize = 0; - std::string testFile; - cl_command_queue cmdQ = nullptr; - cl_queue_properties properties = 0; - cl_context context = nullptr; - - KernelBinaryHelper kbHelper("CopyBuffer_simd16", false); - testFile.append(clFiles); - testFile.append("CopyBuffer_simd16.cl"); - auto pSource = loadDataFromFile(testFile.c_str(), sourceSize); - EXPECT_NE(0u, sourceSize); - EXPECT_NE(nullptr, pSource); - - context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(nullptr, context); - - cmdQ = clCreateCommandQueue(context, device, properties, &retVal); - ASSERT_NE(nullptr, cmdQ); - EXPECT_EQ(CL_SUCCESS, retVal); - - const char *sources[1] = {pSource.get()}; - pProgram = clCreateProgramWithSource( - context, - 1, - sources, - &sourceSize, - &retVal); - ASSERT_NE(nullptr, pProgram); - - retVal = clBuildProgram( - pProgram, - 1, - &device, - nullptr, - nullptr, - nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); - - int prevCount1 = KernelCreateCallbackCount; - kernel = clCreateKernel(pProgram, "CopyBuffer", &retVal); - EXPECT_NE(nullptr, kernel); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(prevCount1 + 1, KernelCreateCallbackCount); - - MultiDeviceKernel *pMultiDeviceKernel = static_cast(kernel); - Kernel *pKernel = pMultiDeviceKernel->getKernel(rootDeviceIndex); - const KernelInfo &kInfo = pKernel->getKernelInfo(); - uint64_t gtpinKernelId = pKernel->getKernelId(); - EXPECT_EQ(kInfo.shaderHashCode, gtpinKernelId); - - constexpr size_t n = 256; - auto buff0 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); - auto buff1 = clCreateBuffer(context, 0, n * sizeof(unsigned int), nullptr, nullptr); - - retVal = clSetKernelArg(pMultiDeviceKernel, 0, sizeof(cl_mem), &buff0); - EXPECT_EQ(CL_SUCCESS, retVal); - retVal = clSetKernelArg(pMultiDeviceKernel, 1, sizeof(cl_mem), &buff1); - EXPECT_EQ(CL_SUCCESS, retVal); - - // Verify that when SSH is removed then during kernel execution - // GT-Pin Kernel Submit, Command Buffer Create and Command Buffer Complete callbacks are not called. - pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); - - cl_event userEvent = clCreateUserEvent(context, &retVal); - EXPECT_EQ(CL_SUCCESS, retVal); - - int prevCount2 = KernelSubmitCallbackCount; - int prevCount3 = CommandBufferCreateCallbackCount; - int prevCount4 = CommandBufferCompleteCallbackCount; - cl_uint workDim = 1; - size_t globalWorkOffset[3] = {0, 0, 0}; - size_t globalWorkSize[3] = {n, 1, 1}; - size_t localWorkSize[3] = {1, 1, 1}; - retVal = clEnqueueNDRangeKernel(cmdQ, pMultiDeviceKernel, workDim, globalWorkOffset, globalWorkSize, localWorkSize, 1, &userEvent, nullptr); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(prevCount2, KernelSubmitCallbackCount); - EXPECT_EQ(prevCount3, CommandBufferCreateCallbackCount); - - retVal = clSetUserEventStatus(userEvent, CL_COMPLETE); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clFinish(cmdQ); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_EQ(prevCount4, CommandBufferCompleteCallbackCount); - - // Cleanup - retVal = clReleaseKernel(kernel); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseProgram(pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); - - pSource.reset(); - - retVal = clReleaseMemObject(buff0); - EXPECT_EQ(CL_SUCCESS, retVal); - retVal = clReleaseMemObject(buff1); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseEvent(userEvent); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseCommandQueue(cmdQ); - EXPECT_EQ(CL_SUCCESS, retVal); - - retVal = clReleaseContext(context); - EXPECT_EQ(CL_SUCCESS, retVal); -} - TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenTheSameKerneIsExecutedTwiceThenGTPinCreateKernelCallbackIsCalledOnce) { const auto &compilerProductHelper = pDevice->getRootDeviceEnvironment().getHelper(); @@ -2015,75 +1720,6 @@ TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenOneKernelIsSubmittedSeveral EXPECT_EQ(CL_SUCCESS, retVal); } -TEST_F(GTPinTests, givenInitializedGTPinInterfaceWhenLowMemoryConditionOccursThenKernelCreationFails) { - - InjectedFunction allocBufferFunc = [this](size_t failureIndex) { - cl_device_id device = (cl_device_id)pDevice; - cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &retVal); - EXPECT_EQ(CL_SUCCESS, retVal); - EXPECT_NE(nullptr, context); - auto pContext = castToObject(context); - - char binary[1024] = {1, 2, 3, 4, 5, 6, 7, 8, 9, '\0'}; - size_t binSize = 10; - MockProgram *pProgram = Program::createBuiltInFromGenBinary(pContext, pContext->getDevices(), &binary[0], binSize, &retVal); - ASSERT_NE(nullptr, pProgram); - EXPECT_EQ(CL_SUCCESS, retVal); - - PatchTokensTestData::ValidProgramWithKernel programTokens; - - auto rootDeviceIndex = pDevice->getRootDeviceIndex(); - - pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinary = makeCopy(programTokens.storage.data(), programTokens.storage.size()); - pProgram->buildInfos[rootDeviceIndex].unpackedDeviceBinarySize = programTokens.storage.size(); - retVal = pProgram->processGenBinary(*pDevice); - if (retVal == CL_OUT_OF_HOST_MEMORY) { - auto nonFailingAlloc = MemoryManagement::nonfailingAllocation; - EXPECT_NE(nonFailingAlloc, failureIndex); - } else { - EXPECT_EQ(CL_SUCCESS, retVal); - // Create kernels from program - cl_kernel kernels[2] = {0}; - cl_uint numCreatedKernels = 0; - - if (MemoryManagement::nonfailingAllocation != failureIndex) { - memoryManager->failAllAllocationsInDevicePool = true; - } - retVal = clCreateKernelsInProgram(pProgram, 2, kernels, &numCreatedKernels); - - if (MemoryManagement::nonfailingAllocation != failureIndex) { - if (retVal != CL_SUCCESS) { - EXPECT_EQ(nullptr, kernels[0]); - EXPECT_EQ(1u, numCreatedKernels); - } - clReleaseKernel(kernels[0]); - } else { - EXPECT_NE(nullptr, kernels[0]); - EXPECT_EQ(1u, numCreatedKernels); - clReleaseKernel(kernels[0]); - } - } - - clReleaseProgram(pProgram); - clReleaseContext(context); - }; - - gtpinCallbacks.onContextCreate = onContextCreate; - gtpinCallbacks.onContextDestroy = onContextDestroy; - gtpinCallbacks.onKernelCreate = onKernelCreate; - gtpinCallbacks.onKernelSubmit = onKernelSubmit; - gtpinCallbacks.onCommandBufferCreate = onCommandBufferCreate; - gtpinCallbacks.onCommandBufferComplete = onCommandBufferComplete; - retFromGtPin = GTPin_Init(>pinCallbacks, &driverServices, nullptr); - EXPECT_EQ(GTPIN_DI_SUCCESS, retFromGtPin); - ASSERT_EQ(&NEO::gtpinCreateBuffer, driverServices.bufferAllocate); - ASSERT_EQ(&NEO::gtpinFreeBuffer, driverServices.bufferDeallocate); - EXPECT_EQ(&NEO::gtpinMapBuffer, driverServices.bufferMap); - EXPECT_EQ(&NEO::gtpinUnmapBuffer, driverServices.bufferUnMap); - - injectFailures(allocBufferFunc); -} - TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { const auto &compilerProductHelper = pDevice->getRootDeviceEnvironment().getHelper(); if (compilerProductHelper.isForceToStatelessRequired() || !compilerProductHelper.isStatelessToStatefulBufferOffsetSupported()) { @@ -2146,8 +1782,7 @@ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { EXPECT_NE(nullptr, pSS1); // Enlarge SSH by one SURFACE STATE element - bool surfaceAdded = gtpinHelper.addSurfaceState(pKernel); - EXPECT_TRUE(surfaceAdded); + gtpinHelper.addSurfaceState(pKernel); size_t numBTS2 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(numBTS1 + 1, numBTS2); @@ -2165,10 +1800,6 @@ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { // Remove kernel's SSH pKernel->resizeSurfaceStateHeap(nullptr, 0, 0, 0); - // Try to enlarge SSH once again, this time the operation must fail - surfaceAdded = gtpinHelper.addSurfaceState(pKernel); - EXPECT_FALSE(surfaceAdded); - size_t numBTS3 = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numBTS3); size_t sizeSurfaceStates3 = pKernel->getSurfaceStateHeapSize(); @@ -2189,6 +1820,28 @@ TEST_F(GTPinTests, givenKernelWithSSHThenVerifyThatSSHResizeWorksWell) { EXPECT_EQ(CL_SUCCESS, retVal); } +TEST_F(GTPinTests, givenKernelWithoutAllocatedSSHThenGTPinStillCanAllocateNewSSHAndProperlyAddNewSurfaceState) { + auto kernelInfo = std::make_unique(); + ASSERT_EQ(nullptr, kernelInfo->heapInfo.pSsh); + ASSERT_EQ(0u, kernelInfo->heapInfo.SurfaceStateHeapSize); + + MockContext context(pDevice); + MockProgram program(&context, false, toClDeviceVector(*pDevice)); + auto kernel = std::make_unique(&program, *kernelInfo, *pDevice); + kernel->localBindingTableOffset = kernelInfo->kernelDescriptor.payloadMappings.bindingTable.tableOffset; + ASSERT_FALSE(isValidOffset(static_cast(kernel->localBindingTableOffset))); + + const auto >pinHelper = pDevice->getGTPinGfxCoreHelper(); + gtpinHelper.addSurfaceState(kernel.get()); + + auto numBts = kernel->getNumberOfBindingTableStates(); + EXPECT_EQ(1u, numBts); + auto sshSize = kernel->getSurfaceStateHeapSize(); + EXPECT_GT(sshSize, 0u); + auto offsetBTS = static_cast(kernel->getBindingTableOffset()); + EXPECT_TRUE(isValidOffset(offsetBTS)); +} + TEST_F(GTPinTests, givenKernelThenVerifyThatKernelCodeSubstitutionWorksWell) { cl_kernel kernel = nullptr; cl_program pProgram = nullptr; diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index 471fe900a3..feb900f6b9 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -797,7 +797,7 @@ HWTEST_F(HardwareCommandsTest, GivenBuffersNotRequiringSshWhenSettingBindingTabl EXPECT_EQ(0u, numSurfaceStates); // set binding table states - auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); + auto dstBindingTablePointer = HardwareCommandsHelper::checkForAdditionalBTAndSetBTPointer(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); auto usedAfter = ssh.getUsed(); @@ -836,19 +836,46 @@ HWTEST_F(HardwareCommandsTest, GivenZeroSurfaceStatesWhenSettingBindingTableStat auto numSurfaceStates = pKernel->getNumberOfBindingTableStates(); EXPECT_EQ(0u, numSurfaceStates); - auto dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); + auto dstBindingTablePointer = HardwareCommandsHelper::checkForAdditionalBTAndSetBTPointer(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); - dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); + dstBindingTablePointer = HardwareCommandsHelper::checkForAdditionalBTAndSetBTPointer(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); pKernelInfo->setBindingTable(64, 0); - dstBindingTablePointer = pushBindingTableAndSurfaceStates(ssh, *pKernel); + dstBindingTablePointer = HardwareCommandsHelper::checkForAdditionalBTAndSetBTPointer(ssh, *pKernel); EXPECT_EQ(0u, dstBindingTablePointer); delete pKernel; } + +HWTEST_F(HardwareCommandsTest, givenNoBTEntriesInKernelDescriptorAndGTPinInitializedWhenSettingBTPointerThenBTPointerIsSet) { + isGTPinInitialized = true; + + auto pKernelInfo = std::make_unique(); + pKernelInfo->kernelDescriptor.kernelAttributes.simdSize = 1; + ASSERT_EQ(0u, pKernelInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries); + + MockContext context; + MockProgram program(&context, false, toClDeviceVector(*pClDevice)); + + auto pKernel = std::make_unique(&program, *pKernelInfo, *pClDevice); + + constexpr auto mockSshSize{256u}; + constexpr auto mockBTOffset{32u}; + auto mockSsh = new char[mockSshSize]{0}; + ASSERT_EQ(CL_SUCCESS, pKernel->initialize()); + pKernel->resizeSurfaceStateHeap(mockSsh, mockSshSize, 1u, mockBTOffset); + + CommandQueueHw cmdQ(nullptr, pClDevice, 0, false); + auto &ssh = cmdQ.getIndirectHeap(IndirectHeap::Type::SURFACE_STATE, 8192); + + auto dstBindingTablePointer = HardwareCommandsHelper::checkForAdditionalBTAndSetBTPointer(ssh, *pKernel); + EXPECT_NE(0u, dstBindingTablePointer); + isGTPinInitialized = false; +} + HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerStateArrayWhenSendIndirectStateIsCalledThenInterfaceDescriptorIsNotPopulated) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h index cd1b6d9b31..02187f7303 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.h @@ -45,11 +45,4 @@ struct HardwareCommandsTest : ClDeviceFixture, std::unique_ptr mockKernelWithInternal; Kernel::SimpleKernelArgInfo kernelArgInfo = {}; std::vector kernelArguments; - - template - size_t pushBindingTableAndSurfaceStates(IndirectHeap &dstHeap, const Kernel &srcKernel) { - return EncodeSurfaceState::pushBindingTableAndSurfaceStates(dstHeap, srcKernel.getKernelInfo().kernelDescriptor.payloadMappings.bindingTable.numEntries, - srcKernel.getSurfaceStateHeap(), srcKernel.getSurfaceStateHeapSize(), - srcKernel.getNumberOfBindingTableStates(), srcKernel.getBindingTableOffset()); - } }; diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index 1d72185313..af571b93ad 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -116,6 +116,7 @@ class MockKernel : public Kernel { using Kernel::kernelSubmissionMap; using Kernel::kernelSvmGfxAllocations; using Kernel::kernelUnifiedMemoryGfxAllocations; + using Kernel::localBindingTableOffset; using Kernel::localIdsCache; using Kernel::maxKernelWorkGroupSize; using Kernel::maxWorkGroupSizeForCrossThreadData;