diff --git a/runtime/built_ins/aux_translation_builtin.h b/runtime/built_ins/aux_translation_builtin.h index e700c7d3e9..38b8db547b 100644 --- a/runtime/built_ins/aux_translation_builtin.h +++ b/runtime/built_ins/aux_translation_builtin.h @@ -21,11 +21,11 @@ class BuiltInOp : public BuiltinDispatchInfoBuilder template bool buildDispatchInfosForAuxTranslation(MultiDispatchInfo &multiDispatchInfo, const BuiltinOpParams &operationParams) const { size_t kernelInstanceNumber = 0; - size_t numMemObjectsToTranslate = operationParams.memObjsForAuxTranslation->size(); + size_t numMemObjectsToTranslate = multiDispatchInfo.getMemObjsForAuxTranslation()->size(); resizeKernelInstances(numMemObjectsToTranslate); multiDispatchInfo.setBuiltinOpParams(operationParams); - for (auto &memObj : *operationParams.memObjsForAuxTranslation) { + for (auto &memObj : *multiDispatchInfo.getMemObjsForAuxTranslation()) { DispatchInfoBuilder builder; size_t allocationSize = alignUp(memObj->getSize(), 512); diff --git a/runtime/built_ins/builtins_dispatch_builder.h b/runtime/built_ins/builtins_dispatch_builder.h index 09e04c7284..e6b7ea1eee 100644 --- a/runtime/built_ins/builtins_dispatch_builder.h +++ b/runtime/built_ins/builtins_dispatch_builder.h @@ -38,7 +38,6 @@ struct BuiltinOpParams { GraphicsAllocation *srcSvmAlloc = nullptr; GraphicsAllocation *dstSvmAlloc = nullptr; GraphicsAllocation *transferAllocation = nullptr; //mapAllocation or hostPtrAllocation - const MemObjsForAuxTranslation *memObjsForAuxTranslation = nullptr; AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::None; bool unifiedMemoryArgsRequireMemSync = true; Vec3 srcOffset = {0, 0, 0}; diff --git a/runtime/command_queue/command_queue_hw.h b/runtime/command_queue/command_queue_hw.h index bd6afdc3d9..7e510d7f27 100644 --- a/runtime/command_queue/command_queue_hw.h +++ b/runtime/command_queue/command_queue_hw.h @@ -392,8 +392,7 @@ class CommandQueueHw : public CommandQueue { cl_int enqueueMarkerForReadWriteOperation(MemObj *memObj, void *ptr, cl_command_type commandType, cl_bool blocking, cl_uint numEventsInWaitList, const cl_event *eventWaitList, cl_event *event); - MOCKABLE_VIRTUAL void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, - AuxTranslationDirection auxTranslationDirection); + MOCKABLE_VIRTUAL void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection); MOCKABLE_VIRTUAL bool forceStateless(size_t size); diff --git a/runtime/command_queue/command_queue_hw_base.inl b/runtime/command_queue/command_queue_hw_base.inl index c5082cae28..affe42e950 100644 --- a/runtime/command_queue/command_queue_hw_base.inl +++ b/runtime/command_queue/command_queue_hw_base.inl @@ -102,13 +102,16 @@ cl_int CommandQueueHw::enqueueMarkerForReadWriteOperation(MemObj *memObj } template -void CommandQueueHw::dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, - AuxTranslationDirection auxTranslationDirection) { +void CommandQueueHw::dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, + AuxTranslationDirection auxTranslationDirection) { + if (DebugManager.flags.DisableAuxTranslationBuiltinDispatch.get()) { + return; + } + auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice()); auto &auxTranslationBuilder = static_cast &>(builder); BuiltinOpParams dispatchParams; - dispatchParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; dispatchParams.auxTranslationDirection = auxTranslationDirection; auxTranslationBuilder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, dispatchParams); diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 6d5ab5df8d..06df902afd 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -63,8 +63,9 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getContext(), getDevice()); builtInLock.takeOwnership(builder, this->context); kernel->fillWithBuffersForAuxTranslation(memObjsForAuxTranslation); + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); if (!memObjsForAuxTranslation.empty()) { - dispatchAuxTranslation(multiDispatchInfo, memObjsForAuxTranslation, AuxTranslationDirection::AuxToNonAux); + dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux); } } @@ -84,7 +85,7 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount if (kernel->isAuxTranslationRequired()) { if (!memObjsForAuxTranslation.empty()) { UNRECOVERABLE_IF(kernel->isParentKernel); - dispatchAuxTranslation(multiDispatchInfo, memObjsForAuxTranslation, AuxTranslationDirection::NonAuxToAux); + dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux); } } } diff --git a/runtime/helpers/dispatch_info.h b/runtime/helpers/dispatch_info.h index f8709ce2fd..92d103eaca 100644 --- a/runtime/helpers/dispatch_info.h +++ b/runtime/helpers/dispatch_info.h @@ -192,10 +192,19 @@ struct MultiDispatchInfo { return builtinOpParams; } + void setMemObjsForAuxTranslation(const MemObjsForAuxTranslation &memObjsForAuxTranslation) { + this->memObjsForAuxTranslation = &memObjsForAuxTranslation; + } + + const MemObjsForAuxTranslation *getMemObjsForAuxTranslation() const { + return memObjsForAuxTranslation; + } + protected: BuiltinOpParams builtinOpParams = {}; StackVec dispatchInfos; StackVec redescribedSurfaces; + const MemObjsForAuxTranslation *memObjsForAuxTranslation = nullptr; Kernel *mainKernel = nullptr; }; } // namespace NEO diff --git a/runtime/os_interface/debug_variables_base.inl b/runtime/os_interface/debug_variables_base.inl index e46e4996a9..e9d6390d46 100644 --- a/runtime/os_interface/debug_variables_base.inl +++ b/runtime/os_interface/debug_variables_base.inl @@ -53,6 +53,7 @@ DECLARE_DEBUG_VARIABLE(bool, LoopAtPlatformInitialize, false, "Adds endless loop DECLARE_DEBUG_VARIABLE(bool, DoNotRegisterTrimCallback, false, "When set to true driver is not registering trim callback.") DECLARE_DEBUG_VARIABLE(bool, OverrideInvalidEngineWithDefault, false, "When set to true driver chooses engine 0 if no engine is found.") DECLARE_DEBUG_VARIABLE(bool, DisableAuxTranslation, false, "Disable aux translation when required by Kernel.") +DECLARE_DEBUG_VARIABLE(bool, DisableAuxTranslationBuiltinDispatch, false, "Disable aux translation builtin dispatch when required.") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(bool, PrintDebugSettings, false, "Enables dumping debug variables settings to text file") diff --git a/unit_tests/built_ins/built_in_tests.cpp b/unit_tests/built_ins/built_in_tests.cpp index 45f5a1909c..68ea8c2e91 100644 --- a/unit_tests/built_ins/built_in_tests.cpp +++ b/unit_tests/built_ins/built_in_tests.cpp @@ -260,6 +260,7 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTrans MemObjsForAuxTranslation memObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); std::vector builtinKernels; MockBuffer mockBuffer[3]; mockBuffer[0].getGraphicsAllocation()->setSize(0x1000); @@ -267,7 +268,6 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingNonAuxDispatchInfoForAuxTrans mockBuffer[2].getGraphicsAllocation()->setSize(0x30000); BuiltinOpParams builtinOpsParams; - builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; for (auto &buffer : mockBuffer) { @@ -307,6 +307,7 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslat MemObjsForAuxTranslation memObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); std::vector builtinKernels; MockBuffer mockBuffer[3]; mockBuffer[0].getGraphicsAllocation()->setSize(0x1000); @@ -314,7 +315,6 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxDispatchInfoForAuxTranslat mockBuffer[2].getGraphicsAllocation()->setSize(0x30000); BuiltinOpParams builtinOpsParams; - builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; for (auto &buffer : mockBuffer) { @@ -357,8 +357,8 @@ HWTEST_F(BuiltInTests, givenInputBufferWhenBuildingAuxTranslationDispatchThenPic std::vector builtinKernels; MultiDispatchInfo multiDispatchInfo; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpsParams; - builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; for (auto &buffer : mockBuffer) { memObjsForAuxTranslation.insert(&buffer); @@ -391,8 +391,8 @@ HWTEST_F(BuiltInTests, givenInvalidAuxTranslationDirectionWhenBuildingDispatchIn MockBuffer mockBuffer; MultiDispatchInfo multiDispatchInfo; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpsParams; - builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; memObjsForAuxTranslation.insert(&mockBuffer); @@ -427,9 +427,9 @@ HWTEST_F(BuiltInTests, givenMoreBuffersForAuxTranslationThanKernelInstancesWhenD MemObjsForAuxTranslation memObjsForAuxTranslation; BuiltinOpParams builtinOpsParams; MultiDispatchInfo multiDispatchInfo; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); MockBuffer mockBuffer[7]; - builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; for (auto &buffer : mockBuffer) { @@ -500,13 +500,12 @@ HWTEST_F(BuiltInTests, givenAuxTranslationKernelWhenSettingKernelArgsThenSetVali MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); MultiDispatchInfo multiDispatchInfo; MemObjsForAuxTranslation memObjsForAuxTranslation; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpParamsToAux; - builtinOpParamsToAux.memObjsForAuxTranslation = &memObjsForAuxTranslation; builtinOpParamsToAux.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; BuiltinOpParams builtinOpParamsToNonAux; - builtinOpParamsToNonAux.memObjsForAuxTranslation = &memObjsForAuxTranslation; builtinOpParamsToNonAux.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; cl_int retVal = CL_SUCCESS; @@ -558,9 +557,9 @@ HWTEST_F(BuiltInTests, givenAuxToNonAuxTranslationWhenSettingSurfaceStateThenSet MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); MultiDispatchInfo multiDispatchInfo; MemObjsForAuxTranslation memObjsForAuxTranslation; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpParams; - builtinOpParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; builtinOpParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; cl_int retVal = CL_SUCCESS; @@ -604,9 +603,9 @@ HWTEST_F(BuiltInTests, givenNonAuxToAuxTranslationWhenSettingSurfaceStateThenSet MockAuxBuilInOp mockAuxBuiltInOp(*pBuiltIns, *pContext, *pDevice); MultiDispatchInfo multiDispatchInfo; MemObjsForAuxTranslation memObjsForAuxTranslation; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); BuiltinOpParams builtinOpParams; - builtinOpParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; builtinOpParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; cl_int retVal = CL_SUCCESS; diff --git a/unit_tests/command_queue/dispatch_walker_tests.cpp b/unit_tests/command_queue/dispatch_walker_tests.cpp index d5539ffc96..e2cffa5a36 100644 --- a/unit_tests/command_queue/dispatch_walker_tests.cpp +++ b/unit_tests/command_queue/dispatch_walker_tests.cpp @@ -1259,11 +1259,11 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenAuxToNonAuxWhenTranslationRequiredTh MultiDispatchInfo multiDispatchInfo; MemObjsForAuxTranslation memObjsForAuxTranslation; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); memObjsForAuxTranslation.insert(&mockBuffer[0]); memObjsForAuxTranslation.insert(&mockBuffer[1]); BuiltinOpParams builtinOpsParams; - builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::AuxToNonAux; builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams); @@ -1315,11 +1315,11 @@ HWTEST_F(DispatchWalkerTest, givenKernelWhenNonAuxToAuxWhenTranslationRequiredTh MultiDispatchInfo multiDispatchInfo; MemObjsForAuxTranslation memObjsForAuxTranslation; + multiDispatchInfo.setMemObjsForAuxTranslation(memObjsForAuxTranslation); memObjsForAuxTranslation.insert(&mockBuffer[0]); memObjsForAuxTranslation.insert(&mockBuffer[1]); BuiltinOpParams builtinOpsParams; - builtinOpsParams.memObjsForAuxTranslation = &memObjsForAuxTranslation; builtinOpsParams.auxTranslationDirection = AuxTranslationDirection::NonAuxToAux; builder.buildDispatchInfosForAuxTranslation(multiDispatchInfo, builtinOpsParams); diff --git a/unit_tests/command_queue/enqueue_kernel_2_tests.cpp b/unit_tests/command_queue/enqueue_kernel_2_tests.cpp index 680ccfdeed..307cc1998d 100644 --- a/unit_tests/command_queue/enqueue_kernel_2_tests.cpp +++ b/unit_tests/command_queue/enqueue_kernel_2_tests.cpp @@ -673,16 +673,16 @@ struct EnqueueAuxKernelTests : public EnqueueKernelTest { public: using CommandQueueHw::commandStream; MyCmdQ(Context *context, Device *device) : CommandQueueHw(context, device, nullptr) {} - void dispatchAuxTranslation(MultiDispatchInfo &multiDispatchInfo, MemObjsForAuxTranslation &memObjsForAuxTranslation, - AuxTranslationDirection auxTranslationDirection) override { - CommandQueueHw::dispatchAuxTranslation(multiDispatchInfo, memObjsForAuxTranslation, auxTranslationDirection); + void dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection) override { + CommandQueueHw::dispatchAuxTranslationBuiltin(multiDispatchInfo, auxTranslationDirection); auxTranslationDirections.push_back(auxTranslationDirection); Kernel *lastKernel = nullptr; for (const auto &dispatchInfo : multiDispatchInfo) { lastKernel = dispatchInfo.getKernel(); dispatchInfos.emplace_back(dispatchInfo); } - dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), memObjsForAuxTranslation, auxTranslationDirection); + dispatchAuxTranslationInputs.emplace_back(lastKernel, multiDispatchInfo.size(), *multiDispatchInfo.getMemObjsForAuxTranslation(), + auxTranslationDirection); } void waitUntilComplete(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { @@ -803,6 +803,36 @@ HWTEST_F(EnqueueAuxKernelTests, givenKernelWithRequiredAuxTranslationWhenEnqueue EXPECT_TRUE(kernelAfter->isBuiltIn); } +HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableSetWhenDispatchingKernelWithRequiredAuxTranslationThenDontDispatch) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.DisableAuxTranslationBuiltinDispatch.set(true); + + MockKernelWithInternals mockKernel(*pDevice, context); + MyCmdQ cmdQ(context, pDevice); + size_t gws[3] = {1, 0, 0}; + MockBuffer buffer; + cl_mem clMem = &buffer; + + buffer.getGraphicsAllocation()->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); + mockKernel.kernelInfo.kernelArgInfo.resize(1); + mockKernel.kernelInfo.kernelArgInfo.at(0).kernelArgPatchInfoVector.resize(1); + mockKernel.kernelInfo.kernelArgInfo.at(0).pureStatefulBufferAccess = false; + mockKernel.mockKernel->initialize(); + mockKernel.mockKernel->auxTranslationRequired = true; + mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); + + cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); + + // aux builtin not dispatched before NDR + EXPECT_EQ(0u, std::get(cmdQ.dispatchAuxTranslationInputs.at(0))); + + // only NDR is dispatched + EXPECT_EQ(1u, std::get(cmdQ.dispatchAuxTranslationInputs.at(1))); + auto kernel = std::get(cmdQ.dispatchAuxTranslationInputs.at(1)); + EXPECT_FALSE(kernel->isBuiltIn); +} + HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker) { using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; diff --git a/unit_tests/test_files/igdrcl.config b/unit_tests/test_files/igdrcl.config index 59763cf413..6e7fb2105a 100644 --- a/unit_tests/test_files/igdrcl.config +++ b/unit_tests/test_files/igdrcl.config @@ -116,6 +116,7 @@ AllowOpenFdOperations = 0 EnableBlitterOperationsSupport = -1 EnableBlitterOperationsForReadWriteBuffers = -1 DisableAuxTranslation = 0 +DisableAuxTranslationBuiltinDispatch = 0 EnableFreeMemory = 0 OverrideStatelessMocsIndex = -1 CFEFusedEUDispatch = -1