diff --git a/opencl/source/command_queue/command_queue_hw.h b/opencl/source/command_queue/command_queue_hw.h index 65bd7848f7..15532d8d1e 100644 --- a/opencl/source/command_queue/command_queue_hw.h +++ b/opencl/source/command_queue/command_queue_hw.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2020 Intel Corporation + * Copyright (C) 2017-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -488,5 +488,7 @@ class CommandQueueHw : public CommandQueue { bool isGpgpuSubmissionForBcsRequired(bool queueBlocked) const; void setupEvent(EventBuilder &eventBuilder, cl_event *outEvent, uint32_t cmdType); + + bool isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo); }; } // namespace NEO diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl index 2aedbd003f..a80028491b 100644 --- a/opencl/source/command_queue/command_queue_hw_base.inl +++ b/opencl/source/command_queue/command_queue_hw_base.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2020 Intel Corporation + * Copyright (C) 2019-2021 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -112,10 +112,6 @@ cl_int CommandQueueHw::enqueueMarkerForReadWriteOperation(MemObj *memObj template void CommandQueueHw::dispatchAuxTranslationBuiltin(MultiDispatchInfo &multiDispatchInfo, AuxTranslationDirection auxTranslationDirection) { - if (HwHelperHw::getAuxTranslationMode() != AuxTranslationMode::Builtin) { - return; - } - auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice()); auto &auxTranslationBuilder = static_cast &>(builder); BuiltinOpParams dispatchParams; diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index d19e2bf04b..10c4204df0 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -61,20 +61,30 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount KernelObjsForAuxTranslation kernelObjsForAuxTranslation; MultiDispatchInfo multiDispatchInfo(kernel); + auto auxTranslationMode = AuxTranslationMode::None; + if (DebugManager.flags.ForceDispatchScheduler.get()) { forceDispatchScheduler(multiDispatchInfo); } else { auto rootDeviceIndex = device->getRootDeviceIndex(); + kernel->updateAuxTranslationRequired(); if (kernel->isAuxTranslationRequired()) { - auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice()); - builtInLock.takeOwnership(builder); kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation, rootDeviceIndex); multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation); + if (!kernelObjsForAuxTranslation.empty()) { - dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux); + auxTranslationMode = HwHelperHw::get().getAuxTranslationMode(device->getHardwareInfo()); } } + + if (AuxTranslationMode::Builtin == auxTranslationMode) { + auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice()); + builtInLock.takeOwnership(builder); + + dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux); + } + if (kernel->getKernelInfo(rootDeviceIndex).builtinDispatchBuilder == nullptr) { DispatchInfoBuilder builder(getClDevice()); builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3{0, 0, 0}, localWorkSizesIn); @@ -88,15 +98,14 @@ void CommandQueueHw::enqueueHandler(Surface *(&surfaces)[surfaceCount return; } } - if (kernel->isAuxTranslationRequired()) { - if (!kernelObjsForAuxTranslation.empty()) { - UNRECOVERABLE_IF(kernel->isParentKernel); - dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux); - } + + if (AuxTranslationMode::Builtin == auxTranslationMode) { + UNRECOVERABLE_IF(kernel->isParentKernel); + dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux); } } - if (ClHwHelperHw::isBlitAuxTranslationRequired(device->getHardwareInfo(), multiDispatchInfo)) { + if (AuxTranslationMode::Blit == auxTranslationMode) { setupBlitAuxTranslation(multiDispatchInfo); } @@ -195,7 +204,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, CsrDependencies csrDeps; BlitPropertiesContainer blitPropertiesContainer; - bool enqueueWithBlitAuxTranslation = ClHwHelperHw::isBlitAuxTranslationRequired(device->getHardwareInfo(), multiDispatchInfo); + bool enqueueWithBlitAuxTranslation = isBlitAuxTranslationRequired(multiDispatchInfo); if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { eventsRequest.fillCsrDependencies(csrDeps, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OnCsr); @@ -223,7 +232,7 @@ void CommandQueueHw::enqueueHandler(Surface **surfacesForResidency, blockedCommandsData, surfacesForResidency, numSurfaceForResidency); auto commandStreamStart = commandStream.getUsed(); - if (ClHwHelperHw::isBlitAuxTranslationRequired(device->getHardwareInfo(), multiDispatchInfo)) { + if (enqueueWithBlitAuxTranslation) { processDispatchForBlitAuxTranslation(multiDispatchInfo, blitPropertiesContainer, timestampPacketDependencies, eventsRequest, blockQueue); } @@ -1176,4 +1185,12 @@ void CommandQueueHw::dispatchBcsOrGpgpuEnqueue(MultiDispatchInfo &dis event); } } + +template +bool CommandQueueHw::isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo) { + return multiDispatchInfo.getKernelObjsForAuxTranslation() && + (multiDispatchInfo.getKernelObjsForAuxTranslation()->size() > 0) && + (HwHelperHw::get().getAuxTranslationMode(device->getHardwareInfo()) == AuxTranslationMode::Blit); +} + } // namespace NEO diff --git a/opencl/source/helpers/cl_hw_helper.h b/opencl/source/helpers/cl_hw_helper.h index 7864844b3b..965ce0d4b8 100644 --- a/opencl/source/helpers/cl_hw_helper.h +++ b/opencl/source/helpers/cl_hw_helper.h @@ -46,7 +46,6 @@ class ClHwHelperHw : public ClHwHelper { return clHwHelper; } - static bool isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo); bool requiresAuxResolves(const KernelInfo &kernelInfo) const override; bool allowRenderCompressionForContext(const HardwareInfo &hwInfo, const Context &context) const override; cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const override; diff --git a/opencl/source/helpers/cl_hw_helper_base.inl b/opencl/source/helpers/cl_hw_helper_base.inl index 0a49bb7de0..c7b7080d92 100644 --- a/opencl/source/helpers/cl_hw_helper_base.inl +++ b/opencl/source/helpers/cl_hw_helper_base.inl @@ -13,14 +13,6 @@ namespace NEO { -template -bool ClHwHelperHw::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo, const MultiDispatchInfo &multiDispatchInfo) { - return (HwHelperHw::getAuxTranslationMode() == AuxTranslationMode::Blit) && - hwInfo.capabilityTable.blitterOperationsSupported && - multiDispatchInfo.getKernelObjsForAuxTranslation() && - (multiDispatchInfo.getKernelObjsForAuxTranslation()->size() > 0); -} - template inline bool ClHwHelperHw::requiresAuxResolves(const KernelInfo &kernelInfo) const { return hasStatelessAccessToBuffer(kernelInfo); diff --git a/opencl/test/unit_test/built_ins/built_in_tests.cpp b/opencl/test/unit_test/built_ins/built_in_tests.cpp index f0e1075729..bb7ed5ed55 100644 --- a/opencl/test/unit_test/built_ins/built_in_tests.cpp +++ b/opencl/test/unit_test/built_ins/built_in_tests.cpp @@ -514,6 +514,7 @@ HWTEST2_P(AuxBuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalle mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); } else { auto gfxAllocation = mockKernelObjForAuxTranslation.mockGraphicsAllocation.get(); + gfxAllocation->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); auto ptr = reinterpret_cast(gfxAllocation->getGpuAddressToPatch()); mockKernel.mockKernel->setArgSvmAlloc(0, ptr, gfxAllocation); } @@ -525,6 +526,7 @@ HWTEST2_P(AuxBuiltInTests, givenKernelWithAuxTranslationRequiredWhenEnqueueCalle mockKernel.mockKernel->auxTranslationRequired = true; cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(1u, mockBuiltinKernel->takeOwnershipCalls); EXPECT_EQ(1u, mockBuiltinKernel->releaseOwnershipCalls); } diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp index 19801c5318..93ccc075ce 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_tests.cpp @@ -96,6 +96,35 @@ HWTEST_F(CommandQueueHwTest, WhenConstructingTwoCommandQueuesThenOnlyOneDebugSur EXPECT_EQ(dbgSurface, device->getGpgpuCommandStreamReceiver().getDebugSurfaceAllocation()); } +HWTEST_F(CommandQueueHwTest, givenMultiDispatchInfoWhenAskingForAuxTranslationThenCheckMemObjectsCountAndDebugFlag) { + DebugManagerStateRestore restore; + MockBuffer buffer; + KernelObjsForAuxTranslation kernelObjects; + MultiDispatchInfo multiDispatchInfo; + HardwareInfo *hwInfo = pClDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->getMutableHardwareInfo(); + + DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); + + MockCommandQueueHw mockCmdQueueHw(context, pClDevice, nullptr); + + hwInfo->capabilityTable.blitterOperationsSupported = true; + + EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); + + multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjects); + EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); + + kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, &buffer}); + EXPECT_TRUE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); + + hwInfo->capabilityTable.blitterOperationsSupported = false; + EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); + + hwInfo->capabilityTable.blitterOperationsSupported = true; + DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); + EXPECT_FALSE(mockCmdQueueHw.isBlitAuxTranslationRequired(multiDispatchInfo)); +} + HWTEST_F(CommandQueueHwTest, WhenEnqueuingBlockedMapUnmapOperationThenVirtualEventIsCreated) { CommandQueueHw *pHwQ = reinterpret_cast *>(pCmdQ); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 3e3308f54b..6f233da8da 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -930,10 +930,14 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); pDevice->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; + auto hwInfo = pDevice->getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex]->getMutableHardwareInfo(); + MockKernelWithInternals mockKernel(*pClDevice, context); MyCmdQ cmdQ(context, pClDevice); cmdQ.bcsEngine = cmdQ.gpgpuEngine; + hwInfo->capabilityTable.blitterOperationsSupported = true; + size_t gws[3] = {1, 0, 0}; MockBuffer buffer; cl_mem clMem = &buffer; @@ -947,15 +951,7 @@ HWTEST_F(EnqueueAuxKernelTests, givenDebugVariableDisablingBuiltinTranslationWhe mockKernel.mockKernel->setArgBuffer(0, sizeof(cl_mem *), &clMem); cmdQ.enqueueKernel(mockKernel.mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(2u, cmdQ.dispatchAuxTranslationInputs.size()); - - // aux builtin not dispatched before NDR - EXPECT_EQ(0u, std::get(cmdQ.dispatchAuxTranslationInputs.at(0))); - - // only NDR is dispatched - EXPECT_EQ(1u, std::get(cmdQ.dispatchAuxTranslationInputs.at(1))); - auto kernel = std::get(cmdQ.dispatchAuxTranslationInputs.at(1)); - EXPECT_FALSE(kernel->isBuiltIn); + EXPECT_EQ(0u, cmdQ.dispatchAuxTranslationInputs.size()); } HWCMDTEST_F(IGFX_GEN8_CORE, EnqueueKernelTest, givenCacheFlushAfterWalkerEnabledWhenAllocationRequiresCacheFlushThenFlushCommandPresentAfterWalker) { diff --git a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index 1100438b5a..bea76c6b02 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -734,52 +734,33 @@ HWTEST_F(HwHelperTest, whenQueryingMaxNumSamplersThenReturnSixteen) { EXPECT_EQ(16u, helper.getMaxNumSamplers()); } -HWTEST_F(HwHelperTest, givenMultiDispatchInfoWhenAskingForAuxTranslationThenCheckMemObjectsCountAndDebugFlag) { - DebugManagerStateRestore restore; - MockBuffer buffer; - KernelObjsForAuxTranslation kernelObjects; - MultiDispatchInfo multiDispatchInfo; - HardwareInfo hwInfo = *defaultHwInfo; - hwInfo.capabilityTable.blitterOperationsSupported = true; - - DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); - - EXPECT_FALSE(ClHwHelperHw::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo)); - - multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjects); - EXPECT_FALSE(ClHwHelperHw::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo)); - - kernelObjects.insert({KernelObjForAuxTranslation::Type::MEM_OBJ, &buffer}); - EXPECT_TRUE(ClHwHelperHw::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo)); - - hwInfo.capabilityTable.blitterOperationsSupported = false; - EXPECT_FALSE(ClHwHelperHw::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo)); - - hwInfo.capabilityTable.blitterOperationsSupported = true; - DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); - EXPECT_FALSE(ClHwHelperHw::isBlitAuxTranslationRequired(hwInfo, multiDispatchInfo)); -} - HWTEST_F(HwHelperTest, givenDebugVariableSetWhenAskingForAuxTranslationModeThenReturnCorrectValue) { DebugManagerStateRestore restore; - EXPECT_EQ(UnitTestHelper::requiredAuxTranslationMode, HwHelperHw::getAuxTranslationMode()); + HardwareInfo hwInfo = *defaultHwInfo; + hwInfo.capabilityTable.blitterOperationsSupported = true; - if (HwHelperHw::getAuxTranslationMode() == AuxTranslationMode::Blit) { - auto hwInfoConfig = HwInfoConfig::get(productFamily); - HardwareInfo hwInfo = *defaultHwInfo; - hwInfoConfig->configureHardwareCustom(&hwInfo, nullptr); - EXPECT_TRUE(hwInfo.capabilityTable.blitterOperationsSupported); + EXPECT_EQ(UnitTestHelper::requiredAuxTranslationMode, HwHelperHw::getAuxTranslationMode(hwInfo)); + + if (HwHelperHw::getAuxTranslationMode(hwInfo) == AuxTranslationMode::Blit) { + hwInfo.capabilityTable.blitterOperationsSupported = false; + + EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw::getAuxTranslationMode(hwInfo)); } DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::None)); - EXPECT_EQ(AuxTranslationMode::None, HwHelperHw::getAuxTranslationMode()); + EXPECT_EQ(AuxTranslationMode::None, HwHelperHw::getAuxTranslationMode(hwInfo)); + hwInfo.capabilityTable.blitterOperationsSupported = false; DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); - EXPECT_EQ(AuxTranslationMode::Blit, HwHelperHw::getAuxTranslationMode()); + EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw::getAuxTranslationMode(hwInfo)); + + hwInfo.capabilityTable.blitterOperationsSupported = true; + DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Blit)); + EXPECT_EQ(AuxTranslationMode::Blit, HwHelperHw::getAuxTranslationMode(hwInfo)); DebugManager.flags.ForceAuxTranslationMode.set(static_cast(AuxTranslationMode::Builtin)); - EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw::getAuxTranslationMode()); + EXPECT_EQ(AuxTranslationMode::Builtin, HwHelperHw::getAuxTranslationMode(hwInfo)); } HWTEST_F(HwHelperTest, givenHwHelperWhenAskingForTilingSupportThenReturnValidValue) { diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 7f2346e026..92b6446fa1 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -203,6 +203,7 @@ class MockCommandQueueHw : public CommandQueueHw { using BaseClass::commandQueueProperties; using BaseClass::commandStream; using BaseClass::gpgpuEngine; + using BaseClass::isBlitAuxTranslationRequired; using BaseClass::latestSentEnqueueType; using BaseClass::obtainCommandStream; using BaseClass::obtainNewTimestampPacketNodes; diff --git a/opencl/test/unit_test/mocks/mock_kernel.h b/opencl/test/unit_test/mocks/mock_kernel.h index d570ad2cc6..b5dd3eaadc 100644 --- a/opencl/test/unit_test/mocks/mock_kernel.h +++ b/opencl/test/unit_test/mocks/mock_kernel.h @@ -35,6 +35,7 @@ struct MockKernelObjForAuxTranslation : public KernelObjForAuxTranslation { MockKernelObjForAuxTranslation(Type type) : KernelObjForAuxTranslation(type, nullptr) { if (type == KernelObjForAuxTranslation::Type::MEM_OBJ) { mockBuffer.reset(new MockBuffer); + mockBuffer->getGraphicsAllocation(0)->setAllocationType(GraphicsAllocation::AllocationType::BUFFER_COMPRESSED); this->object = mockBuffer.get(); } else { DEBUG_BREAK_IF(type != KernelObjForAuxTranslation::Type::GFX_ALLOC); diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 0c4ae2eb2a..bb0500c149 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -149,7 +149,7 @@ class HwHelper { template class HwHelperHw : public HwHelper { public: - static HwHelper &get() { + static HwHelperHw &get() { static HwHelperHw hwHelper; return hwHelper; } @@ -268,7 +268,7 @@ class HwHelperHw : public HwHelper { uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) override; - static AuxTranslationMode getAuxTranslationMode(); + static AuxTranslationMode getAuxTranslationMode(const HardwareInfo &hwInfo); uint32_t getHwRevIdFromStepping(uint32_t stepping, const HardwareInfo &hwInfo) const override; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index e82b80d855..254e3a23fe 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -173,12 +173,19 @@ bool HwHelperHw::getEnableLocalMemory(const HardwareInfo &hwInfo) const } template -AuxTranslationMode HwHelperHw::getAuxTranslationMode() { +AuxTranslationMode HwHelperHw::getAuxTranslationMode(const HardwareInfo &hwInfo) { + auto mode = HwHelperHw::defaultAuxTranslationMode; + if (DebugManager.flags.ForceAuxTranslationMode.get() != -1) { - return static_cast(DebugManager.flags.ForceAuxTranslationMode.get()); + mode = static_cast(DebugManager.flags.ForceAuxTranslationMode.get()); } - return HwHelperHw::defaultAuxTranslationMode; + if (mode == AuxTranslationMode::Blit && !hwInfo.capabilityTable.blitterOperationsSupported) { + DEBUG_BREAK_IF(true); + mode = AuxTranslationMode::Builtin; + } + + return mode; } template