From 134462919df0efd1ed2e8b966383a068eefb1ad6 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Thu, 16 Jul 2020 20:00:52 +0200 Subject: [PATCH] Move barrier programming to Encode class Related-To: NEO-4576 Change-Id: I34b93b3118528b449c4e1b81826f9784633377a9 Signed-off-by: Zbigniew Zdanowicz --- .../device_queue/device_queue_hw_bdw_plus.inl | 8 +++-- .../source/helpers/hardware_commands_helper.h | 1 - .../helpers/hardware_commands_helper_base.inl | 5 +-- .../hardware_commands_helper_bdw_plus.inl | 5 --- .../test/unit_test/gen_common/CMakeLists.txt | 1 - .../unit_test/gen_common/hw_cmds_tests.cpp | 32 ------------------- .../command_container/command_encoder.h | 2 ++ .../command_encoder_base.inl | 11 ++++++- shared/source/kernel/kernel_descriptor.h | 1 + .../kernel_descriptor_from_patchtokens.cpp | 1 + .../encoders/test_encode_dispatch_kernel.cpp | 23 +++++++++++++ 11 files changed, 45 insertions(+), 45 deletions(-) delete mode 100644 opencl/test/unit_test/gen_common/hw_cmds_tests.cpp diff --git a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl index 71b2f09613..799019064a 100644 --- a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl +++ b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl @@ -5,6 +5,8 @@ * */ +#include "shared/source/command_container/command_encoder.h" + #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/device_queue/device_queue_hw_base.inl" #include "opencl/source/program/block_kernel_manager.h" @@ -203,9 +205,9 @@ void DeviceQueueHw::setupIndirectState(IndirectHeap &surfaceStateHeap pIDDestination[blockIndex + i].setKernelStartPointerHigh(blockKernelStartPointer >> 32); pIDDestination[blockIndex + i].setKernelStartPointer(static_cast(blockKernelStartPointer)); pIDDestination[blockIndex + i].setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL); - HardwareCommandsHelper::programBarrierEnable(&pIDDestination[blockIndex + i], - 
pBlockInfo->patchInfo.executionEnvironment->HasBarriers, - parentKernel->getDevice().getHardwareInfo()); + EncodeDispatchKernel::programBarrierEnable(&pIDDestination[blockIndex + i], + pBlockInfo->patchInfo.executionEnvironment->HasBarriers, + parentKernel->getDevice().getHardwareInfo()); // Set offset to sampler states, block's DHSOffset is added by scheduler pIDDestination[blockIndex + i].setSamplerStatePointer(static_cast(pBlockInfo->getBorderColorStateSize())); diff --git a/opencl/source/helpers/hardware_commands_helper.h b/opencl/source/helpers/hardware_commands_helper.h index 3576d60ba1..f2ec188a4e 100644 --- a/opencl/source/helpers/hardware_commands_helper.h +++ b/opencl/source/helpers/hardware_commands_helper.h @@ -155,7 +155,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { static void programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize); static void programMiAtomic(MI_ATOMIC &atomic, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize); static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); - static void programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); static void adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo); static const size_t alignInterfaceDescriptorData = 64 * sizeof(uint8_t); diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index aaf3549c1d..9d12176ebd 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -191,8 +191,9 @@ size_t HardwareCommandsHelper::sendInterfaceDescriptorData( 
static_cast(HwHelperHw::get().computeSlmValues(kernel.slmTotalSize)); interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize); - programBarrierEnable(&interfaceDescriptor, kernel.getKernelInfo().patchInfo.executionEnvironment->HasBarriers, - kernel.getDevice().getHardwareInfo()); + EncodeDispatchKernel::programBarrierEnable(&interfaceDescriptor, + kernel.getKernelInfo().patchInfo.executionEnvironment->HasBarriers, + kernel.getDevice().getHardwareInfo()); PreemptionHelper::programInterfaceDescriptorDataPreemption(&interfaceDescriptor, preemptionMode); HardwareCommandsHelper::adjustInterfaceDescriptorData(&interfaceDescriptor, kernel.getDevice().getHardwareInfo()); diff --git a/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl b/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl index e3df52ec1f..49a6b9c3c7 100644 --- a/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl +++ b/opencl/source/helpers/hardware_commands_helper_bdw_plus.inl @@ -164,11 +164,6 @@ void HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(Line *pipeControl = cmd; } -template -void HardwareCommandsHelper::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) { - pInterfaceDescriptor->setBarrierEnable(value); -} - template void HardwareCommandsHelper::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo) {} } // namespace NEO diff --git a/opencl/test/unit_test/gen_common/CMakeLists.txt b/opencl/test/unit_test/gen_common/CMakeLists.txt index 885f98d3d3..d258641ffc 100644 --- a/opencl/test/unit_test/gen_common/CMakeLists.txt +++ b/opencl/test/unit_test/gen_common/CMakeLists.txt @@ -9,7 +9,6 @@ set(IGDRCL_SRCS_tests_gen_common ${CMAKE_CURRENT_SOURCE_DIR}/exclude_tests/exclude_test_declare.cpp ${CMAKE_CURRENT_SOURCE_DIR}/exclude_tests/exclude_test_exclude.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gen_commands_common_validation.h - 
${CMAKE_CURRENT_SOURCE_DIR}/hw_cmds_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/matchers.h ${NEO_SHARED_TEST_DIRECTORY}/unit_test/test_macros/header${BRANCH_DIR_SUFFIX}/test.h ) diff --git a/opencl/test/unit_test/gen_common/hw_cmds_tests.cpp b/opencl/test/unit_test/gen_common/hw_cmds_tests.cpp deleted file mode 100644 index e479edeceb..0000000000 --- a/opencl/test/unit_test/gen_common/hw_cmds_tests.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2019-2020 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#include "shared/source/helpers/hw_cmds.h" -#include "shared/test/unit_test/mocks/mock_device.h" - -#include "opencl/source/helpers/hardware_commands_helper.h" -#include "test.h" - -using namespace NEO; - -using InterfaceDescriptorDataTests = ::testing::Test; - -HWCMDTEST_F(IGFX_GEN8_CORE, InterfaceDescriptorDataTests, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValueIsSet) { - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; - MockDevice device; - auto hwInfo = device.getHardwareInfo(); - - HardwareCommandsHelper::programBarrierEnable(&idd, 0, hwInfo); - EXPECT_FALSE(idd.getBarrierEnable()); - - HardwareCommandsHelper::programBarrierEnable(&idd, 1, hwInfo); - EXPECT_TRUE(idd.getBarrierEnable()); - - HardwareCommandsHelper::programBarrierEnable(&idd, 2, hwInfo); - EXPECT_TRUE(idd.getBarrierEnable()); -} diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index cafcee6e6f..1d57fa36e5 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -57,6 +57,8 @@ struct EncodeDispatchKernel { bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder); + + static void programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo); }; 
template diff --git a/shared/source/command_container/command_encoder_base.inl b/shared/source/command_container/command_encoder_base.inl index 117b365cc0..fb3ae9574d 100644 --- a/shared/source/command_container/command_encoder_base.inl +++ b/shared/source/command_container/command_encoder_base.inl @@ -66,7 +66,9 @@ void EncodeDispatchKernel::encode(CommandContainer &container, auto numThreadsPerThreadGroup = dispatchInterface->getNumThreadsPerThreadGroup(); idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup); - idd.setBarrierEnable(kernelDescriptor.kernelAttributes.flags.usesBarriers); + EncodeDispatchKernel::programBarrierEnable(&idd, + kernelDescriptor.kernelAttributes.hasBarriers, + container.getDevice()->getHardwareInfo()); auto slmSize = static_cast( HwHelperHw::get().computeSlmValues(dispatchInterface->getSlmTotalSize())); idd.setSharedLocalMemorySize( @@ -339,6 +341,13 @@ void EncodeDispatchKernel::encodeThreadData(WALKER_TYPE &walkerCmd, walkerCmd.setBottomExecutionMask(maxDword); } +template +void EncodeDispatchKernel::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, + uint32_t value, + const HardwareInfo &hwInfo) { + pInterfaceDescriptor->setBarrierEnable(value); +} + template void EncodeMiFlushDW::appendMiFlushDw(MI_FLUSH_DW *miFlushDwCmd) {} diff --git a/shared/source/kernel/kernel_descriptor.h b/shared/source/kernel/kernel_descriptor.h index 7d5ce2f65a..764456fd28 100644 --- a/shared/source/kernel/kernel_descriptor.h +++ b/shared/source/kernel/kernel_descriptor.h @@ -46,6 +46,7 @@ struct KernelDescriptor final { uint32_t perThreadScratchSize[2] = {0U, 0U}; uint32_t perThreadPrivateMemorySize = 0U; uint32_t perThreadSystemThreadSurfaceSize = 0U; + uint32_t hasBarriers = 0u; uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U}; uint16_t crossThreadDataSize = 0U; uint16_t perThreadDataSize = 0U; diff --git a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp 
b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp index f1690fd3ce..64fe2b7a01 100644 --- a/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp +++ b/shared/source/kernel/kernel_descriptor_from_patchtokens.cpp @@ -49,6 +49,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize; dst.kernelAttributes.flags.usesDeviceSideEnqueue = (0 != execEnv.HasDeviceEnqueue); dst.kernelAttributes.flags.usesBarriers = (0 != execEnv.HasBarriers); + dst.kernelAttributes.hasBarriers = execEnv.HasBarriers; dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption); dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber; dst.kernelAttributes.flags.usesFencesForReadWriteImages = (0 != execEnv.UsesFencesForReadWriteImages); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index a45f883df4..b2e495de28 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -14,10 +14,13 @@ #include "shared/test/unit_test/device_binary_format/patchtokens_tests.h" #include "shared/test/unit_test/fixtures/command_container_fixture.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" +#include "shared/test/unit_test/mocks/mock_device.h" #include "shared/test/unit_test/mocks/mock_dispatch_kernel_encoder_interface.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/test/unit_test/gen_common/matchers.h" +#include "test.h" + using namespace NEO; using CommandEncodeStatesTest = Test; @@ -707,3 +710,23 @@ HWTEST_F(WalkerThreadTest, givenDebugFlagEnabledWhenKernelDescriptorInlineDataDi EXPECT_FALSE(EncodeDispatchKernel::inlineDataProgrammingRequired(kernelDesc)); } + +using namespace NEO; + +using 
InterfaceDescriptorDataTests = ::testing::Test; + +HWCMDTEST_F(IGFX_GEN8_CORE, InterfaceDescriptorDataTests, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValueIsSet) { + using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; + MockDevice device; + auto hwInfo = device.getHardwareInfo(); + + EncodeDispatchKernel<FamilyType>::programBarrierEnable(&idd, 0, hwInfo); + EXPECT_FALSE(idd.getBarrierEnable()); + + EncodeDispatchKernel<FamilyType>::programBarrierEnable(&idd, 1, hwInfo); + EXPECT_TRUE(idd.getBarrierEnable()); + + EncodeDispatchKernel<FamilyType>::programBarrierEnable(&idd, 2, hwInfo); + EXPECT_TRUE(idd.getBarrierEnable()); +}