From d693d24f272648a1e242940f5bb0eea8bffb1869 Mon Sep 17 00:00:00 2001 From: Filip Hazubski Date: Tue, 18 May 2021 02:46:21 +0000 Subject: [PATCH] Add StateComputeModeProperties to StreamProperties Related-To: NEO-4940, NEO-4574 Signed-off-by: Filip Hazubski --- level_zero/core/source/cmdlist/cmdlist.h | 3 +- level_zero/core/source/cmdlist/cmdlist_hw.h | 5 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 14 +++- .../core/source/cmdlist/cmdlist_hw_base.inl | 2 +- level_zero/core/source/cmdqueue/cmdqueue_hw.h | 2 +- .../core/source/cmdqueue/cmdqueue_hw.inl | 2 +- .../test_cmdlist_append_launch_kernel.cpp | 3 +- .../device_queue/device_queue_hw_bdw_plus.inl | 3 +- .../command_container/command_encoder.h | 11 ++- .../command_encoder_bdw_plus.inl | 13 ++-- .../encode_compute_mode_tgllp_plus.inl | 12 +-- shared/source/command_stream/CMakeLists.txt | 5 +- .../command_stream_receiver_hw_base.inl | 2 +- .../definitions/stream_properties.h | 22 ------ .../definitions/stream_properties.inl | 22 ++++++ .../command_stream/stream_properties.cpp | 34 +++++++++ .../source/command_stream/stream_properties.h | 28 +++++++ .../source/command_stream/stream_property.h | 25 +++++++ shared/source/gen11/command_encoder_gen11.cpp | 1 + .../gen12lp/command_encoder_gen12lp.cpp | 24 +++++- shared/source/gen8/command_encoder_gen8.cpp | 1 + shared/source/gen9/command_encoder_gen9.cpp | 1 + shared/source/helpers/preamble_bdw_plus.inl | 3 +- .../test/common/gen11/test_preamble_gen11.cpp | 2 +- shared/test/common/gen12lp/CMakeLists.txt | 1 + .../common/gen12lp/test_encode_gen12lp.cpp | 43 +++++++++++ .../common/gen12lp/test_preamble_gen12lp.cpp | 2 +- .../test/common/gen8/test_preamble_gen8.cpp | 3 +- .../common/gen9/skl/test_preamble_skl.cpp | 3 +- .../unit_test/command_stream/CMakeLists.txt | 13 ++++ .../stream_properties_tests.cpp | 19 +++++ .../stream_properties_tests_common.cpp | 75 +++++++++++++++++++ .../stream_properties_tests_common.h | 19 +++++ .../encoders/test_encode_dispatch_kernel.cpp | 11 +-- .../unit_test/encoders/test_encode_states.cpp | 14 ++++ .../unit_test/preamble/preamble_tests.cpp | 2 +- 36 files changed, 368 insertions(+), 77 deletions(-) delete mode 100644 shared/source/command_stream/definitions/stream_properties.h create mode 100644 shared/source/command_stream/definitions/stream_properties.inl create mode 100644 shared/source/command_stream/stream_properties.cpp create mode 100644 shared/source/command_stream/stream_properties.h create mode 100644 shared/source/command_stream/stream_property.h create mode 100644 shared/test/common/gen12lp/test_encode_gen12lp.cpp create mode 100644 shared/test/unit_test/command_stream/CMakeLists.txt create mode 100644 shared/test/unit_test/command_stream/stream_properties_tests.cpp create mode 100644 shared/test/unit_test/command_stream/stream_properties_tests_common.cpp create mode 100644 shared/test/unit_test/command_stream/stream_properties_tests_common.h diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index e843567eff..2d88cac3bf 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -9,6 +9,7 @@ #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_stream/preemption_mode.h" +#include "shared/source/command_stream/stream_properties.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" #include "level_zero/core/source/device/device.h" @@ -16,8 +17,6 @@ #include #include -#include "stream_properties.h" - #include struct _ze_command_list_handle_t {}; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 3ab9421cd6..b883e2e0fa 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -7,11 +7,12 @@ #pragma once +#include "shared/source/command_stream/stream_properties.h" + #include "level_zero/core/source/builtin/builtin_functions_lib.h" #include "level_zero/core/source/cmdlist/cmdlist_imp.h" #include "igfxfmid.h" -#include "stream_properties.h" namespace NEO { enum class ImageType; @@ -219,7 +220,7 @@ struct CommandListCoreFamily : CommandListImp { bool isCooperative); ze_result_t appendLaunchKernelSplit(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent); ze_result_t prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions); - void updateStreamProperties(Kernel &kernel); + void updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable); void clearCommandsToPatch(); void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 38ff88d3d4..b3a38d2de1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1847,7 +1847,7 @@ ze_result_t CommandListCoreFamily::prepareIndirectParams(const ze } template -void CommandListCoreFamily::updateStreamProperties(Kernel &kernel) { +void CommandListCoreFamily::updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; @@ -1855,7 +1855,6 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel requiredStreamState.setCooperativeKernelProperties(kernel.usesSyncBuffer(), device->getHwInfo()); finalStreamState = requiredStreamState; containsAnyKernel = true; - return; } auto &hwInfo = device->getHwInfo(); @@ -1867,6 +1866,17 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel NEO::AdditionalKernelExecInfo::NotApplicable, finalStreamState); commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState}); } + + auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes; + auto &neoDevice = *device->getNEODevice(); + finalStreamState.setStateComputeModeProperties(false, kernelAttributes.numGrfRequired, isMultiOsContextCapable, + kernelAttributes.flags.useGlobalAtomics, + (neoDevice.getNumAvailableDevices() > 1)); + if (finalStreamState.stateComputeMode.isDirty()) { + NEO::EncodeWA::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true); + NEO::EncodeComputeMode::adjustComputeMode(*commandContainer.getCommandStream(), nullptr, finalStreamState.stateComputeMode); + NEO::EncodeWA::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), false); + } } template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index e357ce7e01..d13c287327 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -101,7 +101,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str()); } - updateStreamProperties(*kernel); + updateStreamProperties(*kernel, false); NEO::EncodeDispatchKernel::encode(commandContainer, reinterpret_cast(pThreadGroupDimensions), diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.h b/level_zero/core/source/cmdqueue/cmdqueue_hw.h index 997de5e9de..c17d49d76d 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/scratch_space_controller.h" +#include "shared/source/command_stream/stream_properties.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/helpers/constants.h" #include "shared/source/memory_manager/graphics_allocation.h" @@ -16,7 +17,6 @@ #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "igfxfmid.h" -#include "stream_properties.h" namespace L0 { diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index dce56a599c..14f285f757 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -13,6 +13,7 @@ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" +#include "shared/source/command_stream/stream_properties.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/device/device.h" #include "shared/source/helpers/hw_helper.h" @@ -34,7 +35,6 @@ #include "level_zero/tools/source/metrics/metric.h" #include "pipe_control_args.h" -#include "stream_properties.h" #include #include diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp index 7e3bbce131..ea240e6dbe 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp @@ -132,11 +132,12 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenAp const auto stream = commandContainer.getCommandStream(); const auto streamCpu = stream->getCpuBase(); + ze_group_count_t groupCount{1, 1, 1}; + commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); auto available = stream->getAvailableSpace(); stream->getSpace(available - sizeof(MI_BATCH_BUFFER_END) - 16); auto bbEndPosition = stream->getSpace(0); - ze_group_count_t groupCount{1, 1, 1}; commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr); auto usedSpaceAfter = commandContainer.getCommandStream()->getUsed(); diff --git a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl index 3164e3e883..56c53a1278 100644 --- a/opencl/source/device_queue/device_queue_hw_bdw_plus.inl +++ b/opencl/source/device_queue/device_queue_hw_bdw_plus.inl @@ -6,13 +6,12 @@ */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/stream_properties.h" #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/device_queue/device_queue_hw_base.inl" #include "opencl/source/program/block_kernel_manager.h" -#include "stream_properties.h" - namespace NEO { template diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index ada40ae852..864676f9a2 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -18,10 +18,11 @@ namespace NEO { -class GmmHelper; -struct HardwareInfo; -class IndirectHeap; class BindlessHeapsHelper; +class GmmHelper; +class IndirectHeap; +struct HardwareInfo; +struct StateComputeModeProperties; template struct EncodeDispatchKernel { @@ -267,9 +268,7 @@ struct EncodeSurfaceState { template struct EncodeComputeMode { - using STATE_COMPUTE_MODE = typename GfxFamily::STATE_COMPUTE_MODE; - static void adjustComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, - bool isMultiOsContextCapable, bool useGlobalAtomics, bool areMultipleSubDevicesInContext); + static void adjustComputeMode(LinearStream &csr, void *const stateComputeModePtr, StateComputeModeProperties &properties); static void adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor); }; diff --git a/shared/source/command_container/command_encoder_bdw_plus.inl b/shared/source/command_container/command_encoder_bdw_plus.inl index 920e195ba9..ea0f3fc744 100644 --- a/shared/source/command_container/command_encoder_bdw_plus.inl +++ b/shared/source/command_container/command_encoder_bdw_plus.inl @@ -68,11 +68,6 @@ void EncodeDispatchKernel::encode(CommandContainer &container, idd.setKernelStartPointerHigh(0u); } - EncodeWA::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), true); - EncodeStates::adjustStateComputeMode(*container.getCommandStream(), container.lastSentNumGrfRequired, nullptr, false, false, - kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, device->getNumAvailableDevices() > 1); - EncodeWA::encodeAdditionalPipelineSelect(*container.getDevice(), *container.getCommandStream(), false); - auto numThreadsPerThreadGroup = dispatchInterface->getNumThreadsPerThreadGroup(); idd.setNumberOfThreadsInGpgpuThreadGroup(numThreadsPerThreadGroup); @@ -344,6 +339,14 @@ size_t EncodeDispatchKernel::estimateEncodeDispatchKernelCmdsSize(Device return totalSize; } +template +void EncodeComputeMode::adjustComputeMode(LinearStream &csr, void *const stateComputeModePtr, StateComputeModeProperties &properties) { +} + +template +void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) { +} + template void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd) { auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); diff --git a/shared/source/command_container/encode_compute_mode_tgllp_plus.inl b/shared/source/command_container/encode_compute_mode_tgllp_plus.inl index a243de344e..42bb500527 100644 --- a/shared/source/command_container/encode_compute_mode_tgllp_plus.inl +++ b/shared/source/command_container/encode_compute_mode_tgllp_plus.inl @@ -13,15 +13,9 @@ namespace NEO { template void EncodeStates::adjustStateComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, bool isMultiOsContextCapable, bool requiresCoherency, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { - using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE; - using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT; - STATE_COMPUTE_MODE stateComputeMode = (stateComputeModePtr != nullptr) ? *(static_cast(stateComputeModePtr)) : Family::cmdInitStateComputeMode; - FORCE_NON_COHERENT coherencyValue = !requiresCoherency ? FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT : FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_DISABLED; - stateComputeMode.setForceNonCoherent(coherencyValue); - - stateComputeMode.setMaskBits(stateComputeMode.getMaskBits() | Family::stateComputeModeForceNonCoherentMask); - - EncodeComputeMode::adjustComputeMode(csr, numGrfRequired, &stateComputeMode, isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext); + StreamProperties properties{}; + properties.setStateComputeModeProperties(requiresCoherency, numGrfRequired, isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext); + EncodeComputeMode::adjustComputeMode(csr, stateComputeModePtr, properties.stateComputeMode); } template diff --git a/shared/source/command_stream/CMakeLists.txt b/shared/source/command_stream/CMakeLists.txt index b985977f6e..10fdeb266d 100644 --- a/shared/source/command_stream/CMakeLists.txt +++ b/shared/source/command_stream/CMakeLists.txt @@ -18,7 +18,7 @@ set(NEO_CORE_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/csr_deps.h ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/command_stream_receiver_hw_ext.inl ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/csr_properties_flags.h - ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/stream_properties.h + ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/stream_properties.inl ${CMAKE_CURRENT_SOURCE_DIR}/device_command_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/experimental_command_buffer.h @@ -33,6 +33,9 @@ set(NEO_CORE_COMMAND_STREAM ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller.h ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.cpp ${CMAKE_CURRENT_SOURCE_DIR}/scratch_space_controller_base.h + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/stream_properties.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/stream_properties.h + ${CMAKE_CURRENT_SOURCE_DIR}/stream_property.h ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/submissions_aggregator.h ${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index d29f8c2bbb..e987f685e7 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -10,6 +10,7 @@ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller_base.h" +#include "shared/source/command_stream/stream_properties.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/direct_submission/direct_submission_hw.h" @@ -32,7 +33,6 @@ #include "shared/source/utilities/tag_allocator.h" #include "command_stream_receiver_hw_ext.inl" -#include "stream_properties.h" namespace NEO { diff --git a/shared/source/command_stream/definitions/stream_properties.h b/shared/source/command_stream/definitions/stream_properties.h deleted file mode 100644 index 8af8d9d3b0..0000000000 --- a/shared/source/command_stream/definitions/stream_properties.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Copyright (C) 2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once - -namespace NEO { - -struct StreamProperties { - bool setCooperativeKernelProperties(int32_t cooperativeKernelProperties, const HardwareInfo &hwInfo) { - return false; - } - - int32_t getCooperativeKernelProperties() const { - return -1; - } -}; - -} // namespace NEO diff --git a/shared/source/command_stream/definitions/stream_properties.inl b/shared/source/command_stream/definitions/stream_properties.inl new file mode 100644 index 0000000000..9b85cf7583 --- /dev/null +++ b/shared/source/command_stream/definitions/stream_properties.inl @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/stream_property.h" + +namespace NEO { + +struct StateComputeModeProperties { + StreamProperty isCoherencyRequired{}; + + bool isDirty(); + void clearIsDirty(); +}; + +struct FrontEndProperties { +}; + +} // namespace NEO diff --git a/shared/source/command_stream/stream_properties.cpp b/shared/source/command_stream/stream_properties.cpp new file mode 100644 index 0000000000..7a36c64c53 --- /dev/null +++ b/shared/source/command_stream/stream_properties.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/stream_properties.h" + +using namespace NEO; + +bool StreamProperties::setCooperativeKernelProperties(int32_t cooperativeKernelProperties, const HardwareInfo &hwInfo) { + return false; +} + +int32_t StreamProperties::getCooperativeKernelProperties() const { + return -1; +} + +void StreamProperties::setStateComputeModeProperties(bool requiresCoherency, uint32_t numGrfRequired, bool isMultiOsContextCapable, + bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { + stateComputeMode.clearIsDirty(); + + int32_t isCoherencyRequired = (requiresCoherency ? 1 : 0); + stateComputeMode.isCoherencyRequired.set(isCoherencyRequired); +} + +bool StateComputeModeProperties::isDirty() { + return isCoherencyRequired.isDirty; +} + +void StateComputeModeProperties::clearIsDirty() { + isCoherencyRequired.isDirty = false; +} diff --git a/shared/source/command_stream/stream_properties.h b/shared/source/command_stream/stream_properties.h new file mode 100644 index 0000000000..51e9c5a5e1 --- /dev/null +++ b/shared/source/command_stream/stream_properties.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/hw_info.h" + +#include "stream_properties.inl" + +namespace NEO { + +struct StreamProperties { + bool setCooperativeKernelProperties(int32_t cooperativeKernelProperties, const HardwareInfo &hwInfo); + int32_t getCooperativeKernelProperties() const; + + void setStateComputeModeProperties(bool requiresCoherency, uint32_t numGrfRequired, bool isMultiOsContextCapable, + bool useGlobalAtomics, bool areMultipleSubDevicesInContext); + + StateComputeModeProperties stateComputeMode{}; + FrontEndProperties frontEndState{}; +}; + +} // namespace NEO diff --git a/shared/source/command_stream/stream_property.h b/shared/source/command_stream/stream_property.h new file mode 100644 index 0000000000..2c210c372d --- /dev/null +++ b/shared/source/command_stream/stream_property.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include + +namespace NEO { + +struct StreamProperty { + int32_t value = -1; + bool isDirty = false; + void set(int32_t newValue) { + if ((value != newValue) && (newValue != -1)) { + value = newValue; + isDirty = true; + } + } +}; + +} // namespace NEO diff --git a/shared/source/gen11/command_encoder_gen11.cpp b/shared/source/gen11/command_encoder_gen11.cpp index ccc39f37be..474d964778 100644 --- a/shared/source/gen11/command_encoder_gen11.cpp +++ b/shared/source/gen11/command_encoder_gen11.cpp @@ -56,4 +56,5 @@ template struct EncodeMiFlushDW; template struct EncodeMemoryPrefetch; template struct EncodeWA; template struct EncodeMiArbCheck; +template struct EncodeComputeMode; } // namespace NEO diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 2227339704..16543e0a89 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/stream_properties.h" #include "shared/source/gen12lp/hw_cmds_base.h" #include "shared/source/gen12lp/reg_configs.h" #include "shared/source/helpers/preamble.h" @@ -35,11 +36,25 @@ size_t EncodeStates::getAdjustStateComputeModeSize() { } template <> -void EncodeComputeMode::adjustComputeMode(LinearStream &csr, uint32_t numGrfRequired, void *const stateComputeModePtr, - bool isMultiOsContextCapable, bool useGlobalAtomics, bool areMultipleSubDevicesInContext) { - STATE_COMPUTE_MODE *stateComputeMode = static_cast(stateComputeModePtr); +void EncodeComputeMode::adjustComputeMode(LinearStream &csr, void *const stateComputeModePtr, StateComputeModeProperties &properties) { + using STATE_COMPUTE_MODE = typename Family::STATE_COMPUTE_MODE; + using FORCE_NON_COHERENT = typename STATE_COMPUTE_MODE::FORCE_NON_COHERENT; + + STATE_COMPUTE_MODE stateComputeMode = (stateComputeModePtr) ? *(static_cast(stateComputeModePtr)) + : Family::cmdInitStateComputeMode; + auto maskBits = stateComputeMode.getMaskBits(); + + if (properties.isCoherencyRequired.isDirty) { + FORCE_NON_COHERENT coherencyValue = !properties.isCoherencyRequired.value ? FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT + : FORCE_NON_COHERENT::FORCE_NON_COHERENT_FORCE_DISABLED; + stateComputeMode.setForceNonCoherent(coherencyValue); + maskBits |= Family::stateComputeModeForceNonCoherentMask; + } + + stateComputeMode.setMaskBits(maskBits); + auto buffer = csr.getSpace(sizeof(STATE_COMPUTE_MODE)); - *reinterpret_cast(buffer) = *stateComputeMode; + *reinterpret_cast(buffer) = stateComputeMode; } template <> @@ -96,4 +111,5 @@ template struct EncodeMiFlushDW; template struct EncodeWA; template struct EncodeMemoryPrefetch; template struct EncodeMiArbCheck; +template struct EncodeComputeMode; } // namespace NEO diff --git a/shared/source/gen8/command_encoder_gen8.cpp b/shared/source/gen8/command_encoder_gen8.cpp index b48f8cec0e..11fa7ef029 100644 --- a/shared/source/gen8/command_encoder_gen8.cpp +++ b/shared/source/gen8/command_encoder_gen8.cpp @@ -50,4 +50,5 @@ template struct EncodeMiFlushDW; template struct EncodeMemoryPrefetch; template struct EncodeWA; template struct EncodeMiArbCheck; +template struct EncodeComputeMode; } // namespace NEO diff --git a/shared/source/gen9/command_encoder_gen9.cpp b/shared/source/gen9/command_encoder_gen9.cpp index 51435303dd..fd02916ec7 100644 --- a/shared/source/gen9/command_encoder_gen9.cpp +++ b/shared/source/gen9/command_encoder_gen9.cpp @@ -50,4 +50,5 @@ template struct EncodeMiFlushDW; template struct EncodeMemoryPrefetch; template struct EncodeWA; template struct EncodeMiArbCheck; +template struct EncodeComputeMode; } // namespace NEO diff --git a/shared/source/helpers/preamble_bdw_plus.inl b/shared/source/helpers/preamble_bdw_plus.inl index a05a8a92a5..c1c7cda28c 100644 --- a/shared/source/helpers/preamble_bdw_plus.inl +++ b/shared/source/helpers/preamble_bdw_plus.inl @@ -5,14 +5,13 @@ * */ +#include "shared/source/command_stream/stream_properties.h" #include "shared/source/helpers/flat_batch_buffer_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/preamble_base.inl" #include "opencl/source/kernel/kernel_execution_type.h" -#include "stream_properties.h" - namespace NEO { template diff --git a/shared/test/common/gen11/test_preamble_gen11.cpp b/shared/test/common/gen11/test_preamble_gen11.cpp index 5e08195312..9b0712f6ca 100644 --- a/shared/test/common/gen11/test_preamble_gen11.cpp +++ b/shared/test/common/gen11/test_preamble_gen11.cpp @@ -6,11 +6,11 @@ */ #include "shared/source/command_stream/preemption.h" +#include "shared/source/command_stream/stream_properties.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/preamble/preamble_fixture.h" #include "reg_configs_common.h" -#include "stream_properties.h" using namespace NEO; diff --git a/shared/test/common/gen12lp/CMakeLists.txt b/shared/test/common/gen12lp/CMakeLists.txt index ac08f509cf..5be19c3b18 100644 --- a/shared/test/common/gen12lp/CMakeLists.txt +++ b/shared/test/common/gen12lp/CMakeLists.txt @@ -20,6 +20,7 @@ if(TESTS_GEN12LP) ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image_surface_state_tests_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_command_encoder_gen12lp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_encode_math_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_gen12lp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper_gen12lp.cpp diff --git a/shared/test/common/gen12lp/test_encode_gen12lp.cpp b/shared/test/common/gen12lp/test_encode_gen12lp.cpp new file mode 100644 index 0000000000..865ddd9473 --- /dev/null +++ b/shared/test/common/gen12lp/test_encode_gen12lp.cpp @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/stream_properties.h" + +#include "test.h" + +#include "hw_cmds.h" + +using namespace NEO; + +using CommandEncodeGen12LpTest = ::testing::Test; + +GEN12LPTEST_F(CommandEncodeGen12LpTest, whenProgrammingStateComputeModeThenProperFieldsAreSet) { + using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; + uint8_t buffer[64]{}; + + StateComputeModeProperties properties; + auto pLinearStream = std::make_unique(buffer, sizeof(buffer)); + EncodeComputeMode::adjustComputeMode(*pLinearStream, nullptr, properties); + auto pScm = reinterpret_cast(pLinearStream->getCpuBase()); + EXPECT_EQ(0u, pScm->getMaskBits()); + EXPECT_EQ(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED, pScm->getForceNonCoherent()); + + properties.isCoherencyRequired.value = 0; + pLinearStream = std::make_unique(buffer, sizeof(buffer)); + EncodeComputeMode::adjustComputeMode(*pLinearStream, nullptr, properties); + pScm = reinterpret_cast(pLinearStream->getCpuBase()); + EXPECT_EQ(0u, pScm->getMaskBits()); + EXPECT_EQ(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_DISABLED, pScm->getForceNonCoherent()); + + properties.isCoherencyRequired.isDirty = true; + pLinearStream = std::make_unique(buffer, sizeof(buffer)); + EncodeComputeMode::adjustComputeMode(*pLinearStream, nullptr, properties); + pScm = reinterpret_cast(pLinearStream->getCpuBase()); + EXPECT_EQ(FamilyType::stateComputeModeForceNonCoherentMask, pScm->getMaskBits()); + EXPECT_EQ(STATE_COMPUTE_MODE::FORCE_NON_COHERENT_FORCE_GPU_NON_COHERENT, pScm->getForceNonCoherent()); +} diff --git a/shared/test/common/gen12lp/test_preamble_gen12lp.cpp b/shared/test/common/gen12lp/test_preamble_gen12lp.cpp index 533376b856..4ddd6f8204 100644 --- a/shared/test/common/gen12lp/test_preamble_gen12lp.cpp +++ b/shared/test/common/gen12lp/test_preamble_gen12lp.cpp @@ -6,11 +6,11 @@ */ #include "shared/source/command_stream/preemption.h" +#include "shared/source/command_stream/stream_properties.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/preamble/preamble_fixture.h" #include "reg_configs_common.h" -#include "stream_properties.h" using namespace NEO; diff --git a/shared/test/common/gen8/test_preamble_gen8.cpp b/shared/test/common/gen8/test_preamble_gen8.cpp index 62e95f47e0..1720f3dd18 100644 --- a/shared/test/common/gen8/test_preamble_gen8.cpp +++ b/shared/test/common/gen8/test_preamble_gen8.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_stream/stream_properties.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/gen8/reg_configs.h" #include "shared/source/helpers/preamble.h" @@ -12,8 +13,6 @@ #include "opencl/test/unit_test/fixtures/platform_fixture.h" -#include "stream_properties.h" - using namespace NEO; typedef PreambleFixture BdwSlm; diff --git a/shared/test/common/gen9/skl/test_preamble_skl.cpp b/shared/test/common/gen9/skl/test_preamble_skl.cpp index 8fdc892557..1d7ab03cba 100644 --- a/shared/test/common/gen9/skl/test_preamble_skl.cpp +++ b/shared/test/common/gen9/skl/test_preamble_skl.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_stream/preemption.h" +#include "shared/source/command_stream/stream_properties.h" #include "shared/source/command_stream/thread_arbitration_policy.h" #include "shared/source/gen9/reg_configs.h" #include "shared/source/helpers/preamble.h" @@ -13,8 +14,6 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/preamble/preamble_fixture.h" -#include "stream_properties.h" - using namespace NEO; typedef PreambleFixture SklSlm; diff --git a/shared/test/unit_test/command_stream/CMakeLists.txt b/shared/test/unit_test/command_stream/CMakeLists.txt new file mode 100644 index 0000000000..21d1531196 --- /dev/null +++ b/shared/test/unit_test/command_stream/CMakeLists.txt @@ -0,0 +1,13 @@ +# +# Copyright (C) 2021 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +target_sources(${TARGET_NAME} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/stream_properties_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/stream_properties_tests_common.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/stream_properties_tests_common.h +) +add_subdirectories() diff --git a/shared/test/unit_test/command_stream/stream_properties_tests.cpp b/shared/test/unit_test/command_stream/stream_properties_tests.cpp new file mode 100644 index 0000000000..63d88d1b94 --- /dev/null +++ b/shared/test/unit_test/command_stream/stream_properties_tests.cpp @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/stream_properties.h" +#include "shared/test/unit_test/command_stream/stream_properties_tests_common.h" + +namespace NEO { + +std::vector getAllStateComputeModeProperties(StateComputeModeProperties &properties) { + std::vector allProperties; + allProperties.push_back(&properties.isCoherencyRequired); + return allProperties; +} + +} // namespace NEO diff --git a/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp b/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp new file mode 100644 index 0000000000..f9c8870f53 --- /dev/null +++ b/shared/test/unit_test/command_stream/stream_properties_tests_common.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/test/unit_test/command_stream/stream_properties_tests_common.h" + +#include "shared/source/command_stream/stream_properties.h" +#include "shared/test/common/helpers/debug_manager_state_restore.h" + +#include "test.h" + +using namespace NEO; + +TEST(StreamPropertiesTests, whenPropertyValueIsChangedThenProperStateIsSet) { + NEO::StreamProperty streamProperty; + + EXPECT_EQ(-1, streamProperty.value); + EXPECT_FALSE(streamProperty.isDirty); + + streamProperty.set(-1); + EXPECT_EQ(-1, streamProperty.value); + EXPECT_FALSE(streamProperty.isDirty); + + int32_t valuesToTest[] = {0, 1}; + for (auto valueToTest : valuesToTest) { + streamProperty.set(valueToTest); + EXPECT_EQ(valueToTest, streamProperty.value); + EXPECT_TRUE(streamProperty.isDirty); + + streamProperty.isDirty = false; + streamProperty.set(valueToTest); + EXPECT_EQ(valueToTest, streamProperty.value); + EXPECT_FALSE(streamProperty.isDirty); + + streamProperty.set(-1); + EXPECT_EQ(valueToTest, streamProperty.value); + EXPECT_FALSE(streamProperty.isDirty); + } +} + +TEST(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrectValuesAreSet) { + StreamProperties properties; + for (auto requiresCoherency : ::testing::Bool()) { + properties.setStateComputeModeProperties(requiresCoherency, 0u, false, false, false); + EXPECT_EQ(requiresCoherency, properties.stateComputeMode.isCoherencyRequired.value); + } +} + +TEST(StreamPropertiesTests, givenVariousStatesOfThePropertiesWhenIsStateComputeModeDirtyIsQueriedThenCorrectValueIsReturned) { + struct MockStateComputeModeProperties : StateComputeModeProperties { + using StateComputeModeProperties::clearIsDirty; + }; + MockStateComputeModeProperties properties; + + EXPECT_FALSE(properties.isDirty()); + for (auto pProperty : getAllStateComputeModeProperties(properties)) { + pProperty->isDirty = true; + EXPECT_TRUE(properties.isDirty()); + pProperty->isDirty = false; + EXPECT_FALSE(properties.isDirty()); + } + for (auto pProperty : getAllStateComputeModeProperties(properties)) { + pProperty->isDirty = true; + } + EXPECT_TRUE(properties.isDirty()); + + properties.clearIsDirty(); + for (auto pProperty : getAllStateComputeModeProperties(properties)) { + EXPECT_FALSE(pProperty->isDirty); + } + EXPECT_FALSE(properties.isDirty()); +} diff --git a/shared/test/unit_test/command_stream/stream_properties_tests_common.h b/shared/test/unit_test/command_stream/stream_properties_tests_common.h new file mode 100644 index 0000000000..c6f748b4f3 --- /dev/null +++ b/shared/test/unit_test/command_stream/stream_properties_tests_common.h @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include + +namespace NEO { + +struct StateComputeModeProperties; +struct StreamProperty; + +std::vector getAllStateComputeModeProperties(StateComputeModeProperties &properties); + +} // namespace NEO diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index 7464e52a59..0eb0bbf8d1 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -43,7 +43,7 @@ TEST_F(CommandEncodeStatesTest, givenCommandConatinerCreatedWithMaxNumAggregateI delete cmdContainer; } -HWTEST_F(CommandEncodeStatesTest, givenenDispatchInterfaceWhenDispatchKernelThenWalkerCommandProgrammed) { +HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWalkerCommandProgrammed) { uint32_t dims[] = {2, 1, 1}; std::unique_ptr dispatchInterface(new MockDispatchKernelEncoder()); bool requiresUncachedMocs = false; @@ -629,13 +629,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotCha GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed()); - if (MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(pDevice->getHardwareInfo())) { - auto itorPC = findAll(commands.begin(), commands.end()); - EXPECT_EQ(2u, itorPC.size()); - } else { - auto itorPC = find(commands.begin(), commands.end()); - ASSERT_EQ(itorPC, commands.end()); - } + auto itorPC = find(commands.begin(), commands.end()); + ASSERT_EQ(itorPC, commands.end()); } HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) { diff --git a/shared/test/unit_test/encoders/test_encode_states.cpp b/shared/test/unit_test/encoders/test_encode_states.cpp index 66aee1abad..7fe193da7a 100644 --- a/shared/test/unit_test/encoders/test_encode_states.cpp +++ b/shared/test/unit_test/encoders/test_encode_states.cpp @@ -284,3 +284,17 @@ HWTEST_F(CommandEncodeStatesTest, givenAnUnalignedDstPtrThenCorrectAlignedPtrAnd EXPECT_TRUE((ptr & (NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment() - 1)) == 0x0u); EXPECT_NE(0u, offset); } + +HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, whenAdjustPipelineSelectIsCalledThenNothingHappens) { + using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + auto initialUsed = cmdContainer->getCommandStream()->getUsed(); + NEO::EncodeComputeMode::adjustPipelineSelect(*cmdContainer, descriptor); + EXPECT_EQ(initialUsed, cmdContainer->getCommandStream()->getUsed()); +} + +HWTEST2_F(CommandEncodeStatesTest, whenAdjustStateComputeModeIsCalledThenNothingHappens, IsAtMostGen11) { + using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; + auto initialUsed = cmdContainer->getCommandStream()->getUsed(); + NEO::EncodeStates::adjustStateComputeMode(*cmdContainer->getCommandStream(), 0, nullptr, false, false, false, false); + EXPECT_EQ(initialUsed, cmdContainer->getCommandStream()->getUsed()); +} diff --git a/shared/test/unit_test/preamble/preamble_tests.cpp b/shared/test/unit_test/preamble/preamble_tests.cpp index c0711b8a96..520a7ee54a 100644 --- a/shared/test/unit_test/preamble/preamble_tests.cpp +++ b/shared/test/unit_test/preamble/preamble_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_stream/preemption.h" +#include "shared/source/command_stream/stream_properties.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gen_common/reg_configs_common.h" #include "shared/source/helpers/flat_batch_buffer_helper_hw.h" @@ -18,7 +19,6 @@ #include "test.h" -#include "stream_properties.h" #include #include