From 03637499485750b97945fa8ecd3a40aacb0071bd Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 5 Oct 2021 13:01:17 +0000 Subject: [PATCH] Disjoint local work size code from hw command generation Signed-off-by: Zbigniew Zdanowicz --- opencl/source/command_queue/CMakeLists.txt | 3 +- ...l_work_size.cpp => cl_local_work_size.cpp} | 11 +++---- .../source/command_queue/cl_local_work_size.h | 29 +++++++++++++++++++ opencl/source/command_queue/gpgpu_walker.h | 15 +--------- opencl/source/helpers/dispatch_info_builder.h | 2 +- opencl/source/kernel/kernel.cpp | 2 +- ..._suggested_local_work_size_intel_tests.inl | 2 +- .../command_queue/local_work_size_tests.cpp | 3 +- .../context/driver_diagnostics_tests.h | 2 +- 9 files changed, 42 insertions(+), 27 deletions(-) rename opencl/source/command_queue/{local_work_size.cpp => cl_local_work_size.cpp} (95%) create mode 100644 opencl/source/command_queue/cl_local_work_size.h diff --git a/opencl/source/command_queue/CMakeLists.txt b/opencl/source/command_queue/CMakeLists.txt index 39655d3185..a5717fb1dc 100644 --- a/opencl/source/command_queue/CMakeLists.txt +++ b/opencl/source/command_queue/CMakeLists.txt @@ -6,6 +6,8 @@ set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/cl_local_work_size.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/cl_local_work_size.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/command_queue.h ${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h @@ -42,7 +44,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface.h ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl ${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_and_later.inl - ${CMAKE_CURRENT_SOURCE_DIR}/local_work_size.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_barrier.h ) diff --git a/opencl/source/command_queue/local_work_size.cpp b/opencl/source/command_queue/cl_local_work_size.cpp similarity index 95% rename from opencl/source/command_queue/local_work_size.cpp rename to opencl/source/command_queue/cl_local_work_size.cpp index 82e9bd5361..ad8d9f6e18 100644 --- a/opencl/source/command_queue/local_work_size.cpp +++ b/opencl/source/command_queue/cl_local_work_size.cpp @@ -1,24 +1,21 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2021 Intel Corporation * * SPDX-License-Identifier: MIT * */ -#include "shared/source/helpers/local_work_size.h" +#include "opencl/source/command_queue/cl_local_work_size.h" #include "shared/source/device/device.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/debug_helpers.h" #include "shared/source/helpers/hw_helper.h" -#include "shared/source/program/kernel_info.h" +#include "shared/source/helpers/local_work_size.h" -#include "opencl/source/cl_device/cl_device.h" -#include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/context/context.h" #include "opencl/source/helpers/dispatch_info.h" -#include "opencl/source/kernel/kernel.h" #include #include @@ -72,7 +69,7 @@ Vec3 generateWorkgroupsNumber(const DispatchInfo &dispatchInfo) { return generateWorkgroupsNumber(dispatchInfo.getGWS(), dispatchInfo.getLocalWorkgroupSize()); } -void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo) { +void provideLocalWorkGroupSizeHints(Context *context, const DispatchInfo &dispatchInfo) { if (context != nullptr && context->isProvidingPerformanceHints() && dispatchInfo.getDim() <= 3) { size_t preferredWorkGroupSize[3]; diff --git a/opencl/source/command_queue/cl_local_work_size.h b/opencl/source/command_queue/cl_local_work_size.h new file mode 100644 index 0000000000..2aaa4ecbe0 --- /dev/null +++ b/opencl/source/command_queue/cl_local_work_size.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/helpers/vec.h" +#include "shared/source/program/kernel_info.h" + +namespace NEO { +class Context; +class DispatchInfo; + +Vec3 computeWorkgroupSize( + const DispatchInfo &dispatchInfo); + +Vec3 generateWorkgroupSize( + const DispatchInfo &dispatchInfo); + +Vec3 generateWorkgroupsNumber( + const DispatchInfo &dispatchInfo); + +void provideLocalWorkGroupSizeHints(Context *context, const DispatchInfo &dispatchInfo); + +WorkSizeInfo createWorkSizeInfoFromDispatchInfo(const DispatchInfo &dispatchInfo); + +} // namespace NEO diff --git a/opencl/source/command_queue/gpgpu_walker.h b/opencl/source/command_queue/gpgpu_walker.h index 4410db55d7..6f6f5ce4c2 100644 --- a/opencl/source/command_queue/gpgpu_walker.h +++ b/opencl/source/command_queue/gpgpu_walker.h @@ -12,12 +12,12 @@ #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/timestamp_packet.h" -#include "shared/source/helpers/vec.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/source/utilities/hw_timestamps.h" #include "shared/source/utilities/perf_counter.h" #include "shared/source/utilities/tag_allocator.h" +#include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/command_queue/command_queue.h" #include "opencl/source/context/context.h" #include "opencl/source/device_queue/device_queue_hw.h" @@ -35,19 +35,6 @@ using WALKER_TYPE = typename GfxFamily::WALKER_TYPE; template using MI_STORE_REG_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM_CMD; -Vec3 computeWorkgroupSize( - const DispatchInfo &dispatchInfo); - -Vec3 generateWorkgroupSize( - const DispatchInfo &dispatchInfo); - -Vec3 generateWorkgroupsNumber( - const DispatchInfo &dispatchInfo); - -void provideLocalWorkGroupSizeHints(Context *context, DispatchInfo dispatchInfo); - -WorkSizeInfo createWorkSizeInfoFromDispatchInfo(const DispatchInfo &dispatchInfo); - template class GpgpuWalkerHelper { public: diff --git a/opencl/source/helpers/dispatch_info_builder.h b/opencl/source/helpers/dispatch_info_builder.h index 67a458738e..ca8a616211 100644 --- a/opencl/source/helpers/dispatch_info_builder.h +++ b/opencl/source/helpers/dispatch_info_builder.h @@ -8,7 +8,7 @@ #pragma once #include "shared/source/helpers/local_work_size.h" -#include "opencl/source/command_queue/gpgpu_walker.h" +#include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/kernel/kernel.h" diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index c437480d97..218fe0718c 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -32,8 +32,8 @@ #include "opencl/source/accelerators/intel_motion_estimation.h" #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/cl_device/cl_device.h" +#include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/command_queue/command_queue.h" -#include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/source/context/context.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/execution_model/device_enqueue.h" diff --git a/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl index 2d27daa305..47a79b2192 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl @@ -5,8 +5,8 @@ * */ +#include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/command_queue/command_queue.h" -#include "opencl/source/command_queue/gpgpu_walker.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "cl_api_tests.h" diff --git a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp index 6b2b87419d..abc9eb841e 100644 --- a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp +++ b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp @@ -9,7 +9,8 @@ #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" -#include "opencl/source/command_queue/gpgpu_walker.h" +#include "opencl/source/command_queue/cl_local_work_size.h" +#include "opencl/source/helpers/dispatch_info.h" #include "opencl/test/unit_test/mocks/mock_cl_device.h" #include "opencl/test/unit_test/mocks/mock_kernel.h" #include "test.h" diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.h b/opencl/test/unit_test/context/driver_diagnostics_tests.h index 5cb1b5e406..cb5140703a 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.h +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.h @@ -11,7 +11,7 @@ #include "shared/test/common/mocks/mock_device.h" #include "shared/test/common/test_macros/test_checks_shared.h" -#include "opencl/source/command_queue/gpgpu_walker.h" +#include "opencl/source/command_queue/cl_local_work_size.h" #include "opencl/source/context/context.h" #include "opencl/source/kernel/kernel.h" #include "opencl/source/mem_obj/buffer.h"