From af031ee0e3050241bccfee4359fb13d186abd8fd Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Wed, 15 Jan 2025 07:45:01 +0100 Subject: [PATCH] Revert "performance: align structures for 64-bit platforms" This reverts commit 9f07f56f7f3a22f0110cc6db64df0c34aea588eb. Signed-off-by: Compute-Runtime-Validation --- opencl/source/command_queue/copy_engine_state.h | 2 +- .../source/command_queue/csr_selection_args.h | 12 ++++++------ opencl/source/command_queue/enqueue_svm.h | 6 +++--- .../command_queue/blit_enqueue_1_tests.cpp | 2 +- .../command_queue/command_queue_hw_1_tests.cpp | 2 +- .../cl_tbx_command_stream_tests.cpp | 2 +- .../test/unit_test/mem_obj/buffer_bcs_tests.cpp | 2 +- opencl/test/unit_test/mem_obj/image_tests.cpp | 2 +- .../command_stream/command_stream_receiver_hw.h | 6 +++--- ...ommand_stream_receiver_simulated_common_hw.h | 6 +++--- .../command_stream/submissions_aggregator.cpp | 10 +++++----- .../command_stream/submissions_aggregator.h | 17 ++++++++--------- .../source/device_binary_format/zebin/zeinfo.h | 6 +++--- shared/source/helpers/surface_format_info.h | 2 +- shared/source/memory_manager/host_ptr_defines.h | 2 +- .../os_interface/linux/xe/ioctl_helper_xe.h | 2 +- 16 files changed, 40 insertions(+), 41 deletions(-) diff --git a/opencl/source/command_queue/copy_engine_state.h b/opencl/source/command_queue/copy_engine_state.h index deeb71d33f..50d9174daa 100644 --- a/opencl/source/command_queue/copy_engine_state.h +++ b/opencl/source/command_queue/copy_engine_state.h @@ -13,8 +13,8 @@ namespace NEO { struct CopyEngineState { - TaskCountType taskCount = 0; aub_stream::EngineType engineType = aub_stream::EngineType::NUM_ENGINES; + TaskCountType taskCount = 0; bool csrClientRegistered = false; bool isValid() const { diff --git a/opencl/source/command_queue/csr_selection_args.h b/opencl/source/command_queue/csr_selection_args.h index 33397d2a68..642e500ec0 100644 --- a/opencl/source/command_queue/csr_selection_args.h +++ b/opencl/source/command_queue/csr_selection_args.h @@ -24,21 +24,21 @@ struct CsrSelectionArgs { const size_t *imageOrigin = nullptr; }; + cl_command_type cmdType; + const size_t *size = nullptr; Resource srcResource; Resource dstResource; - const size_t *size = nullptr; - cl_command_type cmdType; TransferDirection direction; CsrSelectionArgs(cl_command_type cmdType, const size_t *size) - : size(size), - cmdType(cmdType), + : cmdType(cmdType), + size(size), direction(TransferDirection::hostToHost) {} template CsrSelectionArgs(cl_command_type cmdType, ResourceType *src, ResourceType *dst, uint32_t rootDeviceIndex, const size_t *size) - : size(size), - cmdType(cmdType) { + : cmdType(cmdType), + size(size) { if (src) { processResource(*src, rootDeviceIndex, this->srcResource); } diff --git a/opencl/source/command_queue/enqueue_svm.h b/opencl/source/command_queue/enqueue_svm.h index 8ae2f00172..a2a30fe311 100644 --- a/opencl/source/command_queue/enqueue_svm.h +++ b/opencl/source/command_queue/enqueue_svm.h @@ -25,20 +25,20 @@ using SvmFreeClbT = void(CL_CALLBACK *)(cl_command_queue queue, void *userData); struct SvmFreeUserData { + cl_uint numSvmPointers; void **svmPointers; SvmFreeClbT clb; void *userData; - cl_uint numSvmPointers; bool ownsEventDeletion; SvmFreeUserData(cl_uint numSvmPointers, void **svmPointers, SvmFreeClbT clb, void *userData, bool ownsEventDeletion) - : svmPointers(svmPointers), + : numSvmPointers(numSvmPointers), + svmPointers(svmPointers), clb(clb), userData(userData), - numSvmPointers(numSvmPointers), ownsEventDeletion(ownsEventDeletion){}; }; diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index e53938ff5a..890acdb20c 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -1251,7 +1251,7 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, whenWaitUntilCompletionCalledThenW uint32_t gpgpuTaskCount = 123; uint32_t bcsTaskCount = 123; - CopyEngineState bcsState{bcsTaskCount, bcsCsr->getOsContext().getEngineType()}; + CopyEngineState bcsState{bcsCsr->getOsContext().getEngineType(), bcsTaskCount}; commandQueue->waitUntilComplete(gpgpuTaskCount, Range{&bcsState}, 0, false); EXPECT_EQ(gpgpuTaskCount, static_cast *>(gpgpuCsr)->latestWaitForCompletionWithTimeoutTaskCount.load()); diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp index 5a20e8da0e..ba48fbac21 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp @@ -55,7 +55,7 @@ HWTEST_F(CommandQueueHwTest, whenCallingIsCompletedThenTestTaskCountValue) { bcsCsr->setupContext(*osContext); bcsCsr->initializeTagAllocation(); EngineControl control(bcsCsr.get(), osContext.get()); - CopyEngineState state{1, aub_stream::EngineType::ENGINE_BCS, false}; + CopyEngineState state{aub_stream::EngineType::ENGINE_BCS, 1, false}; MockCommandQueueHw cmdQ(context, pClDevice, nullptr); diff --git a/opencl/test/unit_test/command_stream/cl_tbx_command_stream_tests.cpp b/opencl/test/unit_test/command_stream/cl_tbx_command_stream_tests.cpp index 21163d374b..d86fd17bd4 100644 --- a/opencl/test/unit_test/command_stream/cl_tbx_command_stream_tests.cpp +++ b/opencl/test/unit_test/command_stream/cl_tbx_command_stream_tests.cpp @@ -47,7 +47,7 @@ HWTEST_F(ClTbxCommandStreamTests, givenTbxCsrWhenDispatchBlitEnqueueThenProcessC cmdQ.clearBcsEngines(); cmdQ.bcsEngines[0] = &engineControl1; - cmdQ.bcsStates[0] = {0, aub_stream::ENGINE_BCS, false}; + cmdQ.bcsStates[0] = {aub_stream::ENGINE_BCS, 0, false}; cl_int error = CL_SUCCESS; std::unique_ptr buffer(Buffer::create(&context, 0, 1, nullptr, error)); diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 4bdae93163..dfebab1a52 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -619,7 +619,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenAllBcsEnginesReadyWhenWaitingForEventThe ultCsr2.initializeTagAllocation(); ultCsr2.setupContext(osContext); - CopyEngineState copyEngineState = {2, aub_stream::EngineType::ENGINE_BCS2, false}; + CopyEngineState copyEngineState = {aub_stream::EngineType::ENGINE_BCS2, 2, false}; EngineControl engineControl = {&ultCsr2, &osContext}; auto bcs2Index = EngineHelpers::getBcsIndex(aub_stream::EngineType::ENGINE_BCS2); mockCmdQ->bcsStates[bcs2Index] = copyEngineState; diff --git a/opencl/test/unit_test/mem_obj/image_tests.cpp b/opencl/test/unit_test/mem_obj/image_tests.cpp index 144d84e145..91f763ff1b 100644 --- a/opencl/test/unit_test/mem_obj/image_tests.cpp +++ b/opencl/test/unit_test/mem_obj/image_tests.cpp @@ -1559,7 +1559,7 @@ TEST(ImageConvertDescriptorTest, givenClImageDescWhenConvertedThenCorrectImageDe } TEST(ImageConvertDescriptorTest, givenImageDescriptorWhenConvertedThenCorrectClImageDescIsReturned) { - ImageDescriptor desc = {16, 24, 1, 1, 1024, 2048, ImageType::image2D, 1, 3, false}; + ImageDescriptor desc = {ImageType::image2D, 16, 24, 1, 1, 1024, 2048, 1, 3, false}; auto clDesc = Image::convertDescriptor(desc); EXPECT_EQ(clDesc.image_type, static_cast(CL_MEM_OBJECT_IMAGE2D)); diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 04f75813a3..1b4de4f415 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -25,10 +25,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; struct ImmediateFlushData { - void *endPtr = nullptr; - size_t estimatedSize = 0; - size_t csrStartOffset = 0; PipelineSelectArgs pipelineSelectArgs{}; + size_t estimatedSize = 0; + void *endPtr = nullptr; + size_t csrStartOffset = 0; bool pipelineSelectFullConfigurationNeeded = false; bool pipelineSelectDirty = false; diff --git a/shared/source/command_stream/command_stream_receiver_simulated_common_hw.h b/shared/source/command_stream/command_stream_receiver_simulated_common_hw.h index 405d810845..b0b745d18b 100644 --- a/shared/source/command_stream/command_stream_receiver_simulated_common_hw.h +++ b/shared/source/command_stream/command_stream_receiver_simulated_common_hw.h @@ -81,12 +81,12 @@ class CommandStreamReceiverSimulatedCommonHw : public CommandStreamReceiverHw { diff --git a/shared/source/device_binary_format/zebin/zeinfo.h b/shared/source/device_binary_format/zebin/zeinfo.h index 3a923f7133..4f7585d75b 100644 --- a/shared/source/device_binary_format/zebin/zeinfo.h +++ b/shared/source/device_binary_format/zebin/zeinfo.h @@ -634,12 +634,12 @@ inline constexpr BtiValueT btiValue = -1; } // namespace Defaults struct PayloadArgumentBaseT { + ArgTypeT argType = argTypeUnknown; OffsetT offset = Defaults::offset; SourceOffseT sourceOffset = Defaults::sourceOffset; SizeT size = 0; ArgIndexT argIndex = Defaults::argIndex; BtiValueT btiValue = Defaults::btiValue; - ArgTypeT argType = argTypeUnknown; AddrmodeT addrmode = memoryAddressingModeUnknown; AddrspaceT addrspace = addressSpaceUnknown; AccessTypeT accessType = accessTypeUnknown; @@ -692,9 +692,9 @@ inline constexpr Slot slot = 0U; } // namespace Defaults struct PerThreadMemoryBufferBaseT { - SizeT size = 0U; AllocationType allocationType = AllocationTypeUnknown; MemoryUsageT memoryUsage = MemoryUsageUnknown; + SizeT size = 0U; IsSimtThreadT isSimtThread = Defaults::isSimtThread; Slot slot = Defaults::slot; }; @@ -732,8 +732,8 @@ inline constexpr NormalizedT normalized = false; struct InlineSamplerBaseT { SamplerIndexT samplerIndex = Defaults::samplerIndex; - FilterModeT filterMode = Defaults::filterMode; AddrModeT addrMode = Defaults::addrMode; + FilterModeT filterMode = Defaults::filterMode; NormalizedT normalized = Defaults::normalized; }; } // namespace InlineSamplers diff --git a/shared/source/helpers/surface_format_info.h b/shared/source/helpers/surface_format_info.h index 0d94bf5faf..f76705a019 100644 --- a/shared/source/helpers/surface_format_info.h +++ b/shared/source/helpers/surface_format_info.h @@ -218,13 +218,13 @@ enum class ImageType { }; struct ImageDescriptor { + ImageType imageType; size_t imageWidth; size_t imageHeight; size_t imageDepth; size_t imageArraySize; size_t imageRowPitch; size_t imageSlicePitch; - ImageType imageType; uint32_t numMipLevels; uint32_t numSamples; bool fromParent; diff --git a/shared/source/memory_manager/host_ptr_defines.h b/shared/source/memory_manager/host_ptr_defines.h index af8879bec0..62f57f9ecc 100644 --- a/shared/source/memory_manager/host_ptr_defines.h +++ b/shared/source/memory_manager/host_ptr_defines.h @@ -51,9 +51,9 @@ struct AllocationRequirements { struct FragmentStorage { const void *fragmentCpuPointer = nullptr; size_t fragmentSize = 0; + int refCount = 0; OsHandle *osInternalStorage = nullptr; ResidencyData *residency = nullptr; - int refCount = 0; bool driverAllocation = false; }; diff --git a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h index 48b085393b..a9027656d6 100644 --- a/shared/source/os_interface/linux/xe/ioctl_helper_xe.h +++ b/shared/source/os_interface/linux/xe/ioctl_helper_xe.h @@ -190,9 +190,9 @@ class IoctlHelperXe : public IoctlHelper { std::unique_ptr defaultEngine; struct DebugMetadata { + DrmResourceClass type; uint64_t offset; uint64_t size; - DrmResourceClass type; bool isCookie; };