From 18ccd448f2b07d93e02e9bfe2d675006c0c3c33c Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Thu, 1 Oct 2020 11:59:59 +0200 Subject: [PATCH] Unify programming of semaphore command Change-Id: Iae9060935554df366d9687e9f16c3b5dce9155ee Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 31 +++++++++---------- .../source/helpers/hardware_commands_helper.h | 5 --- .../helpers/hardware_commands_helper_base.inl | 17 ---------- .../hardware_commands_helper_tests.cpp | 22 ------------- .../helpers/blit_commands_helper_base.inl | 3 +- shared/source/helpers/timestamp_packet.h | 3 +- 6 files changed, 16 insertions(+), 65 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 9af9cc90cc..c061b52980 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -22,8 +22,6 @@ #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" -#include "opencl/source/helpers/hardware_commands_helper.h" - #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/event/event.h" @@ -882,13 +880,13 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d size_t srcSize = 0; if (srcRegion->depth > 1) { - uint hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch) + ((dstRegion->originZ) * dstSlicePitch); - uint hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch) + ((srcRegion->originZ) * srcSlicePitch); + uint32_t hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch) + ((dstRegion->originZ) * dstSlicePitch); + uint32_t hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch) + ((srcRegion->originZ) * srcSlicePitch); dstSize = (dstRegion->width * dstRegion->height * dstRegion->depth) + hostPtrDstOffset; srcSize = (srcRegion->width * srcRegion->height * srcRegion->depth) + hostPtrSrcOffset; } else { - uint hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch); - uint hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch); + uint32_t hostPtrDstOffset = dstRegion->originX + ((dstRegion->originY) * dstPitch); + uint32_t hostPtrSrcOffset = srcRegion->originX + ((srcRegion->originY) * srcPitch); dstSize = (dstRegion->width * dstRegion->height) + hostPtrDstOffset; srcSize = (srcRegion->width * srcRegion->height) + hostPtrSrcOffset; } @@ -976,10 +974,10 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernel3d(NEO:: ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, srcRegion->depth / groupSizeZ}; - uint srcOrigin[3] = {(srcRegion->originX + static_cast(srcOffset)), (srcRegion->originY), (srcRegion->originZ)}; - uint dstOrigin[3] = {(dstRegion->originX + static_cast(dstOffset)), (dstRegion->originY), (dstRegion->originZ)}; - uint srcPitches[2] = {(srcPitch), (srcSlicePitch)}; - uint dstPitches[2] = {(dstPitch), (dstSlicePitch)}; + uint32_t srcOrigin[3] = {(srcRegion->originX + static_cast(srcOffset)), (srcRegion->originY), (srcRegion->originZ)}; + uint32_t dstOrigin[3] = {(dstRegion->originX + static_cast(dstOffset)), (dstRegion->originY), (dstRegion->originZ)}; + uint32_t srcPitches[2] = {(srcPitch), (srcSlicePitch)}; + uint32_t dstPitches[2] = {(dstPitch), (dstSlicePitch)}; auto dstValPtr = static_cast(dstGA->getGpuAddress()); auto srcValPtr = static_cast(srcGA->getGpuAddress()); @@ -1035,8 +1033,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernel2d(NEO:: ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, 1u}; - uint srcOrigin[2] = {(srcRegion->originX + static_cast(srcOffset)), (srcRegion->originY)}; - uint dstOrigin[2] = {(dstRegion->originX + static_cast(dstOffset)), (dstRegion->originY)}; + uint32_t srcOrigin[2] = {(srcRegion->originX + static_cast(srcOffset)), (srcRegion->originY)}; + uint32_t dstOrigin[2] = {(dstRegion->originX + static_cast(dstOffset)), (dstRegion->originY)}; auto dstValPtr = static_cast(dstGA->getGpuAddress()); auto srcValPtr = static_cast(srcGA->getGpuAddress()); @@ -1374,11 +1372,10 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu if (event->isTimestampEvent) { gpuAddr += offsetof(KernelTimestampEvent, contextEnd); } - - NEO::HardwareCommandsHelper::programMiSemaphoreWait(*(commandContainer.getCommandStream()), - gpuAddr, - eventStateClear, - COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + NEO::EncodeSempahore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), + gpuAddr, + eventStateClear, + COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); bool dcFlushEnable = (!event->waitScope) ? false : true; if (dcFlushEnable) { diff --git a/opencl/source/helpers/hardware_commands_helper.h b/opencl/source/helpers/hardware_commands_helper.h index d192964bf7..b2447493b3 100644 --- a/opencl/source/helpers/hardware_commands_helper.h +++ b/opencl/source/helpers/hardware_commands_helper.h @@ -142,11 +142,6 @@ struct HardwareCommandsHelper : public PerThreadDataHelper { WALKER_TYPE *walkerCmd, uint32_t &interfaceDescriptorIndex); - static void programMiSemaphoreWait(LinearStream &commandStream, - uint64_t compareAddress, - uint32_t compareData, - COMPARE_OPERATION compareMode); - static void programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize); static void programMiAtomic(MI_ATOMIC &atomic, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, typename MI_ATOMIC::DATA_SIZE dataSize); static void programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress); diff --git a/opencl/source/helpers/hardware_commands_helper_base.inl b/opencl/source/helpers/hardware_commands_helper_base.inl index 14de9568cb..536e728fc4 100644 --- a/opencl/source/helpers/hardware_commands_helper_base.inl +++ b/opencl/source/helpers/hardware_commands_helper_base.inl @@ -382,23 +382,6 @@ void HardwareCommandsHelper::updatePerThreadDataTotal( DEBUG_BREAK_IF(sizePerThreadDataTotal == 0); // Hardware requires at least 1 GRF of perThreadData for each thread in thread group } -template -void HardwareCommandsHelper::programMiSemaphoreWait(LinearStream &commandStream, - uint64_t compareAddress, - uint32_t compareData, - COMPARE_OPERATION compareMode) { - using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; - - auto miSemaphoreCmd = commandStream.getSpaceForCmd(); - MI_SEMAPHORE_WAIT cmd = GfxFamily::cmdInitMiSemaphoreWait; - - cmd.setCompareOperation(compareMode); - cmd.setSemaphoreDataDword(compareData); - cmd.setSemaphoreGraphicsAddress(compareAddress); - cmd.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); - *miSemaphoreCmd = cmd; -} - template void HardwareCommandsHelper::programMiAtomic(LinearStream &commandStream, uint64_t writeAddress, typename MI_ATOMIC::ATOMIC_OPCODES opcode, diff --git a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp index e94aff5834..a51e16c4e5 100644 --- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp @@ -1053,28 +1053,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd using HardwareCommandsHelperTests = ::testing::Test; -HWTEST_F(HardwareCommandsHelperTests, givenCompareAddressAndDataWhenProgrammingSemaphoreWaitThenSetupAllFields) { - using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; - using COMPARE_OPERATION = typename FamilyType::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; - - uint64_t compareAddress = 0x10000; - uint32_t compareData = 1234; - - uint8_t buffer[1024] = {}; - LinearStream cmdStream(buffer, 1024); - - MI_SEMAPHORE_WAIT referenceCommand = FamilyType::cmdInitMiSemaphoreWait; - referenceCommand.setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); - referenceCommand.setSemaphoreDataDword(compareData); - referenceCommand.setSemaphoreGraphicsAddress(compareAddress); - referenceCommand.setWaitMode(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); - - COMPARE_OPERATION compareMode = COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD; - HardwareCommandsHelper::programMiSemaphoreWait(cmdStream, compareAddress, compareData, compareMode); - EXPECT_EQ(sizeof(MI_SEMAPHORE_WAIT), cmdStream.getUsed()); - EXPECT_EQ(0, memcmp(&referenceCommand, buffer, sizeof(MI_SEMAPHORE_WAIT))); -} - HWTEST_F(HardwareCommandsHelperTests, whenProgrammingMiAtomicThenSetupAllFields) { using MI_ATOMIC = typename FamilyType::MI_ATOMIC; uint64_t writeAddress = 0x10000; diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 99641bf71e..4681675650 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -251,8 +251,7 @@ void BlitCommandsHelper::dispatchDebugPauseCommands(LinearStream &com EncodeMiFlushDW::programMiFlushDw(commandStream, debugPauseStateGPUAddress, static_cast(confirmationTrigger), false, true); - auto miSemaphoreCmd = commandStream.getSpaceForCmd(); - EncodeSempahore::programMiSemaphoreWait(miSemaphoreCmd, debugPauseStateGPUAddress, static_cast(waitCondition), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD); + EncodeSempahore::addMiSemaphoreWaitCommand(commandStream, debugPauseStateGPUAddress, static_cast(waitCondition), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD); } template diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index a6332b72d8..14018836d0 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -131,8 +131,7 @@ struct TimestampPacketHelper { for (uint32_t packetId = 0; packetId < timestampPacketNode.tagForCpuAccess->packetsUsed; packetId++) { uint64_t compareOffset = packetId * sizeof(TimestampPacketStorage::Packet); - auto miSemaphoreCmd = cmdStream.getSpaceForCmd(); - EncodeSempahore::programMiSemaphoreWait(miSemaphoreCmd, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + EncodeSempahore::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); } bool trackPostSyncDependencies = true;