From 8f91fcdd731663f0834a2c5d92c90786857d8516 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Thu, 4 Mar 2021 17:54:14 +0000 Subject: [PATCH] Add new atomic operation Related-To: NEO-5244 Signed-off-by: Zbigniew Zdanowicz --- opencl/source/gen8/device_queue_gen8.cpp | 2 +- opencl/source/gen9/device_queue_gen9.cpp | 2 +- .../aub_tests/command_stream/CMakeLists.txt | 1 + .../command_stream/aub_mi_atomic_tests.cpp | 147 ++++++++++++++++++ .../test/unit_test/mocks/mock_device_queue.h | 2 +- .../command_container/command_encoder.h | 11 +- .../command_container/command_encoder.inl | 32 +++- .../gen11/hw_cmds_generated_gen11.inl | 1 + .../gen12lp/hw_cmds_generated_gen12lp.inl | 1 + .../generated/gen8/hw_cmds_generated_gen8.inl | 1 + .../generated/gen9/hw_cmds_generated_gen9.inl | 1 + shared/source/helpers/timestamp_packet.h | 2 +- .../unit_test/encoders/test_encode_atomic.cpp | 61 +++++++- 13 files changed, 249 insertions(+), 15 deletions(-) create mode 100644 opencl/test/unit_test/aub_tests/command_stream/aub_mi_atomic_tests.cpp diff --git a/opencl/source/gen8/device_queue_gen8.cpp b/opencl/source/gen8/device_queue_gen8.cpp index 86283d45ad..b224d84eec 100644 --- a/opencl/source/gen8/device_queue_gen8.cpp +++ b/opencl/source/gen8/device_queue_gen8.cpp @@ -38,7 +38,7 @@ void DeviceQueueHw::addMiAtomicCmdWa(uint64_t atomicOpPlaceholder) { atomicOpPlaceholder, Family::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT, Family::MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD, - 0x1u, 0x1u); + 0x1u, 0x1u, 0x0u, 0x0u); } template <> diff --git a/opencl/source/gen9/device_queue_gen9.cpp b/opencl/source/gen9/device_queue_gen9.cpp index 13f57c056a..6514758b3c 100644 --- a/opencl/source/gen9/device_queue_gen9.cpp +++ b/opencl/source/gen9/device_queue_gen9.cpp @@ -44,7 +44,7 @@ void DeviceQueueHw::addMiAtomicCmdWa(uint64_t atomicOpPlaceholder) { atomicOpPlaceholder, Family::MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT, Family::MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD, - 0x1u, 0x1u); + 0x1u, 0x1u, 0x0u, 0x0u); } template <> diff --git a/opencl/test/unit_test/aub_tests/command_stream/CMakeLists.txt b/opencl/test/unit_test/aub_tests/command_stream/CMakeLists.txt index 09d2b7afc4..a155938df6 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/CMakeLists.txt +++ b/opencl/test/unit_test/aub_tests/command_stream/CMakeLists.txt @@ -11,6 +11,7 @@ target_sources(igdrcl_aub_tests PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/aub_command_stream_tests.cpp ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests.h ${CMAKE_CURRENT_SOURCE_DIR}/aub_mem_dump_tests.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/aub_mi_atomic_tests.cpp ) add_subdirectories() diff --git a/opencl/test/unit_test/aub_tests/command_stream/aub_mi_atomic_tests.cpp b/opencl/test/unit_test/aub_tests/command_stream/aub_mi_atomic_tests.cpp new file mode 100644 index 0000000000..3ca787f476 --- /dev/null +++ b/opencl/test/unit_test/aub_tests/command_stream/aub_mi_atomic_tests.cpp @@ -0,0 +1,147 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_stream/command_stream_receiver_hw.h" +#include "shared/test/common/helpers/dispatch_flags_helper.h" + +#include "opencl/source/command_stream/aub_command_stream_receiver_hw.h" +#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h" +#include "test.h" + +#include + +using namespace NEO; + +struct MiAtomicAubFixture : public AUBFixture { + void SetUp() { + AUBFixture::SetUp(nullptr); + auto memoryManager = this->device->getMemoryManager(); + + AllocationProperties commandBufferProperties = {device->getRootDeviceIndex(), + true, + MemoryConstants::pageSize, + GraphicsAllocation::AllocationType::COMMAND_BUFFER, + false, + device->getDeviceBitfield()}; + streamAllocation = memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties); + ASSERT_NE(nullptr, streamAllocation); + + AllocationProperties deviceBufferProperties = {device->getRootDeviceIndex(), + true, + MemoryConstants::pageSize, + GraphicsAllocation::AllocationType::BUFFER, + false, + device->getDeviceBitfield()}; + deviceSurface = memoryManager->allocateGraphicsMemoryWithProperties(deviceBufferProperties); + ASSERT_NE(nullptr, deviceSurface); + + AllocationProperties systemBufferProperties = {device->getRootDeviceIndex(), + true, + MemoryConstants::pageSize, + GraphicsAllocation::AllocationType::SVM_CPU, + false, + device->getDeviceBitfield()}; + systemSurface = memoryManager->allocateGraphicsMemoryWithProperties(systemBufferProperties); + ASSERT_NE(nullptr, systemSurface); + + taskStream.replaceGraphicsAllocation(streamAllocation); + taskStream.replaceBuffer(streamAllocation->getUnderlyingBuffer(), + streamAllocation->getUnderlyingBufferSize()); + } + + void TearDown() { + auto memoryManager = this->device->getMemoryManager(); + memoryManager->freeGraphicsMemory(streamAllocation); + memoryManager->freeGraphicsMemory(deviceSurface); + memoryManager->freeGraphicsMemory(systemSurface); + + AUBFixture::TearDown(); + } + + void flushStream() { + DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); + dispatchFlags.guardCommandBufferWithPipeControl = true; + + csr->makeResident(*deviceSurface); + csr->makeResident(*systemSurface); + csr->flushTask(taskStream, 0, + csr->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u), + csr->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u), + csr->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u), + 0u, dispatchFlags, device->getDevice()); + + csr->flushBatchedSubmissions(); + } + + LinearStream taskStream; + GraphicsAllocation *streamAllocation = nullptr; + GraphicsAllocation *deviceSurface = nullptr; + GraphicsAllocation *systemSurface = nullptr; +}; + +using MiAtomicAubTest = Test; + +HWTEST_F(MiAtomicAubTest, WhenDispatchingAtomicMoveOperationThenExpectCorrectEndValues) { + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + auto atomicAddress = deviceSurface->getGpuAddress(); + + auto expectedGpuAddressDwordOp1 = atomicAddress; + auto expectedGpuAddressDwordOp2 = expectedGpuAddressDwordOp1 + sizeof(uint32_t); + auto expectedGpuAddressQwordOp3 = expectedGpuAddressDwordOp2 + sizeof(uint32_t); + + uint32_t operation1dword0 = 0x10; + EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressDwordOp1, + MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, + MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD, + 0, 0, operation1dword0, 0u); + + uint32_t operation2dword0 = 0x22; + EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressDwordOp2, + MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, + MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD, + 0, 0, operation2dword0, 0u); + + uint32_t operation3dword0 = 0xF0; + uint32_t operation3dword1 = 0x1F; + EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressQwordOp3, + MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_MOVE, + MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD, + 0, 0, operation3dword0, operation3dword1); + uint64_t operation3qword = (static_cast(operation3dword1) << 32) | operation3dword0; + + flushStream(); + + expectMemory(reinterpret_cast(expectedGpuAddressDwordOp1), &operation1dword0, sizeof(operation1dword0)); + expectMemory(reinterpret_cast(expectedGpuAddressDwordOp2), &operation2dword0, sizeof(operation2dword0)); + expectMemory(reinterpret_cast(expectedGpuAddressQwordOp3), &operation3qword, sizeof(operation3qword)); +} + +HWTEST_F(MiAtomicAubTest, GivenSystemMemoryWhenDispatchingAtomicMove4BytesOperationThenExpectCorrectEndValues) { + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + + auto atomicAddress = systemSurface->getGpuAddress(); + + auto expectedGpuAddressDwordOp1 = atomicAddress; + auto expectedGpuAddressDwordOp2 = expectedGpuAddressDwordOp1 + sizeof(uint32_t); + + uint32_t operation1dword0 = 0x15; + EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressDwordOp1, + MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, + MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD, + 0, 0, operation1dword0, 0u); + + uint32_t operation2dword0 = 0xFF; + EncodeAtomic::programMiAtomic(taskStream, expectedGpuAddressDwordOp2, + MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_MOVE, + MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD, + 0, 0, operation2dword0, 0u); + + flushStream(); + + expectMemory(reinterpret_cast(expectedGpuAddressDwordOp1), &operation1dword0, sizeof(operation1dword0)); + expectMemory(reinterpret_cast(expectedGpuAddressDwordOp2), &operation2dword0, sizeof(operation2dword0)); +} diff --git a/opencl/test/unit_test/mocks/mock_device_queue.h b/opencl/test/unit_test/mocks/mock_device_queue.h index 9bd7413073..468eaf4c98 100644 --- a/opencl/test/unit_test/mocks/mock_device_queue.h +++ b/opencl/test/unit_test/mocks/mock_device_queue.h @@ -115,7 +115,7 @@ class MockDeviceQueueHw : public DeviceQueueHw { placeholder, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_8B_INCREMENT, MI_ATOMIC::DATA_SIZE::DATA_SIZE_QWORD, - 0x1u, 0x1u); + 0x1u, 0x1u, 0x0u, 0x0u); return miAtomic; } diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 466a4cab20..cbf6cd0652 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -318,14 +318,21 @@ struct EncodeAtomic { ATOMIC_OPCODES opcode, DATA_SIZE dataSize, uint32_t returnDataControl, - uint32_t csStall); + uint32_t csStall, + uint32_t operand1dword0, + uint32_t operand1dword1); static void programMiAtomic(MI_ATOMIC *atomic, uint64_t writeAddress, ATOMIC_OPCODES opcode, DATA_SIZE dataSize, uint32_t returnDataControl, - uint32_t csStall); + uint32_t csStall, + uint32_t operand1dword0, + uint32_t operand1dword1); + + static void setMiAtomicAddress(MI_ATOMIC &atomic, uint64_t writeAddress); + static uint64_t getMiAtomicAddress(MI_ATOMIC &atomic); }; template diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 471c42699a..e688a65c7d 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -569,20 +569,40 @@ size_t EncodeSempahore::getSizeMiSemaphoreWait() { return sizeof(MI_SEMAPHORE_WAIT); } +template +void EncodeAtomic::setMiAtomicAddress(MI_ATOMIC &atomic, uint64_t writeAddress) { + atomic.setMemoryAddress(static_cast(writeAddress & 0x0000FFFFFFFFULL)); + atomic.setMemoryAddressHigh(static_cast(writeAddress >> 32)); +} + +template +uint64_t EncodeAtomic::getMiAtomicAddress(MI_ATOMIC &atomic) { + uint64_t address = (static_cast(atomic.getMemoryAddressHigh()) << 32) | (atomic.getMemoryAddress()); + return address; +} + template void EncodeAtomic::programMiAtomic(MI_ATOMIC *atomic, uint64_t writeAddress, ATOMIC_OPCODES opcode, DATA_SIZE dataSize, uint32_t returnDataControl, - uint32_t csStall) { + uint32_t csStall, + uint32_t operand1dword0, + uint32_t operand1dword1) { MI_ATOMIC cmd = Family::cmdInitAtomic; cmd.setAtomicOpcode(opcode); cmd.setDataSize(dataSize); - cmd.setMemoryAddress(static_cast(writeAddress & 0x0000FFFFFFFFULL)); - cmd.setMemoryAddressHigh(static_cast(writeAddress >> 32)); + EncodeAtomic::setMiAtomicAddress(cmd, writeAddress); cmd.setReturnDataControl(returnDataControl); cmd.setCsStall(csStall); + if (opcode == ATOMIC_OPCODES::ATOMIC_4B_MOVE || + opcode == ATOMIC_OPCODES::ATOMIC_8B_MOVE) { + cmd.setDwordLength(MI_ATOMIC::DWORD_LENGTH::DWORD_LENGTH_INLINE_DATA_1); + cmd.setInlineData(0x1); + cmd.setOperand1DataDword0(operand1dword0); + cmd.setOperand1DataDword1(operand1dword1); + } *atomic = cmd; } @@ -593,9 +613,11 @@ void EncodeAtomic::programMiAtomic(LinearStream &commandStream, ATOMIC_OPCODES opcode, DATA_SIZE dataSize, uint32_t returnDataControl, - uint32_t csStall) { + uint32_t csStall, + uint32_t operand1dword0, + uint32_t operand1dword1) { auto miAtomic = commandStream.getSpaceForCmd(); - EncodeAtomic::programMiAtomic(miAtomic, writeAddress, opcode, dataSize, returnDataControl, csStall); + EncodeAtomic::programMiAtomic(miAtomic, writeAddress, opcode, dataSize, returnDataControl, csStall, operand1dword0, operand1dword1); } template diff --git a/shared/source/generated/gen11/hw_cmds_generated_gen11.inl b/shared/source/generated/gen11/hw_cmds_generated_gen11.inl index cc458b388b..a93bf705e2 100644 --- a/shared/source/generated/gen11/hw_cmds_generated_gen11.inl +++ b/shared/source/generated/gen11/hw_cmds_generated_gen11.inl @@ -870,6 +870,7 @@ typedef struct tagMI_ATOMIC { ATOMIC_4B_MOVE = 0x4, ATOMIC_4B_INCREMENT = 0x5, ATOMIC_4B_DECREMENT = 0x6, + ATOMIC_8B_MOVE = 0x24, ATOMIC_8B_INCREMENT = 0x25, ATOMIC_8B_DECREMENT = 0x26, } ATOMIC_OPCODES; diff --git a/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl b/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl index b17aac5965..8b41fe2b82 100644 --- a/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl +++ b/shared/source/generated/gen12lp/hw_cmds_generated_gen12lp.inl @@ -907,6 +907,7 @@ typedef struct tagMI_ATOMIC { ATOMIC_4B_MOVE = 0x4, ATOMIC_4B_INCREMENT = 0x5, ATOMIC_4B_DECREMENT = 0x6, + ATOMIC_8B_MOVE = 0x24, ATOMIC_8B_INCREMENT = 0x25, ATOMIC_8B_DECREMENT = 0x26, } ATOMIC_OPCODES; diff --git a/shared/source/generated/gen8/hw_cmds_generated_gen8.inl b/shared/source/generated/gen8/hw_cmds_generated_gen8.inl index a75be872b5..7f0d8b623b 100644 --- a/shared/source/generated/gen8/hw_cmds_generated_gen8.inl +++ b/shared/source/generated/gen8/hw_cmds_generated_gen8.inl @@ -1008,6 +1008,7 @@ typedef struct tagMI_ATOMIC { ATOMIC_4B_MOVE = 0x4, ATOMIC_4B_INCREMENT = 0x5, ATOMIC_4B_DECREMENT = 0x6, + ATOMIC_8B_MOVE = 0x24, ATOMIC_8B_INCREMENT = 0x25, ATOMIC_8B_DECREMENT = 0x26, } ATOMIC_OPCODES; diff --git a/shared/source/generated/gen9/hw_cmds_generated_gen9.inl b/shared/source/generated/gen9/hw_cmds_generated_gen9.inl index 37a393f0f0..19d5733103 100644 --- a/shared/source/generated/gen9/hw_cmds_generated_gen9.inl +++ b/shared/source/generated/gen9/hw_cmds_generated_gen9.inl @@ -1005,6 +1005,7 @@ typedef struct tagMI_ATOMIC { ATOMIC_4B_MOVE = 0x4, ATOMIC_4B_INCREMENT = 0x5, ATOMIC_4B_DECREMENT = 0x6, + ATOMIC_8B_MOVE = 0x24, ATOMIC_8B_INCREMENT = 0x25, ATOMIC_8B_DECREMENT = 0x26, } ATOMIC_OPCODES; diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index 4536879839..a5672631c9 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -178,7 +178,7 @@ struct TimestampPacketHelper { EncodeAtomic::programMiAtomic(cmdStream, dependenciesCountAddress, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD, - 0u, 0u); + 0u, 0u, 0x0u, 0x0u); } } diff --git a/shared/test/unit_test/encoders/test_encode_atomic.cpp b/shared/test/unit_test/encoders/test_encode_atomic.cpp index 53e4b4997f..5c40f0d6a4 100644 --- a/shared/test/unit_test/encoders/test_encode_atomic.cpp +++ b/shared/test/unit_test/encoders/test_encode_atomic.cpp @@ -21,21 +21,74 @@ HWTEST_F(CommandEncodeAtomic, WhenProgrammingMiAtomicThenExpectAllFieldsSetCorre constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; - + uint64_t address = static_cast(0x123400); LinearStream cmdbuffer(buffer, bufferSize); EncodeAtomic::programMiAtomic(cmdbuffer, - static_cast(0x123400), + address, ATOMIC_OPCODES::ATOMIC_4B_DECREMENT, DATA_SIZE::DATA_SIZE_DWORD, 0x1u, - 0x1u); + 0x1u, + 0x0u, + 0x0u); MI_ATOMIC *miAtomicCmd = reinterpret_cast(cmdbuffer.getCpuBase()); EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_4B_DECREMENT, miAtomicCmd->getAtomicOpcode()); EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomicCmd->getDataSize()); - EXPECT_EQ(0x123400u, miAtomicCmd->getMemoryAddress()); + EXPECT_EQ(address, EncodeAtomic::getMiAtomicAddress(*miAtomicCmd)); EXPECT_EQ(0x1u, miAtomicCmd->getReturnDataControl()); EXPECT_EQ(0x1u, miAtomicCmd->getCsStall()); } + +HWTEST_F(CommandEncodeAtomic, WhenProgrammingMiAtomicMoveOperationThenExpectInlineDataSet) { + using MI_ATOMIC = typename FamilyType::MI_ATOMIC; + using ATOMIC_OPCODES = typename FamilyType::MI_ATOMIC::ATOMIC_OPCODES; + using DATA_SIZE = typename FamilyType::MI_ATOMIC::DATA_SIZE; + using DWORD_LENGTH = typename FamilyType::MI_ATOMIC::DWORD_LENGTH; + + constexpr size_t bufferSize = 128u; + uint8_t buffer[bufferSize]; + uint64_t address = (static_cast(3) << 32) + 0x123400; + LinearStream cmdbuffer(buffer, bufferSize); + + EncodeAtomic::programMiAtomic(cmdbuffer, + address, + ATOMIC_OPCODES::ATOMIC_4B_MOVE, + DATA_SIZE::DATA_SIZE_DWORD, + 0x0u, + 0x0u, + 0x10u, + 0x20u); + + EncodeAtomic::programMiAtomic(cmdbuffer, + address, + ATOMIC_OPCODES::ATOMIC_8B_MOVE, + DATA_SIZE::DATA_SIZE_QWORD, + 0x0u, + 0x0u, + 0x20u, + 0x1fu); + + MI_ATOMIC *miAtomicCmd = reinterpret_cast(cmdbuffer.getCpuBase()); + + EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_4B_MOVE, miAtomicCmd->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_DWORD, miAtomicCmd->getDataSize()); + EXPECT_EQ(address, EncodeAtomic::getMiAtomicAddress(*miAtomicCmd)); + EXPECT_EQ(0x0u, miAtomicCmd->getReturnDataControl()); + EXPECT_EQ(DWORD_LENGTH::DWORD_LENGTH_INLINE_DATA_1, miAtomicCmd->getDwordLength()); + EXPECT_EQ(0x1u, miAtomicCmd->getInlineData()); + EXPECT_EQ(0x10u, miAtomicCmd->getOperand1DataDword0()); + EXPECT_EQ(0x20u, miAtomicCmd->getOperand1DataDword1()); + + miAtomicCmd++; + EXPECT_EQ(ATOMIC_OPCODES::ATOMIC_8B_MOVE, miAtomicCmd->getAtomicOpcode()); + EXPECT_EQ(DATA_SIZE::DATA_SIZE_QWORD, miAtomicCmd->getDataSize()); + EXPECT_EQ(address, EncodeAtomic::getMiAtomicAddress(*miAtomicCmd)); + EXPECT_EQ(0x0u, miAtomicCmd->getReturnDataControl()); + EXPECT_EQ(DWORD_LENGTH::DWORD_LENGTH_INLINE_DATA_1, miAtomicCmd->getDwordLength()); + EXPECT_EQ(0x1u, miAtomicCmd->getInlineData()); + EXPECT_EQ(0x20u, miAtomicCmd->getOperand1DataDword0()); + EXPECT_EQ(0x1fu, miAtomicCmd->getOperand1DataDword1()); +}