mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add command stream aub tests
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
bf7b2674a5
commit
6ab3d73744
@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@ -14,4 +14,17 @@ target_sources(igdrcl_aub_tests PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_mi_atomic_tests.cpp
|
||||
)
|
||||
|
||||
# Add the XeHP-and-later AUB test sources to igdrcl_aub_tests only when TESTS_XEHP_AND_LATER is set.
if(TESTS_XEHP_AND_LATER)
|
||||
target_sources(igdrcl_aub_tests PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_range_based_flush_tests_xehp_and_later.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_walker_partition_tests_xehp_and_later.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
# Add the DG2-and-later MI_MATH AUB test source to igdrcl_aub_tests only when TESTS_DG2_AND_LATER is set.
if(TESTS_DG2_AND_LATER)
|
||||
target_sources(igdrcl_aub_tests PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mi_math_aub_tests_dg2_and_later.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
# NOTE(review): add_subdirectories() is not defined in this file; presumably a project macro that descends into child directories - confirm.
add_subdirectories()
|
||||
|
@ -0,0 +1,231 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/cache_flush_xehp_and_later.inl"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/timestamp_packet.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/dispatch_flags_helper.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
|
||||
#include "opencl/test/unit_test/aub_tests/fixtures/hello_world_fixture.h"
|
||||
#include "opencl/test/unit_test/helpers/cmd_buffer_validator.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_command_queue.h"
|
||||
#include "opencl/test/unit_test/mocks/mock_context.h"
|
||||
|
||||
#include "test_traits_common.h"
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
// Fixture for the range-based L3 flush AUB tests.
// Disables the implicit-flush debug flags before bringing up the kernel AUB fixture;
// the DebugManagerStateRestore member is declared so the debug flags touched by the
// fixture and by the tests are handled by that helper.
struct RangeBasedFlushTest : public KernelAUBFixture<SimpleKernelFixture>, public ::testing::Test {
    using BaseFixture = KernelAUBFixture<SimpleKernelFixture>;

    cl_int retVal = CL_SUCCESS;
    DebugManagerStateRestore debugSettingsRestore;

    void SetUp() override {
        // Flags must be set before the base fixture is set up.
        auto &debugFlags = DebugManager.flags;
        debugFlags.PerformImplicitFlushForNewResource.set(0);
        debugFlags.PerformImplicitFlushForIdleGpu.set(0);

        BaseFixture::SetUp();
    }

    void TearDown() override {
        BaseFixture::TearDown();
    }
};
|
||||
|
||||
struct L3ControlSupportedMatcher {
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
static constexpr bool isMatched() {
|
||||
if constexpr (HwMapper<productFamily>::GfxProduct::supportsCmdSet(IGFX_XE_HP_CORE)) {
|
||||
return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::l3ControlSupported;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
// Copies srcBuffer into dstBuffer, then submits PIPE_CONTROL + L3_CONTROL (no post-sync write)
// for the destination range with DC flush disabled in the epilogue. Checks both the programmed
// command sequence and that the destination memory holds the source pattern.
HWTEST2_F(RangeBasedFlushTest, givenNoDcFlushInPipeControlWhenL3ControlFlushesCachesThenExpectFlushedCaches, L3ControlSupportedMatcher) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using L3_CONTROL = typename FamilyType::L3_CONTROL;
    using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE;

    DebugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);

    // Source and destination start with different fill patterns so a successful copy is observable.
    constexpr size_t bufferSize = MemoryConstants::pageSize;
    char bufferAMemory[bufferSize];
    char bufferBMemory[bufferSize];
    memset(bufferAMemory, 1, bufferSize);
    memset(bufferBMemory, 129, bufferSize);

    cl_int retVal = CL_INVALID_VALUE;
    auto srcBuffer = std::unique_ptr<Buffer>(Buffer::create(context,
                                                            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                                            bufferSize, bufferAMemory, retVal));
    ASSERT_NE(nullptr, srcBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto dstBuffer = std::unique_ptr<Buffer>(Buffer::create(context,
                                                            CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                            bufferSize, bufferBMemory, retVal));
    ASSERT_NE(nullptr, dstBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event *event = nullptr;

    retVal = pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer.get(),
                                      0, 0,
                                      bufferSize, numEventsInWaitList,
                                      eventWaitList, event);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const auto dstGpuAddress = dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress();

    // Program: PIPE_CONTROL, then the L3 flush of the destination range (post-sync address 0).
    L3RangesVec ranges;
    ranges.push_back(L3Range::fromAddressSizeWithPolicy(dstGpuAddress, MemoryConstants::pageSize,
                                                        L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION));
    size_t requiredSize = getSizeNeededToFlushGpuCache<FamilyType>(ranges, false) + 2 * sizeof(PIPE_CONTROL);
    LinearStream &l3FlushCmdStream = pCmdQ->getCS(requiredSize);
    auto offset = l3FlushCmdStream.getUsed();
    auto pcBeforeFlush = l3FlushCmdStream.getSpaceForCmd<PIPE_CONTROL>();
    *pcBeforeFlush = FamilyType::cmdInitPipeControl;

    flushGpuCache<FamilyType>(&l3FlushCmdStream, ranges, 0U, device->getHardwareInfo());

    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
    auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
    flags.blocking = true;

    DebugManager.flags.DisableDcFlushInEpilogue.set(true);
    csr.flushTask(l3FlushCmdStream, offset,
                  pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE, 0),
                  pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT, 0),
                  pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE, 0),
                  pCmdQ->taskLevel,
                  flags,
                  pCmdQ->getDevice());

    std::string err;

    // The matcher objects are handed over to expectCmdBuff via std::move below.
    std::vector<MatchCmd *> expectedCommands{
        new MatchAnyCmd(AnyNumber),
        new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}),
        new MatchHwCmd<FamilyType, L3_CONTROL>(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE)}),
    };
    // When the PIPE_CONTROL workaround is required, one more PIPE_CONTROL (and optionally a
    // semaphore wait) is expected ahead of the final PIPE_CONTROL.
    if (MemorySynchronizationCommands<FamilyType>::isPipeControlWArequired(device->getHardwareInfo())) {
        expectedCommands.push_back(new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}));
        if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(device->getHardwareInfo()) > 0) {
            expectedCommands.push_back(new MatchHwCmd<FamilyType, MI_SEMAPHORE_WAIT>(1, Expects{EXPECT_MEMBER(MI_SEMAPHORE_WAIT, getSemaphoreDataDword, EncodeSempahore<FamilyType>::invalidHardwareTag)}));
        }
    }
    expectedCommands.push_back(new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}));
    expectedCommands.push_back(new MatchAnyCmd(AnyNumber));
    expectedCommands.push_back(new MatchHwCmd<FamilyType, PIPE_CONTROL>(0));

    auto cmdBuffOk = expectCmdBuff<FamilyType>(l3FlushCmdStream, 0, std::move(expectedCommands), &err);
    EXPECT_TRUE(cmdBuffOk) << err;

    // Destination must now contain the source pattern.
    expectMemory<FamilyType>(reinterpret_cast<void *>(dstGpuAddress),
                             bufferAMemory, bufferSize);
}
|
||||
|
||||
// Same flow as the no-post-sync test, but the L3 flush is given the GPU address of a dedicated
// post-sync buffer. Checks the command sequence (L3_CONTROL with WRITE_IMMEDIATE_DATA), the
// copied destination contents, and that the post-sync location reads back as 0.
// Skipped when the PIPE_CONTROL workaround is required.
HWTEST2_F(RangeBasedFlushTest, givenL3ControlWhenPostSyncIsSetThenExpectPostSyncWrite, L3ControlSupportedMatcher) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using L3_CONTROL = typename FamilyType::L3_CONTROL;
    using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE;

    if (MemorySynchronizationCommands<FamilyType>::isPipeControlWArequired(device->getHardwareInfo())) {
        GTEST_SKIP();
    }

    // Source and destination start with different fill patterns so a successful copy is observable.
    constexpr size_t bufferSize = MemoryConstants::pageSize;
    char bufferAMemory[bufferSize];
    char bufferBMemory[bufferSize];
    memset(bufferAMemory, 1, bufferSize);
    memset(bufferBMemory, 129, bufferSize);

    cl_int retVal = CL_INVALID_VALUE;
    auto srcBuffer = std::unique_ptr<Buffer>(Buffer::create(context,
                                                            CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                                            bufferSize, bufferAMemory, retVal));
    ASSERT_NE(nullptr, srcBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto dstBuffer = std::unique_ptr<Buffer>(Buffer::create(context,
                                                            CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                            bufferSize, bufferBMemory, retVal));
    ASSERT_NE(nullptr, dstBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Post-sync target is seeded from bufferAMemory (non-zero), so a read-back of 0 proves the write.
    auto postSyncBuffer = std::unique_ptr<Buffer>(Buffer::create(context,
                                                                 CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                                 sizeof(uint64_t), bufferAMemory, retVal));
    // Fix: the original re-checked dstBuffer here, leaving postSyncBuffer unchecked before use.
    ASSERT_NE(nullptr, postSyncBuffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    uint64_t expectedPostSyncData = 0;

    cl_uint numEventsInWaitList = 0;
    cl_event *eventWaitList = nullptr;
    cl_event *event = nullptr;

    retVal = pCmdQ->enqueueCopyBuffer(srcBuffer.get(), dstBuffer.get(),
                                      0, 0,
                                      bufferSize, numEventsInWaitList,
                                      eventWaitList, event);
    EXPECT_EQ(CL_SUCCESS, retVal);

    const auto dstGpuAddress = dstBuffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress();
    auto postSyncAllocation = postSyncBuffer->getGraphicsAllocation(rootDeviceIndex);

    // Program: PIPE_CONTROL, then the L3 flush of the destination range with a post-sync address.
    L3RangesVec ranges;
    ranges.push_back(L3Range::fromAddressSizeWithPolicy(dstGpuAddress,
                                                        MemoryConstants::pageSize, L3_FLUSH_ADDRESS_RANGE::L3_FLUSH_EVICTION_POLICY_FLUSH_L3_WITH_EVICTION));
    size_t requiredSize = getSizeNeededToFlushGpuCache<FamilyType>(ranges, true) + 2 * sizeof(PIPE_CONTROL);
    LinearStream &l3FlushCmdStream = pCmdQ->getCS(requiredSize);
    auto offset = l3FlushCmdStream.getUsed();
    auto pcBeforeFlush = l3FlushCmdStream.getSpaceForCmd<PIPE_CONTROL>();
    *pcBeforeFlush = FamilyType::cmdInitPipeControl;

    flushGpuCache<FamilyType>(&l3FlushCmdStream, ranges, postSyncAllocation->getGpuAddress(), device->getHardwareInfo());

    auto &csr = pCmdQ->getGpgpuCommandStreamReceiver();
    auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
    flags.blocking = true;

    DebugManager.flags.DisableDcFlushInEpilogue.set(true);
    // The post-sync buffer is not referenced by the copy, so it is made resident explicitly.
    csr.makeResident(*postSyncAllocation);
    csr.flushTask(l3FlushCmdStream, offset,
                  pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::DYNAMIC_STATE, 0),
                  pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::INDIRECT_OBJECT, 0),
                  pCmdQ->getIndirectHeap(NEO::IndirectHeap::Type::SURFACE_STATE, 0),
                  pCmdQ->taskLevel,
                  flags,
                  pCmdQ->getDevice());

    std::string err;
    auto cmdBuffOk = expectCmdBuff<FamilyType>(l3FlushCmdStream, 0,
                                               std::vector<MatchCmd *>{
                                                   new MatchAnyCmd(AnyNumber),
                                                   new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}),
                                                   new MatchHwCmd<FamilyType, L3_CONTROL>(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA)}),
                                                   new MatchHwCmd<FamilyType, PIPE_CONTROL>(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), // epilogue
                                                   new MatchAnyCmd(AnyNumber),
                                                   new MatchHwCmd<FamilyType, PIPE_CONTROL>(0),
                                               },
                                               &err);
    EXPECT_TRUE(cmdBuffOk) << err;

    // Destination must now contain the source pattern.
    expectMemory<FamilyType>(reinterpret_cast<void *>(dstGpuAddress),
                             bufferAMemory, bufferSize);

    // Post-sync location must have been overwritten with the immediate value 0.
    expectMemory<FamilyType>(reinterpret_cast<void *>(postSyncAllocation->getGpuAddress()),
                             &expectedPostSyncData, sizeof(expectedPostSyncData));
}
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,516 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/test/common/helpers/dispatch_flags_helper.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/test/unit_test/aub_tests/fixtures/aub_fixture.h"
|
||||
#include "opencl/test/unit_test/fixtures/cl_device_fixture.h"
|
||||
|
||||
namespace NEO {
|
||||
// Raw opcode values that the MiMath tests write into MI_MATH_ALU_INST_INLINE::ALUOpcode
// for ALU operations not taken from AluRegisters. Listed in ascending numeric order.
enum class NewAluOpcodes : uint32_t {
    OPCODE_FENCE = 0x001,
    OPCODE_LOAD = 0x080,
    OPCODE_LOAD0 = 0x081,
    OPCODE_LOADIND = 0x082,
    OPCODE_SHL = 0x105,
    OPCODE_SHR = 0x106,
    OPCODE_SAR = 0x107,
    OPCODE_STOREIND = 0x181,
    OPCODE_LOAD1 = 0x481,
};
|
||||
|
||||
struct MiMath : public AUBFixture, public ::testing::Test {
|
||||
void SetUp() override {
|
||||
AUBFixture::SetUp(defaultHwInfo.get());
|
||||
|
||||
streamAllocation = this->device->getMemoryManager()->allocateGraphicsMemoryWithProperties({device->getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::COMMAND_BUFFER, device->getDeviceBitfield()});
|
||||
taskStream = std::make_unique<LinearStream>(streamAllocation);
|
||||
}
|
||||
void TearDown() override {
|
||||
this->device->getMemoryManager()->freeGraphicsMemory(streamAllocation);
|
||||
AUBFixture::TearDown();
|
||||
}
|
||||
|
||||
void flushStream() {
|
||||
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
|
||||
dispatchFlags.guardCommandBufferWithPipeControl = true;
|
||||
|
||||
csr->flushTask(*taskStream, 0,
|
||||
csr->getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u),
|
||||
csr->getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u),
|
||||
csr->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
0u, dispatchFlags, device->getDevice());
|
||||
|
||||
csr->flushBatchedSubmissions();
|
||||
}
|
||||
uint32_t getPartOfGPUAddress(uint64_t address, bool lowPart) {
|
||||
constexpr uint32_t shift = 32u;
|
||||
constexpr uint32_t mask = 0xffffffff;
|
||||
if (lowPart) {
|
||||
return static_cast<uint32_t>(address & mask);
|
||||
} else {
|
||||
return static_cast<uint32_t>(address >> shift);
|
||||
}
|
||||
}
|
||||
template <typename FamilyType>
|
||||
void loadValueToRegister(int32_t value, int32_t reg) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
MI_LOAD_REGISTER_IMM cmd = FamilyType::cmdInitLoadRegisterImm;
|
||||
cmd.setDataDword(value);
|
||||
cmd.setRegisterOffset(reg);
|
||||
cmd.setMmioRemapEnable(1);
|
||||
auto buffer = taskStream->getSpace(sizeof(MI_LOAD_REGISTER_IMM));
|
||||
*static_cast<MI_LOAD_REGISTER_IMM *>(buffer) = cmd;
|
||||
}
|
||||
template <typename FamilyType>
|
||||
void storeValueInRegisterToMemory(int64_t address, int32_t reg) {
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
MI_STORE_REGISTER_MEM cmd2 = FamilyType::cmdInitStoreRegisterMem;
|
||||
cmd2.setRegisterAddress(reg);
|
||||
cmd2.setMemoryAddress(address);
|
||||
cmd2.setMmioRemapEnable(1);
|
||||
auto buffer2 = taskStream->getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
*static_cast<MI_STORE_REGISTER_MEM *>(buffer2) = cmd2;
|
||||
}
|
||||
template <typename FamilyType>
|
||||
void loadAddressToRegisters(uint32_t registerWithLowPart, uint32_t registerWithHighPart, uint32_t registerWithShift, uint64_t address) {
|
||||
loadValueToRegister<FamilyType>(getPartOfGPUAddress(address, true), registerWithLowPart); // low part to R0
|
||||
loadValueToRegister<FamilyType>(getPartOfGPUAddress(address, false), registerWithHighPart); // high part to R1
|
||||
loadValueToRegister<FamilyType>(32u, registerWithShift); // value to shift address
|
||||
}
|
||||
template <typename FamilyType>
|
||||
void loadAddressToMiMathAccu(uint32_t lowAddressRegister, uint32_t highAddressRegister, uint32_t shiftReg) {
|
||||
using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;
|
||||
MI_MATH_ALU_INST_INLINE *pAluParam = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(taskStream->getSpace(numberOfOperationToLoadAddressToMiMathAccu * sizeof(MI_MATH_ALU_INST_INLINE)));
|
||||
pAluParam->DW0.BitField.ALUOpcode = static_cast<uint32_t>(AluRegisters::OPCODE_LOAD); // load high part of address from register with older to SRCA
|
||||
pAluParam->DW0.BitField.Operand1 = static_cast<uint32_t>(AluRegisters::R_SRCA);
|
||||
pAluParam->DW0.BitField.Operand2 = highAddressRegister;
|
||||
pAluParam++;
|
||||
pAluParam->DW0.BitField.ALUOpcode = static_cast<uint32_t>(AluRegisters::OPCODE_LOAD); // load 32 - value from shiftReg , to SRCB (to shift high part in register)
|
||||
pAluParam->DW0.BitField.Operand1 = static_cast<uint32_t>(AluRegisters::R_SRCB);
|
||||
pAluParam->DW0.BitField.Operand2 = shiftReg;
|
||||
pAluParam++;
|
||||
pAluParam->DW0.BitField.ALUOpcode = static_cast<uint32_t>(NewAluOpcodes::OPCODE_SHL); // shift high part
|
||||
pAluParam->DW0.BitField.Operand1 = 0;
|
||||
pAluParam->DW0.BitField.Operand2 = 0;
|
||||
pAluParam++;
|
||||
pAluParam->DW0.BitField.ALUOpcode = static_cast<uint32_t>(AluRegisters::OPCODE_STORE); // move result to highAddressRegister
|
||||
pAluParam->DW0.BitField.Operand1 = highAddressRegister;
|
||||
pAluParam->DW0.BitField.Operand2 = static_cast<uint32_t>(AluRegisters::R_ACCU);
|
||||
pAluParam++;
|
||||
pAluParam->DW0.BitField.ALUOpcode = static_cast<uint32_t>(AluRegisters::OPCODE_LOAD); // load highAddressRegister to SRCA
|
||||
pAluParam->DW0.BitField.Operand1 = static_cast<uint32_t>(AluRegisters::R_SRCA);
|
||||
pAluParam->DW0.BitField.Operand2 = highAddressRegister;
|
||||
pAluParam++;
|
||||
pAluParam->DW0.BitField.ALUOpcode = static_cast<uint32_t>(AluRegisters::OPCODE_LOAD); // load low part of address to SRCB
|
||||
pAluParam->DW0.BitField.Operand1 = static_cast<uint32_t>(AluRegisters::R_SRCB);
|
||||
pAluParam->DW0.BitField.Operand2 = lowAddressRegister;
|
||||
pAluParam++;
|
||||
pAluParam->DW0.BitField.ALUOpcode = static_cast<uint32_t>(AluRegisters::OPCODE_OR); // join parts of address and locate in ACCU
|
||||
pAluParam->DW0.BitField.Operand1 = 0;
|
||||
pAluParam->DW0.BitField.Operand2 = 0;
|
||||
pAluParam++;
|
||||
}
|
||||
|
||||
static constexpr size_t bufferSize = MemoryConstants::pageSize;
|
||||
const uint32_t numberOfOperationToLoadAddressToMiMathAccu = 7;
|
||||
std::unique_ptr<LinearStream> taskStream;
|
||||
GraphicsAllocation *streamAllocation = nullptr;
|
||||
};
|
||||
|
||||
// Product matcher passed to the MiMath HWTEST2_F cases; built from IsWithinProducts with IGFX_DG2 and IGFX_PVC.
// NOTE(review): presumably selects the DG2..PVC product range - confirm whether the bounds are inclusive.
using MatcherIsDg2OrPvc = IsWithinProducts<IGFX_DG2, IGFX_PVC>;
|
||||
|
||||
// Loads a value from the buffer via LOADIND, adds valueToAdd with the MI_MATH ALU and writes
// the sum back to the same buffer via STOREIND; the first 8 bytes must then equal 1 + 5.
HWTEST2_F(MiMath, givenLoadIndirectFromMemoryWhenUseMiMathToSimpleOperationThenStoreStateOfRegisterInirectToMemory, MatcherIsDg2OrPvc) {
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;

    // bufferSize is a byte count; size the host array to exactly that many bytes
    // (the original declared bufferSize elements, i.e. 8x more than the buffer uses).
    uint64_t bufferMemory[bufferSize / sizeof(uint64_t)] = {};
    bufferMemory[0] = 1u;
    cl_int retVal = CL_SUCCESS;

    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context,
                                                         CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                         bufferSize, bufferMemory, retVal));
    ASSERT_NE(nullptr, buffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
    csr->makeResident(*allocation);

    uint32_t valueToAdd = 5u;
    uint64_t valueAfterMiMathOperation = bufferMemory[0] + valueToAdd;

    loadAddressToRegisters<FamilyType>(CS_GPR_R0, CS_GPR_R1, CS_GPR_R2, allocation->getGpuAddress()); // prepare registers for the MI_MATH operation
    loadValueToRegister<FamilyType>(valueToAdd, CS_GPR_R3);

    // ALU instructions programmed by this test, on top of the address-load sequence.
    constexpr uint32_t numberOfTestAluInstructions = 13;

    auto miMath = reinterpret_cast<MI_MATH *>(taskStream->getSpace(sizeof(MI_MATH)));
    miMath->DW0.Value = 0x0;
    miMath->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    miMath->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    miMath->DW0.BitField.DwordLength = numberOfOperationToLoadAddressToMiMathAccu + numberOfTestAluInstructions - 1;

    loadAddressToMiMathAccu<FamilyType>(static_cast<uint32_t>(AluRegisters::R_0), static_cast<uint32_t>(AluRegisters::R_1), static_cast<uint32_t>(AluRegisters::R_2)); // GPU address of the buffer ends up in ACCU

    auto aluInst = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(taskStream->getSpace(numberOfTestAluInstructions * sizeof(MI_MATH_ALU_INST_INLINE)));
    // Writes one ALU instruction and advances to the next reserved slot.
    auto programAlu = [&aluInst](auto opcode, auto operand1, auto operand2) {
        aluInst->DW0.BitField.ALUOpcode = static_cast<uint32_t>(opcode);
        aluInst->DW0.BitField.Operand1 = static_cast<uint32_t>(operand1);
        aluInst->DW0.BitField.Operand2 = static_cast<uint32_t>(operand2);
        ++aluInst;
    };

    programAlu(NewAluOpcodes::OPCODE_FENCE, 0, 0);                                   // make sure all writes and reads are completed
    programAlu(NewAluOpcodes::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU); // R0 <- dword at the address held in ACCU
    programAlu(NewAluOpcodes::OPCODE_FENCE, 0, 0);                                   // make sure all writes and reads are completed
    programAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_2, AluRegisters::R_ACCU); // keep a copy of the address in R2
    programAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_0);  // SRCA <- loaded value (R0)
    programAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_3);  // SRCB <- valueToAdd (R3)
    programAlu(AluRegisters::OPCODE_ADD, 0, 0);                                      // add SRCA and SRCB
    programAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_1, AluRegisters::R_ACCU); // R1 <- sum from ACCU
    programAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2);  // SRCA <- saved address (R2)
    programAlu(NewAluOpcodes::OPCODE_LOAD0, AluRegisters::R_SRCB, 0);                // LOAD0 into SRCB so the ADD below only moves the address
    programAlu(AluRegisters::OPCODE_ADD, 0, 0);                                      // move address to ACCU
    programAlu(NewAluOpcodes::OPCODE_FENCE, 0, 0);                                   // make sure all writes and reads are completed
    programAlu(NewAluOpcodes::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_1); // store R1 to the memory addressed by ACCU

    flushStream();

    expectMemory<FamilyType>(reinterpret_cast<void *>(allocation->getGpuAddress()), &valueAfterMiMathOperation, sizeof(valueAfterMiMathOperation));
}
|
||||
// Loads a value from `buffer` via LOADIND and stores it to `bufferB` via STOREIND inside a
// single MI_MATH command; the first 8 bytes of bufferB must then match bufferMemory.
HWTEST2_F(MiMath, givenLoadIndirectFromMemoryWhenUseMiMathThenStoreIndirectToAnotherMemory, MatcherIsDg2OrPvc) {
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;

    // bufferSize is a byte count; size the host arrays to exactly that many bytes.
    uint64_t bufferMemory[bufferSize / sizeof(uint64_t)] = {};
    bufferMemory[0] = 1u;
    uint64_t bufferBMemory[bufferSize / sizeof(uint64_t)] = {};
    cl_int retVal = CL_SUCCESS;

    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context,
                                                         CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                         bufferSize, bufferMemory, retVal));
    ASSERT_NE(nullptr, buffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto bufferB = std::unique_ptr<Buffer>(Buffer::create(context,
                                                          CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
                                                          bufferSize, bufferBMemory, retVal));
    // Fix: the original re-checked `buffer` here, leaving bufferB unchecked before use.
    ASSERT_NE(nullptr, bufferB);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
    auto allocationB = bufferB->getGraphicsAllocation(rootDeviceIndex);
    csr->makeResident(*allocation);
    csr->makeResident(*allocationB);

    loadAddressToRegisters<FamilyType>(CS_GPR_R0, CS_GPR_R1, CS_GPR_R2, allocation->getGpuAddress());  // source address halves + shift
    loadAddressToRegisters<FamilyType>(CS_GPR_R3, CS_GPR_R4, CS_GPR_R2, allocationB->getGpuAddress()); // destination address halves + shift

    // Each memory access is programmed as FENCE, LOADIND/STOREIND, FENCE.
    constexpr uint32_t aluInstructionsPerAccess = 3;

    auto miMath = reinterpret_cast<MI_MATH *>(taskStream->getSpace(sizeof(MI_MATH)));
    miMath->DW0.Value = 0x0;
    miMath->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    miMath->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    miMath->DW0.BitField.DwordLength = numberOfOperationToLoadAddressToMiMathAccu * 2 + aluInstructionsPerAccess * 2 - 1;

    MI_MATH_ALU_INST_INLINE *aluInst = nullptr;
    // Writes one ALU instruction and advances to the next reserved slot.
    auto programAlu = [&aluInst](auto opcode, auto operand1, auto operand2) {
        aluInst->DW0.BitField.ALUOpcode = static_cast<uint32_t>(opcode);
        aluInst->DW0.BitField.Operand1 = static_cast<uint32_t>(operand1);
        aluInst->DW0.BitField.Operand2 = static_cast<uint32_t>(operand2);
        ++aluInst;
    };

    loadAddressToMiMathAccu<FamilyType>(static_cast<uint32_t>(AluRegisters::R_0), static_cast<uint32_t>(AluRegisters::R_1), static_cast<uint32_t>(AluRegisters::R_2)); // GPU address of buffer ends up in ACCU

    // Fix: reserve exactly the three slots that are programmed (the original reserved four,
    // leaving one unprogrammed instruction inside the MI_MATH payload).
    aluInst = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(taskStream->getSpace(aluInstructionsPerAccess * sizeof(MI_MATH_ALU_INST_INLINE)));
    programAlu(NewAluOpcodes::OPCODE_FENCE, 0, 0);                                      // make sure all writes and reads are completed
    programAlu(NewAluOpcodes::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU); // R0 <- dword at the address held in ACCU
    programAlu(NewAluOpcodes::OPCODE_FENCE, 0, 0);                                      // make sure all writes and reads are completed

    loadAddressToMiMathAccu<FamilyType>(static_cast<uint32_t>(AluRegisters::R_3), static_cast<uint32_t>(AluRegisters::R_4), static_cast<uint32_t>(AluRegisters::R_2)); // GPU address of bufferB ends up in ACCU

    // Fix: reserve three slots (the original reserved two and wrote the trailing FENCE past
    // the space obtained from the stream).
    aluInst = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(taskStream->getSpace(aluInstructionsPerAccess * sizeof(MI_MATH_ALU_INST_INLINE)));
    programAlu(NewAluOpcodes::OPCODE_FENCE, 0, 0);                                       // make sure all writes and reads are completed
    programAlu(NewAluOpcodes::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_0); // store R0 to the memory addressed by ACCU
    programAlu(NewAluOpcodes::OPCODE_FENCE, 0, 0);                                       // make sure all writes and reads are completed

    flushStream();

    expectMemory<FamilyType>(reinterpret_cast<void *>(allocationB->getGpuAddress()), bufferMemory, sizeof(uint64_t));
}
|
||||
// Left logical shift (OPCODE_SHL) executed through a single MI_MATH command.
// R0 is loaded with the value to shift, R1 and R2 with two shift amounts (2 and 5).
// Each shift result is copied from ACCU back over its shift-amount register and
// then written to the buffer: R1 at offset 0, R2 at offset 4.
// The test expects the shift by 5 to yield the same result as a shift by 4
// (expectedUsedShift) and explicitly checks that it does not equal value << 5.
HWTEST2_F(MiMath, givenValueToMakeLeftLogicalShiftWhenUseMiMathThenShiftIsDoneProperly, MatcherIsDg2OrPvc) {
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;

    // Number of ALU instructions that follow the MI_MATH header.
    constexpr int aluInstructionCount = 7;

    uint64_t bufferMemory[bufferSize] = {1u};
    cl_int retVal = CL_SUCCESS;

    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferMemory, retVal));
    ASSERT_NE(nullptr, buffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
    csr->makeResident(*allocation);

    uint32_t value = 1u;
    uint32_t shift = 2u;
    uint32_t notPowerOfTwoShift = 5u;
    uint32_t expectedUsedShift = 4u;

    loadValueToRegister<FamilyType>(value, CS_GPR_R0);
    loadValueToRegister<FamilyType>(shift, CS_GPR_R1);
    loadValueToRegister<FamilyType>(notPowerOfTwoShift, CS_GPR_R2);

    // MI_MATH header; DwordLength is the ALU instruction count minus one.
    auto *miMath = reinterpret_cast<MI_MATH *>(taskStream->getSpace(sizeof(MI_MATH)));
    miMath->DW0.Value = 0x0;
    miMath->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    miMath->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    miMath->DW0.BitField.DwordLength = aluInstructionCount - 1;

    auto *aluInst = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(taskStream->getSpace(aluInstructionCount * sizeof(MI_MATH_ALU_INST_INLINE)));

    // Fills the current ALU instruction slot and advances to the next one.
    const auto emitAlu = [&aluInst](auto opcode, auto operand1, auto operand2) {
        aluInst->DW0.BitField.ALUOpcode = static_cast<uint32_t>(opcode);
        aluInst->DW0.BitField.Operand1 = static_cast<uint32_t>(operand1);
        aluInst->DW0.BitField.Operand2 = static_cast<uint32_t>(operand2);
        aluInst++;
    };

    emitAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_0);  // SRCA <- R0 (value to shift)
    emitAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_1);  // SRCB <- R1 (first shift amount)
    emitAlu(NewAluOpcodes::OPCODE_SHL, 0u, 0u);                                   // shift left, no explicit operands
    emitAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_1, AluRegisters::R_ACCU); // R1 <- ACCU (first result)
    emitAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_2);  // SRCB <- R2 (second shift amount)
    emitAlu(NewAluOpcodes::OPCODE_SHL, 0u, 0u);                                   // shift left, no explicit operands
    emitAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_2, AluRegisters::R_ACCU); // R2 <- ACCU (second result)

    storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress(), CS_GPR_R1);
    storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress() + 4, CS_GPR_R2);
    flushStream();

    // GPU address of the result stored at the given byte offset in the buffer.
    const auto resultAddress = [allocation](uint64_t offset) {
        return reinterpret_cast<void *>(allocation->getGpuAddress() + offset);
    };

    uint32_t firstShift = value << shift;
    uint32_t secondShift = value << notPowerOfTwoShift;
    uint32_t executeSecondShift = value << expectedUsedShift;

    expectMemory<FamilyType>(resultAddress(0), &firstShift, sizeof(firstShift));
    expectNotEqualMemory<FamilyType>(resultAddress(4), &secondShift, sizeof(secondShift));
    expectMemory<FamilyType>(resultAddress(4), &executeSecondShift, sizeof(executeSecondShift));
}
|
||||
// Right logical shift (OPCODE_SHR) executed through a single MI_MATH command.
// R0 is loaded with the value to shift (32), R1 and R2 with two shift amounts (2 and 5).
// Each shift result is copied from ACCU back over its shift-amount register and
// then written to the buffer: R1 at offset 0, R2 at offset 4.
// The test expects the shift by 5 to yield the same result as a shift by 4
// (expectedUsedShift) and explicitly checks that it does not equal value >> 5.
HWTEST2_F(MiMath, givenValueToMakeRightLogicalShiftWhenUseMiMathThenShiftIsDoneProperly, MatcherIsDg2OrPvc) {
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;

    // Number of ALU instructions that follow the MI_MATH header.
    constexpr int aluInstructionCount = 7;

    uint64_t bufferMemory[bufferSize] = {1u};
    cl_int retVal = CL_SUCCESS;

    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferMemory, retVal));
    ASSERT_NE(nullptr, buffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
    csr->makeResident(*allocation);

    uint32_t value = 32u;
    uint32_t shift = 2u;
    uint32_t notPowerOfTwoShift = 5u;
    uint32_t expectedUsedShift = 4u;

    loadValueToRegister<FamilyType>(value, CS_GPR_R0);
    loadValueToRegister<FamilyType>(shift, CS_GPR_R1);
    loadValueToRegister<FamilyType>(notPowerOfTwoShift, CS_GPR_R2);

    // MI_MATH header; DwordLength is the ALU instruction count minus one.
    auto *miMath = reinterpret_cast<MI_MATH *>(taskStream->getSpace(sizeof(MI_MATH)));
    miMath->DW0.Value = 0x0;
    miMath->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    miMath->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    miMath->DW0.BitField.DwordLength = aluInstructionCount - 1;

    auto *aluInst = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(taskStream->getSpace(aluInstructionCount * sizeof(MI_MATH_ALU_INST_INLINE)));

    // Fills the current ALU instruction slot and advances to the next one.
    const auto emitAlu = [&aluInst](auto opcode, auto operand1, auto operand2) {
        aluInst->DW0.BitField.ALUOpcode = static_cast<uint32_t>(opcode);
        aluInst->DW0.BitField.Operand1 = static_cast<uint32_t>(operand1);
        aluInst->DW0.BitField.Operand2 = static_cast<uint32_t>(operand2);
        aluInst++;
    };

    emitAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_0);  // SRCA <- R0 (value to shift)
    emitAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_1);  // SRCB <- R1 (first shift amount)
    emitAlu(NewAluOpcodes::OPCODE_SHR, 0u, 0u);                                   // shift right, no explicit operands
    emitAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_1, AluRegisters::R_ACCU); // R1 <- ACCU (first result)
    emitAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_2);  // SRCB <- R2 (second shift amount)
    emitAlu(NewAluOpcodes::OPCODE_SHR, 0u, 0u);                                   // shift right, no explicit operands
    emitAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_2, AluRegisters::R_ACCU); // R2 <- ACCU (second result)

    storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress(), CS_GPR_R1);
    storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress() + 4, CS_GPR_R2);
    flushStream();

    // GPU address of the result stored at the given byte offset in the buffer.
    const auto resultAddress = [allocation](uint64_t offset) {
        return reinterpret_cast<void *>(allocation->getGpuAddress() + offset);
    };

    uint32_t firstShift = value >> shift;
    uint32_t secondShift = value >> notPowerOfTwoShift;
    uint32_t executeSecondShift = value >> expectedUsedShift;

    expectMemory<FamilyType>(resultAddress(0), &firstShift, sizeof(firstShift));
    expectNotEqualMemory<FamilyType>(resultAddress(4), &secondShift, sizeof(secondShift));
    expectMemory<FamilyType>(resultAddress(4), &executeSecondShift, sizeof(executeSecondShift));
}
|
||||
// Right arithmetic shift (OPCODE_SAR) executed through a single MI_MATH command.
// The buffer is created with -32 as its first 64-bit element. The buffer's GPU address
// is placed in ACCU by the loadAddress* helpers, then OPCODE_LOADIND with operands
// (R3, ACCU) is issued - presumably reading that element into R3; the opcode semantics
// are defined outside this test. R4 and R5 hold two shift amounts (2 and 5); each
// result is copied from ACCU back over its shift-amount register and then written
// to the buffer: R4 at offset 0, R5 at offset 4.
// Expected values are computed on the host by repeated division by two, and only the
// first sizeof(uint32_t) bytes of each are compared. The test expects the shift by 5
// to yield the same result as a shift by 4 (expectedUsedShift).
HWTEST2_F(MiMath, givenValueToMakeRightAritmeticShiftWhenUseMiMathThenShiftIsDoneProperly, MatcherIsDg2OrPvc) {
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;

    // Number of ALU instructions emitted here, on top of those added by loadAddressToMiMathAccu.
    constexpr int aluInstructionCount = 9;

    int64_t bufferMemory[bufferSize] = {-32};
    cl_int retVal = CL_SUCCESS;

    auto buffer = std::unique_ptr<Buffer>(Buffer::create(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, bufferSize, bufferMemory, retVal));
    ASSERT_NE(nullptr, buffer);
    EXPECT_EQ(CL_SUCCESS, retVal);

    auto allocation = buffer->getGraphicsAllocation(rootDeviceIndex);
    csr->makeResident(*allocation);

    uint32_t shift = 2u;
    uint32_t notPowerOfTwoShift = 5u;
    uint32_t expectedUsedShift = 4u;

    // Prepare R0-R2 for the address computation done inside MI_MATH, then the two shift amounts.
    loadAddressToRegisters<FamilyType>(CS_GPR_R0, CS_GPR_R1, CS_GPR_R2, allocation->getGpuAddress());
    loadValueToRegister<FamilyType>(shift, CS_GPR_R4);
    loadValueToRegister<FamilyType>(notPowerOfTwoShift, CS_GPR_R5);

    // MI_MATH header; the length covers the address-load instructions plus the ones emitted below.
    auto *miMath = reinterpret_cast<MI_MATH *>(taskStream->getSpace(sizeof(MI_MATH)));
    miMath->DW0.Value = 0x0;
    miMath->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
    miMath->DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
    miMath->DW0.BitField.DwordLength = numberOfOperationToLoadAddressToMiMathAccu + aluInstructionCount - 1;

    // Loads the buffer's GPU address into the ACCU register.
    loadAddressToMiMathAccu<FamilyType>(static_cast<uint32_t>(AluRegisters::R_0), static_cast<uint32_t>(AluRegisters::R_1), static_cast<uint32_t>(AluRegisters::R_2));

    auto *aluInst = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(taskStream->getSpace(aluInstructionCount * sizeof(MI_MATH_ALU_INST_INLINE)));

    // Fills the current ALU instruction slot and advances to the next one.
    const auto emitAlu = [&aluInst](auto opcode, auto operand1, auto operand2) {
        aluInst->DW0.BitField.ALUOpcode = static_cast<uint32_t>(opcode);
        aluInst->DW0.BitField.Operand1 = static_cast<uint32_t>(operand1);
        aluInst->DW0.BitField.Operand2 = static_cast<uint32_t>(operand2);
        aluInst++;
    };

    emitAlu(NewAluOpcodes::OPCODE_LOADIND, AluRegisters::R_3, AluRegisters::R_ACCU); // indirect load with operands (R3, ACCU)
    emitAlu(NewAluOpcodes::OPCODE_FENCE, 0u, 0u);                                    // to be sure that all writes and reads are completed
    emitAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_3);     // SRCA <- R3 (value to shift)
    emitAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_4);     // SRCB <- R4 (first shift amount)
    emitAlu(NewAluOpcodes::OPCODE_SAR, 0u, 0u);                                      // arithmetic shift right, no explicit operands
    emitAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_4, AluRegisters::R_ACCU);    // R4 <- ACCU (first result)
    emitAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_5);     // SRCB <- R5 (second shift amount)
    emitAlu(NewAluOpcodes::OPCODE_SAR, 0u, 0u);                                      // arithmetic shift right, no explicit operands
    emitAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_5, AluRegisters::R_ACCU);    // R5 <- ACCU (second result)

    storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress(), CS_GPR_R4);
    storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress() + 4, CS_GPR_R5);
    flushStream();

    // Host-side reference: divides the input by two the requested number of times.
    const auto divideByTwoRepeatedly = [](int64_t input, uint32_t times) {
        while (times-- > 0) {
            input /= 2;
        }
        return input;
    };

    // GPU address of the result stored at the given byte offset in the buffer.
    const auto resultAddress = [allocation](uint64_t offset) {
        return reinterpret_cast<void *>(allocation->getGpuAddress() + offset);
    };

    int64_t firstShift = divideByTwoRepeatedly(bufferMemory[0], shift);
    int64_t secondShift = divideByTwoRepeatedly(bufferMemory[0], notPowerOfTwoShift);
    int64_t executeSecondShift = divideByTwoRepeatedly(bufferMemory[0], expectedUsedShift);

    expectMemory<FamilyType>(resultAddress(0), &firstShift, sizeof(uint32_t));
    expectNotEqualMemory<FamilyType>(resultAddress(4), &secondShift, sizeof(uint32_t));
    expectMemory<FamilyType>(resultAddress(4), &executeSecondShift, sizeof(uint32_t));
}
|
||||
} // namespace NEO
|
Reference in New Issue
Block a user