Split test_cmdlist files and remove not needed includes

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
Related-To: NEO-4692
This commit is contained in:
Kamil Kopryk
2021-06-27 21:20:58 +00:00
committed by Compute-Runtime-Automation
parent 76f7070917
commit 37271e7b85
5 changed files with 1280 additions and 1163 deletions

View File

@@ -10,10 +10,12 @@ target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_5.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_barrier.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_event_reset.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_launch_kernel.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_memory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_signal_event.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_append_wait_on_events.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_blit.cpp

View File

@@ -6,26 +6,14 @@
*/
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/mocks/mock_compilers.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "opencl/test/unit_test/mocks/mock_memory_manager.h"
#include "test.h"
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "level_zero/core/source/module/module_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_builtin_functions_lib_impl_timestamps.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_context.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device_for_spirv.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
namespace ult {
@@ -946,786 +934,6 @@ HWTEST_F(CommandListCreate, givenCommandListWhenSetBarrierThenPipeControlIsProgr
EXPECT_NE(cmdList.end(), itor);
}
// Event test double: it owns a mock host-memory allocation and every
// overridden operation reports success without doing any work.
class MockEvent : public ::L0::Event {
  public:
    MockEvent() {
        mockAllocation.reset(new NEO::MockGraphicsAllocation(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                             reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                             MemoryPool::System4KBPages));
        // Keep the gpuAddress member in sync with the backing allocation.
        gpuAddress = mockAllocation->getGpuAddress();
    }

    // Allocation accessors: the device argument is ignored, the single mock
    // allocation is always returned.
    NEO::GraphicsAllocation &getAllocation(L0::Device *device) override {
        return *mockAllocation;
    }
    uint64_t getGpuAddress(L0::Device *device) override {
        return mockAllocation->getGpuAddress();
    }

    // Lifecycle and synchronization entry points: all unconditionally succeed.
    ze_result_t destroy() override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t hostSignal() override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t hostSynchronize(uint64_t timeout) override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t queryStatus() override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t reset() override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) override {
        return ZE_RESULT_SUCCESS;
    }

    // Fixed timestamp layout reported by the mock.
    size_t getTimestampSizeInDw() const override {
        return 1;
    }
    size_t getContextStartOffset() const override {
        return 0;
    }
    size_t getContextEndOffset() const override {
        return 4;
    }
    size_t getGlobalStartOffset() const override {
        return 8;
    }
    size_t getGlobalEndOffset() const override {
        return 12;
    }
    size_t getSinglePacketSize() const override {
        return 16;
    }

    // Packet bookkeeping: always one packet in use, setters are no-ops.
    uint32_t getPacketsInUse() override {
        return 1;
    }
    void resetPackets() override {}
    void setPacketsInUse(uint32_t value) override {}
    uint64_t getPacketAddress(L0::Device *) override {
        return 0;
    }

    std::unique_ptr<NEO::GraphicsAllocation> mockAllocation;
};
// Calls appendQueryKernelTimestamps with the last two arguments set to 1u and
// nullptr (presumably the wait-event count and wait-event list - confirm
// against the API) and expects ZE_RESULT_ERROR_INVALID_ARGUMENT.
HWTEST_F(CommandListCreate, givenCommandListWithInvalidWaitEventArgWhenAppendQueryKernelTimestampsThenProperErrorRetruned) {
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));
    // Fatal check: the command list is dereferenced below.
    ASSERT_NE(nullptr, commandList);
    device->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps);

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc);
    // Fatal check: continuing with a failed allocation would use and free an invalid pointer.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto eventHandle = event.toHandle();
    result = commandList->appendQueryKernelTimestamps(1u, &eventHandle, alloc, nullptr, nullptr, 1u, nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);

    context->freeMem(alloc);
}
// Snapshot of the kernel state captured by MockCommandListForAppendLaunchKernel
// so tests can inspect what was passed to appendLaunchKernel.
struct CmdListHelper {
    NEO::GraphicsAllocation *isaAllocation = nullptr;
    NEO::ResidencyContainer residencyContainer;
    // Zero-initialized so tests never read indeterminate values when the mock was not invoked.
    ze_group_count_t threadGroupDimensions = {};
    const uint32_t *groupSize = nullptr;
    // Stays at the max value until a QueryKernelTimestamps* kernel is launched.
    uint32_t useOnlyGlobalTimestamp = std::numeric_limits<uint32_t>::max();
};
// Command-list mock whose appendLaunchKernel does not program anything; it only
// records the launched kernel's state into cmdListHelper for later assertions.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
  public:
    CmdListHelper cmdListHelper;

    // Captures ISA allocation, residency container, group size and group count
    // of the kernel. For the two QueryKernelTimestamps builtins it additionally
    // reads one uint32_t from the cross-thread data at the offset described by
    // explicit argument 2 (plain variant) or 3 (WithOffsets variant).
    // Always returns ZE_RESULT_SUCCESS.
    ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel,
                                   const ze_group_count_t *pThreadGroupDimensions,
                                   ze_event_handle_t hEvent,
                                   uint32_t numWaitEvents,
                                   ze_event_handle_t *phWaitEvents) override {
        const auto kernel = Kernel::fromHandle(hKernel);
        cmdListHelper.isaAllocation = kernel->getIsaAllocation();
        cmdListHelper.residencyContainer = kernel->getResidencyContainer();
        cmdListHelper.groupSize = kernel->getGroupSize();
        cmdListHelper.threadGroupDimensions = *pThreadGroupDimensions;

        // Look the descriptor up once and bind by reference to avoid copying the kernel name.
        const auto &kernelDescriptor = kernel->getImmutableData()->getDescriptor();
        const auto &kernelName = kernelDescriptor.kernelMetadata.kernelName;

        NEO::ArgDescriptor arg;
        if (kernelName == "QueryKernelTimestamps") {
            arg = kernelDescriptor.payloadMappings.explicitArgs[2u];
        } else if (kernelName == "QueryKernelTimestampsWithOffsets") {
            arg = kernelDescriptor.payloadMappings.explicitArgs[3u];
        } else {
            // Any other kernel carries no timestamp flag to capture.
            return ZE_RESULT_SUCCESS;
        }

        auto crossThreadData = kernel->getCrossThreadData();
        auto element = arg.as<NEO::ArgDescValue>().elements[0];
        auto pDst = ptrOffset(crossThreadData, element.offset);
        // Named cast to a pointer-to-const: the value is only read here.
        cmdListHelper.useOnlyGlobalTimestamp = *reinterpret_cast<const uint32_t *>(pDst);
        return ZE_RESULT_SUCCESS;
    }
};
// Fixture alias: the appendQueryKernelTimestamps tests reuse CommandListCreate under their own suite name.
using AppendQueryKernelTimestamps = CommandListCreate;
// Platform matcher passed as the last HWTEST2_F argument by those tests.
using TestPlatforms = IsAtLeastProduct<IGFX_SKYLAKE>;
// Appends a timestamp query for two events without an offsets buffer and checks
// that the QueryKernelTimestamps builtin was launched with the destination
// buffer and a GPU timestamp device buffer in its residency container.
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsWithoutOffsetsThenProperBuiltinWasAdded, TestPlatforms) {
    auto testDevice = std::make_unique<MockDeviceForSpv<false, false>>(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get());
    testDevice->builtins.reset(new MockBuiltinFunctionsLibImplTimestamps(testDevice.get(), testDevice->getNEODevice()->getBuiltIns()));
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps);
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestampsWithOffsets);

    device = testDevice.get();

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(device->toHandle(), device));
    auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc);
    // Fatal check: the rest of the test uses and frees this pointer.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    bool containsDstPtr = false;
    bool gpuTimeStampAlloc = false;
    for (const auto &residentGfxAlloc : commandList.cmdListHelper.residencyContainer) {
        if (residentGfxAlloc == nullptr) {
            continue;
        }
        if (residentGfxAlloc->getGpuAddress() == reinterpret_cast<uint64_t>(alloc)) {
            containsDstPtr = true;
        }
        if (residentGfxAlloc->getAllocationType() == NEO::GraphicsAllocation::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER) {
            gpuTimeStampAlloc = true;
        }
    }

    EXPECT_TRUE(containsDstPtr);
    EXPECT_TRUE(gpuTimeStampAlloc);

    EXPECT_EQ(testDevice->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps)->getIsaAllocation()->getGpuAddress(), commandList.cmdListHelper.isaAllocation->getGpuAddress());
    EXPECT_EQ(2u, commandList.cmdListHelper.groupSize[0]);
    EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[1]);
    EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[2]);

    EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp);

    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountX);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ);

    context->freeMem(alloc);
}
// Appends a timestamp query for two events with an offsets buffer and checks
// that the QueryKernelTimestampsWithOffsets builtin was launched with both the
// destination and the offsets allocation in its residency container.
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsWithOffsetsThenProperBuiltinWasAdded, TestPlatforms) {
    auto testDevice = std::make_unique<MockDeviceForSpv<false, false>>(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get());
    testDevice->builtins.reset(new MockBuiltinFunctionsLibImplTimestamps(testDevice.get(), testDevice->getNEODevice()->getBuiltIns()));
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps);
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestampsWithOffsets);

    device = testDevice.get();

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(device->toHandle(), device));
    auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc);
    // Fatal check: the rest of the test uses and frees this pointer.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    void *offsetAlloc = nullptr;
    result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &offsetAlloc);
    if (result != ZE_RESULT_SUCCESS) {
        // Release the first buffer before the fatal assertion below aborts the test.
        context->freeMem(alloc);
    }
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    auto offsetSizes = reinterpret_cast<size_t *>(offsetAlloc);
    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, offsetSizes, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Single pass over the residency container looking for both buffers.
    bool containsDstPtr = false;
    bool containOffsetPtr = false;
    for (const auto &residentAlloc : commandList.cmdListHelper.residencyContainer) {
        if (residentAlloc == nullptr) {
            continue;
        }
        const auto residentAddress = residentAlloc->getGpuAddress();
        if (residentAddress == reinterpret_cast<uint64_t>(alloc)) {
            containsDstPtr = true;
        }
        if (residentAddress == reinterpret_cast<uint64_t>(offsetAlloc)) {
            containOffsetPtr = true;
        }
    }

    EXPECT_TRUE(containsDstPtr);
    EXPECT_TRUE(containOffsetPtr);

    EXPECT_EQ(device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets)->getIsaAllocation()->getGpuAddress(), commandList.cmdListHelper.isaAllocation->getGpuAddress());
    EXPECT_EQ(2u, commandList.cmdListHelper.groupSize[0]);
    EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[1]);
    EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[2]);

    EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp);

    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountX);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ);

    context->freeMem(alloc);
    context->freeMem(offsetAlloc);
}
// Queries twice as many events as maxWorkItemSizes[0] and checks that the
// captured group size matches what the builtin kernel suggests and that the
// group count in X equals eventCount / groupSizeX.
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsWithEventsNumberBiggerThanMaxWorkItemSizeThenProperGroupSizeAndGroupCountIsSet, TestPlatforms) {
    auto testDevice = std::make_unique<MockDeviceForSpv<false, false>>(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get());
    testDevice->builtins.reset(new MockBuiltinFunctionsLibImplTimestamps(testDevice.get(), testDevice->getNEODevice()->getBuiltIns()));
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps);

    device = testDevice.get();

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(device->toHandle(), device));
    auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc);
    // Fatal check: the rest of the test uses and frees this pointer.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // Every slot refers to the same mock event; only the count matters here.
    const size_t eventCount = device->getNEODevice()->getDeviceInfo().maxWorkItemSizes[0] * 2u;
    auto events = std::make_unique<ze_event_handle_t[]>(eventCount);
    for (size_t i = 0u; i < eventCount; ++i) {
        events[i] = event.toHandle();
    }

    result = commandList.appendQueryKernelTimestamps(static_cast<uint32_t>(eventCount), events.get(), alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    EXPECT_EQ(device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps)->getIsaAllocation()->getGpuAddress(), commandList.cmdListHelper.isaAllocation->getGpuAddress());

    // Ask the builtin kernel for the expected group size for this global size.
    uint32_t groupSizeX = static_cast<uint32_t>(eventCount);
    uint32_t groupSizeY = 1u;
    uint32_t groupSizeZ = 1u;
    device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps)->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ);

    EXPECT_EQ(groupSizeX, commandList.cmdListHelper.groupSize[0]);
    EXPECT_EQ(groupSizeY, commandList.cmdListHelper.groupSize[1]);
    EXPECT_EQ(groupSizeZ, commandList.cmdListHelper.groupSize[2]);

    EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp);

    EXPECT_EQ(static_cast<uint32_t>(eventCount) / groupSizeX, commandList.cmdListHelper.threadGroupDimensions.groupCountX);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ);

    context->freeMem(alloc);
}
// Uses a builtin kernel whose suggestGroupSize fails with ZE_RESULT_ERROR_UNKNOWN
// and expects appendQueryKernelTimestamps to return that error.
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsAndInvalidResultSuggestGroupSizeThenUnknownResultReturned, TestPlatforms) {
    // Kernel double: suggestGroupSize always fails, the remaining overrides are no-ops.
    class MockQueryKernelTimestampsKernel : public L0::KernelImp {
      public:
        ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY,
                                     uint32_t globalSizeZ, uint32_t *groupSizeX,
                                     uint32_t *groupSizeY, uint32_t *groupSizeZ) override {
            return ZE_RESULT_ERROR_UNKNOWN;
        }
        void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
            return;
        }
        void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
            return;
        }
        std::unique_ptr<Kernel> clone() const override { return nullptr; }
    };

    // Builtin library double: hands out the failing kernel for every builtin id.
    struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
        MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
            tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>();
        }
        MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
            return tmpMockKernel.get();
        }
        std::unique_ptr<MockQueryKernelTimestampsKernel> tmpMockKernel;
    };

    // Device double: borrows the fixture device's state and exposes the mock builtin library.
    class MockDeviceHandle : public L0::DeviceImp {
      public:
        MockDeviceHandle() = default;
        void initialize(L0::Device *device) {
            neoDevice = device->getNEODevice();
            neoDevice->incRefInternal();
            execEnvironment = device->getExecEnvironment();
            driverHandle = device->getDriverHandle();
            tmpMockBultinLib = std::make_unique<MockBuiltinFunctionsForQueryKernelTimestamps>(nullptr, nullptr);
        }
        MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
            return tmpMockBultinLib.get();
        }
        std::unique_ptr<MockBuiltinFunctionsForQueryKernelTimestamps> tmpMockBultinLib;
    };

    MockDeviceHandle mockDevice;
    mockDevice.initialize(device);

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(&mockDevice, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(mockDevice.toHandle(), &mockDevice));
    auto result = context->allocDeviceMem(&mockDevice, &deviceDesc, 128, 1, &alloc);
    // Fatal check: the rest of the test uses and frees this pointer.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result);

    context->freeMem(alloc);
}
// Uses a builtin kernel whose suggestGroupSize succeeds but whose setGroupSize
// fails with ZE_RESULT_ERROR_UNKNOWN, and expects appendQueryKernelTimestamps
// to return that error.
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsAndInvalidResultSetGroupSizeThenUnknownResultReturned, TestPlatforms) {
    // Kernel double: suggests a 1x1x1 group, then rejects it in setGroupSize.
    class MockQueryKernelTimestampsKernel : public L0::KernelImp {
      public:
        ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY,
                                     uint32_t globalSizeZ, uint32_t *groupSizeX,
                                     uint32_t *groupSizeY, uint32_t *groupSizeZ) override {
            *groupSizeX = static_cast<uint32_t>(1u);
            *groupSizeY = static_cast<uint32_t>(1u);
            *groupSizeZ = static_cast<uint32_t>(1u);
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
                                 uint32_t groupSizeZ) override {
            return ZE_RESULT_ERROR_UNKNOWN;
        }
        void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
            return;
        }
        void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
            return;
        }
        std::unique_ptr<Kernel> clone() const override { return nullptr; }
    };

    // Builtin library double: hands out the failing kernel for every builtin id.
    struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
        MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
            tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>();
        }
        MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
            return tmpMockKernel.get();
        }
        std::unique_ptr<MockQueryKernelTimestampsKernel> tmpMockKernel;
    };

    // Device double: borrows the fixture device's state and exposes the mock builtin library.
    class MockDeviceHandle : public L0::DeviceImp {
      public:
        MockDeviceHandle() = default;
        void initialize(L0::Device *device) {
            neoDevice = device->getNEODevice();
            neoDevice->incRefInternal();
            execEnvironment = device->getExecEnvironment();
            driverHandle = device->getDriverHandle();
            tmpMockBultinLib = std::make_unique<MockBuiltinFunctionsForQueryKernelTimestamps>(nullptr, nullptr);
        }
        MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
            return tmpMockBultinLib.get();
        }
        std::unique_ptr<MockBuiltinFunctionsForQueryKernelTimestamps> tmpMockBultinLib;
    };

    MockDeviceHandle mockDevice;
    mockDevice.initialize(device);

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(&mockDevice, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(mockDevice.toHandle(), &mockDevice));
    auto result = context->allocDeviceMem(&mockDevice, &deviceDesc, 128, 1, &alloc);
    // Fatal check: the rest of the test uses and frees this pointer.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result);

    context->freeMem(alloc);
}
// Captures the allocation bound to kernel argument 0 during
// appendQueryKernelTimestamps and checks that it holds one EventData entry per
// queried event (address, packets in use, timestamp size in dwords).
HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetAllEventData, TestPlatforms) {
    // Kernel double that remembers the allocation passed for argument index 0.
    class MockQueryKernelTimestampsKernel : public L0::KernelImp {
      public:
        MockQueryKernelTimestampsKernel(L0::Module *module) : KernelImp(module) {
            mockKernelImmutableData.kernelDescriptor = &mockKernelDescriptor;
            this->kernelImmData = &mockKernelImmutableData;
        }
        ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override {
            if (argIndex == 0) {
                index0Allocation = allocation;
            }
            return ZE_RESULT_SUCCESS;
        }
        void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
            return;
        }
        void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
            return;
        }
        std::unique_ptr<Kernel> clone() const override { return nullptr; }
        NEO::GraphicsAllocation *index0Allocation = nullptr;
        KernelDescriptor mockKernelDescriptor = {};
        WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
    };

    // Builtin library double: owns a mock module and returns the recording kernel for every builtin id.
    struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
        MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
            tmpModule = std::make_unique<MockModule>(device, nullptr, ModuleType::Builtin);
            tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>(static_cast<L0::ModuleImp *>(tmpModule.get()));
        }
        MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
            return tmpMockKernel.get();
        }
        std::unique_ptr<MockModule> tmpModule;
        std::unique_ptr<MockQueryKernelTimestampsKernel> tmpMockKernel;
    };

    // Device double: borrows the fixture device's state and exposes the mock builtin library.
    class MockDeviceHandle : public L0::DeviceImp {
      public:
        MockDeviceHandle() {
        }
        void initialize(L0::Device *device) {
            neoDevice = device->getNEODevice();
            neoDevice->incRefInternal();
            execEnvironment = device->getExecEnvironment();
            driverHandle = device->getDriverHandle();
            tmpMockBultinLib = std::make_unique<MockBuiltinFunctionsForQueryKernelTimestamps>(this, nullptr);
        }
        MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
            return tmpMockBultinLib.get();
        }
        std::unique_ptr<MockBuiltinFunctionsForQueryKernelTimestamps> tmpMockBultinLib;
    };

    MockDeviceHandle mockDevice;
    mockDevice.initialize(device);

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(&mockDevice, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(mockDevice.toHandle(), &mockDevice));
    auto result = context->allocDeviceMem(&mockDevice, &deviceDesc, 128, 1, &alloc);
    // Fatal check: the rest of the test uses and frees this pointer.
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto index0Allocation = mockDevice.tmpMockBultinLib->tmpMockKernel->index0Allocation;
    EXPECT_NE(nullptr, index0Allocation);

    // Only inspect the buffer when it was captured; a null pointer is already
    // reported above and the destination buffer must still be released below.
    if (index0Allocation != nullptr) {
        EventData *eventData = reinterpret_cast<EventData *>(index0Allocation->getUnderlyingBuffer());

        EXPECT_EQ(eventData[0].address, event.getGpuAddress(&mockDevice));
        EXPECT_EQ(eventData[0].packetsInUse, event.getPacketsInUse());
        EXPECT_EQ(eventData[0].timestampSizeInDw, event.getTimestampSizeInDw());

        EXPECT_EQ(eventData[1].address, event.getGpuAddress(&mockDevice));
        EXPECT_EQ(eventData[1].packetsInUse, event.getPacketsInUse());
        EXPECT_EQ(eventData[1].timestampSizeInDw, event.getTimestampSizeInDw());
    }

    context->freeMem(alloc);
}
// Appends a signal event on a Copy-engine command list and expects an
// MI_FLUSH_DW command in the parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendSignalEventThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, returnValue));
    // Fatal check: the command list is dereferenced below.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
    commandList->appendSignalEvent(event.toHandle());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itor);
}
// Appends a host-scoped signal event on a RenderCompute command list and
// expects a PIPE_CONTROL command in the parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListWhenAppendSignalEventWithScopeThenPipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));
    // Fatal check: the command list is dereferenced below.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
    commandList->appendSignalEvent(event.toHandle());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itor);
}
// Waits on a host-wait-scoped event on a Copy-engine command list and expects
// an MI_FLUSH_DW command in the parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWithDcFlushThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, returnValue));
    // Fatal check: the command list is dereferenced below.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    MockEvent event;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    auto eventHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &eventHandle);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<MI_FLUSH_DW *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itor);
}
// Waits on a host-wait-scoped event on a RenderCompute command list and expects
// a PIPE_CONTROL command in the parsed command stream.
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));
    // Fatal check: the command list is dereferenced below.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    MockEvent event;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    auto eventHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &eventHandle);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(cmdList.end(), itor);
}
// Waits on two host-wait-scoped events and expects a PIPE_CONTROL followed
// later in the stream by an MI_SEMAPHORE_WAIT.
HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammedOnlyOnce) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));
    // Fatal check: the command list is dereferenced below.
    ASSERT_NE(nullptr, commandList);
    auto &commandContainer = commandList->commandContainer;

    MockEvent event, event2;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    ze_event_handle_t events[] = {&event, &event2};
    commandList->appendWaitOnEvents(2, events);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    // Fatal check: advancing the end iterator below would be undefined behaviour.
    ASSERT_NE(cmdList.end(), itor);
    ++itor;

    auto itor2 = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
    EXPECT_NE(cmdList.end(), itor2);
}
// Immediate RenderCompute command list on an asynchronous queue: waiting on two
// host-wait-scoped events must leave the command list's own stream untouched
// (no MI_SEMAPHORE_WAIT parsed, used size unchanged).
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendWaitEventsWithHostScopeThenPipeControlAndSemWaitAreAddedViaFlushTask) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    auto &commandContainer = commandList->commandContainer;

    MockEvent event, event2;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    ze_event_handle_t events[] = {&event, &event2};

    // Remember the stream usage so it can be compared after the append.
    const auto usedBefore = commandContainer.getCommandStream()->getUsed();
    commandList->appendWaitOnEvents(2, events);

    auto *streamAfter = commandContainer.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(streamAfter->getCpuBase(), 0), streamAfter->getUsed()));

    auto semaphoreIt = find<SEMAPHORE_WAIT *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.end(), semaphoreIt);
    EXPECT_EQ(usedBefore, commandContainer.getCommandStream()->getUsed());
}
// Immediate RenderCompute command list on an asynchronous queue: waiting on two
// events with zero wait scope must enable task-count updates on both events and
// leave the command list's own stream untouched.
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenPipeControlAndSemWaitAreAddedViaFlushTask) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    auto &commandContainer = commandList->commandContainer;

    MockEvent event, event2;
    event.signalScope = 0;
    event.waitScope = 0;
    event2.waitScope = 0;
    ze_event_handle_t events[] = {&event, &event2};

    // Resolve the handles back to Event objects to inspect them after the append.
    auto firstEvent = L0::Event::fromHandle(events[0]);
    auto secondEvent = L0::Event::fromHandle(events[1]);

    const auto usedBefore = commandContainer.getCommandStream()->getUsed();
    commandList->appendWaitOnEvents(2, events);

    EXPECT_TRUE(firstEvent->updateTaskCountEnabled);
    EXPECT_TRUE(secondEvent->updateTaskCountEnabled);

    auto *streamAfter = commandContainer.getCommandStream();
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(streamAfter->getCpuBase(), 0), streamAfter->getUsed()));

    auto semaphoreIt = find<SEMAPHORE_WAIT *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.end(), semaphoreIt);
    EXPECT_EQ(usedBefore, commandContainer.getCommandStream()->getUsed());
}
// Asynchronous immediate command list on the copy engine group.
// One of the two waited events has host wait scope; the command list's own
// stream is still expected to stay untouched (no MI_SEMAPHORE_WAIT parsed,
// used size unchanged).
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithHostScopeThenMiFlushAndSemWaitAreAddedViaFlushTask) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue));
    ASSERT_NE(nullptr, commandList);

    // Sanity-check the freshly created immediate list.
    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    MockEvent hostScopeEvent, noScopeEvent;
    hostScopeEvent.signalScope = 0;
    hostScopeEvent.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    noScopeEvent.waitScope = 0;
    ze_event_handle_t waitEvents[] = {&hostScopeEvent, &noScopeEvent};

    auto &commandContainer = commandList->commandContainer;
    auto usedBeforeWait = commandContainer.getCommandStream()->getUsed();

    commandList->appendWaitOnEvents(2, waitEvents);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto semaphoreIt = find<SEMAPHORE_WAIT *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.end(), semaphoreIt);
    EXPECT_EQ(usedBeforeWait, commandContainer.getCommandStream()->getUsed());
}
// Asynchronous immediate command list on the copy engine group.
// Waiting on two events whose wait scope is 0 must not grow the command list's
// own stream (no MI_SEMAPHORE_WAIT parsed, used size unchanged) and must leave
// updateTaskCountEnabled set on both events.
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenMiFlushAndSemWaitAreAddedViaFlushTask) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue));
    ASSERT_NE(nullptr, commandList);

    // Sanity-check the freshly created immediate list.
    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    MockEvent firstEvent, secondEvent;
    firstEvent.signalScope = 0;
    firstEvent.waitScope = 0;
    secondEvent.waitScope = 0;
    ze_event_handle_t waitEvents[] = {&firstEvent, &secondEvent};
    auto firstEventObject = L0::Event::fromHandle(waitEvents[0]);
    auto secondEventObject = L0::Event::fromHandle(waitEvents[1]);

    auto &commandContainer = commandList->commandContainer;
    auto usedBeforeWait = commandContainer.getCommandStream()->getUsed();

    commandList->appendWaitOnEvents(2, waitEvents);

    EXPECT_EQ(true, firstEventObject->updateTaskCountEnabled);
    EXPECT_EQ(true, secondEventObject->updateTaskCountEnabled);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto semaphoreIt = find<SEMAPHORE_WAIT *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.end(), semaphoreIt);
    EXPECT_EQ(usedBeforeWait, commandContainer.getCommandStream()->getUsed());
}
HWTEST_F(CommandListCreate, givenSyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithHostScopeThenMiFlushAndSemWaitAreAdded) {
using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

View File

@@ -5,22 +5,14 @@
*
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "test.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/image/image_hw.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
namespace L0 {
namespace ult {
@@ -243,219 +235,6 @@ HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCall
EXPECT_GT(cmdList.appendMemoryCopyKernel3dCalledTimes, 0u);
}
using AppendMemoryCopy = CommandListCreate;

// Command list mock used by the AppendMemoryCopy tests.
// Every override forwards to the real CommandListCoreFamily implementation; the
// copy-kernel and blit-region overrides additionally record the aligned
// pointers / offsets they were handed so tests can inspect them afterwards.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockAppendMemoryCopy : public MockCommandListHw<gfxCoreFamily> {
  public:
    // Bypasses whatever MockCommandListHw does here and uses the production lookup.
    AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
        return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize);
    }

    // Records the aligned source/destination pointers, then runs the real 2D copy kernel path.
    ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         size_t srcOffset, ze_event_handle_t hSignalEvent,
                                         uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
        dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
        return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
    }

    // Records the aligned source/destination pointers, then runs the real 3D copy kernel path.
    ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         uint32_t srcSlicePitch, size_t srcOffset,
                                         ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
                                         ze_event_handle_t *phWaitEvents) override {
        srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
        dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
        return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
    }

    // Records the source/destination offsets, then runs the real blit-region path.
    ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation,
                                           NEO::GraphicsAllocation *dstAllocation,
                                           size_t srcOffset,
                                           size_t dstOffset,
                                           ze_copy_region_t srcRegion,
                                           ze_copy_region_t dstRegion, Vec3<size_t> copySize,
                                           size_t srcRowPitch, size_t srcSlicePitch,
                                           size_t dstRowPitch, size_t dstSlicePitch,
                                           Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
                                           uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        srcBlitCopyRegionOffset = srcOffset;
        dstBlitCopyRegionOffset = dstOffset;
        return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents);
    }

    // Captured values. All start at zero so a test that reads them without the
    // corresponding override having run sees a defined value instead of garbage.
    uintptr_t srcAlignedPtr = 0;
    uintptr_t dstAlignedPtr = 0;
    size_t srcBlitCopyRegionOffset = 0;
    size_t dstBlitCopyRegionOffset = 0;
};
// A region copy between two raw host pointers is expected to leave exactly two
// entries in the command list's hostPtrMap.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, Platforms) {
    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::RenderCompute);

    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
    void *srcHostPtr = reinterpret_cast<void *>(0x1234);
    void *dstHostPtr = reinterpret_cast<void *>(0x2345);

    cmdList.appendMemoryCopyRegion(dstHostPtr, &dstRegion, 0, 0, srcHostPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);

    EXPECT_EQ(cmdList.hostPtrMap.size(), 2u);
}
// 2D region copy (depth 0) from unaligned host pointers: the pointers recorded by
// the mock's copy-kernel override must already satisfy the surface base address
// alignment mask.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCopyRegion2DCalledThenSrcDstPointersArePageAligned, Platforms) {
    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::RenderCompute);

    ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 0};
    ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 0};
    void *srcHostPtr = reinterpret_cast<void *>(0x1234);
    void *dstHostPtr = reinterpret_cast<void *>(0x2345);

    cmdList.appendMemoryCopyRegion(dstHostPtr, &dstRegion, 0, 0, srcHostPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);

    auto alignmentMask = NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignmentMask();
    EXPECT_TRUE(cmdList.srcAlignedPtr == (cmdList.srcAlignedPtr & alignmentMask));
    EXPECT_TRUE(cmdList.dstAlignedPtr == (cmdList.dstAlignedPtr & alignmentMask));
}
// 3D region copy from unaligned host pointers: the pointers recorded by the
// mock's copy-kernel override must already satisfy the surface base address
// alignment mask.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCopyRegion3DCalledThenSrcDstPointersArePageAligned, Platforms) {
    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::RenderCompute);

    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
    void *srcHostPtr = reinterpret_cast<void *>(0x1234);
    void *dstHostPtr = reinterpret_cast<void *>(0x2345);

    cmdList.appendMemoryCopyRegion(dstHostPtr, &dstRegion, 0, 0, srcHostPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);

    auto alignmentMask = NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignmentMask();
    EXPECT_TRUE(cmdList.srcAlignedPtr == (cmdList.srcAlignedPtr & alignmentMask));
    EXPECT_TRUE(cmdList.dstAlignedPtr == (cmdList.dstAlignedPtr & alignmentMask));
}
// Copy-engine command list, 2D region (depth 0), unaligned host pointers: the
// offsets recorded by the mock's blit-region override must both be non-zero.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemoryCopyRegion2DCalledThenSrcDstNotZeroOffsetsArePassed, Platforms) {
    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::Copy);

    ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 0};
    ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 0};
    void *srcHostPtr = reinterpret_cast<void *>(0x1233);
    void *dstHostPtr = reinterpret_cast<void *>(0x2345);

    cmdList.appendMemoryCopyRegion(dstHostPtr, &dstRegion, 0, 0, srcHostPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);

    EXPECT_GT(cmdList.srcBlitCopyRegionOffset, 0u);
    EXPECT_GT(cmdList.dstBlitCopyRegionOffset, 0u);
}
// Copy-engine command list, 3D region, unaligned host pointers: the offsets
// recorded by the mock's blit-region override must both be non-zero.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemoryCopyRegion3DCalledThenSrcDstNotZeroOffsetsArePassed, Platforms) {
    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::Copy);

    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
    void *srcHostPtr = reinterpret_cast<void *>(0x1233);
    void *dstHostPtr = reinterpret_cast<void *>(0x2345);

    cmdList.appendMemoryCopyRegion(dstHostPtr, &dstRegion, 0, 0, srcHostPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);

    EXPECT_GT(cmdList.srcBlitCopyRegionOffset, 0u);
    EXPECT_GT(cmdList.dstBlitCopyRegionOffset, 0u);
}
// Copy-engine command list, 3D region, host pointers aligned down to the surface
// base address alignment: the offsets recorded by the mock's blit-region override
// must both be zero.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndAlignedHostPointersWhenBlitMemoryCopyRegion3DCalledThenSrcDstZeroOffsetsArePassed, Platforms) {
    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::Copy);

    void *srcHostPtr = alignDown(reinterpret_cast<void *>(0x1233), NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignment());
    void *dstHostPtr = alignDown(reinterpret_cast<void *>(0x2345), NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignment());
    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};

    cmdList.appendMemoryCopyRegion(dstHostPtr, &dstRegion, 0, 0, srcHostPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);

    EXPECT_EQ(cmdList.srcBlitCopyRegionOffset, 0u);
    EXPECT_EQ(cmdList.dstBlitCopyRegionOffset, 0u);
}
// After a region copy between host pointers, the last PIPE_CONTROL in the stream
// must carry a DC flush setting equal to isDcFlushAllowed().
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenPipeControlWithDcFlushAdded, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::RenderCompute);

    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};
    void *srcHostPtr = reinterpret_cast<void *>(0x1234);
    void *dstHostPtr = reinterpret_cast<void *>(0x2345);
    cmdList.appendMemoryCopyRegion(dstHostPtr, &dstRegion, 0, 0, srcHostPtr, &srcRegion, 0, 0, nullptr, 0, nullptr);

    auto &commandContainer = cmdList.commandContainer;
    GenCmdList genCmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto pipeControlIt = find<PIPE_CONTROL *>(genCmdList.begin(), genCmdList.end());
    ASSERT_NE(genCmdList.end(), pipeControlIt);

    // Walk every PIPE_CONTROL so that lastPipeControl ends up on the final one.
    PIPE_CONTROL *lastPipeControl = nullptr;
    for (; pipeControlIt != genCmdList.end(); pipeControlIt = find<PIPE_CONTROL *>(++pipeControlIt, genCmdList.end())) {
        lastPipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
    }

    EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), lastPipeControl->getDcFlushEnable());
}
// Immediate command list backed by a mocked queue: appendMemoryCopy must succeed
// and trigger exactly one executeCommandLists and one synchronize on the queue.
HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, Platforms) {
    Mock<CommandQueue> cmdQueue;

    auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t initResult = commandList->initialize(device, NEO::EngineGroupType::RenderCompute);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initResult);

    // Turn the list into an immediate one that submits through the mocked queue.
    commandList->device = device;
    commandList->cmdQImmediate = &cmdQueue;
    commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;

    EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS));
    EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS));

    void *srcHostPtr = reinterpret_cast<void *>(0x1234);
    void *dstHostPtr = reinterpret_cast<void *>(0x2345);
    auto copyResult = commandList->appendMemoryCopy(dstHostPtr, srcHostPtr, 8, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, copyResult);

    // Detach the stack-allocated queue before the command list is destroyed.
    commandList->cmdQImmediate = nullptr;
}
// Immediate command list: passing numWaitEvents == 1 together with a null wait
// event array must be rejected with ZE_RESULT_ERROR_INVALID_ARGUMENT.
HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyWithInvalidEventThenInvalidArgumentErrorIsReturned, Platforms) {
    Mock<CommandQueue> cmdQueue;

    auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t initResult = commandList->initialize(device, NEO::EngineGroupType::RenderCompute);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initResult);

    // Turn the list into an immediate one that submits through the mocked queue.
    commandList->device = device;
    commandList->cmdQImmediate = &cmdQueue;
    commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;

    void *srcHostPtr = reinterpret_cast<void *>(0x1234);
    void *dstHostPtr = reinterpret_cast<void *>(0x2345);
    auto copyResult = commandList->appendMemoryCopy(dstHostPtr, srcHostPtr, 8, nullptr, 1, nullptr);
    ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, copyResult);

    // Detach the stack-allocated queue before the command list is destroyed.
    commandList->cmdQImmediate = nullptr;
}
// After a plain memory copy between host pointers, the last PIPE_CONTROL in the
// stream must carry a DC flush setting equal to isDcFlushAllowed().
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockAppendMemoryCopy<gfxCoreFamily> cmdList;
    cmdList.initialize(device, NEO::EngineGroupType::RenderCompute);

    void *srcHostPtr = reinterpret_cast<void *>(0x1234);
    void *dstHostPtr = reinterpret_cast<void *>(0x2345);
    cmdList.appendMemoryCopy(dstHostPtr, srcHostPtr, 8, nullptr, 0, nullptr);

    auto &commandContainer = cmdList.commandContainer;
    GenCmdList genCmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        genCmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto pipeControlIt = find<PIPE_CONTROL *>(genCmdList.begin(), genCmdList.end());
    ASSERT_NE(genCmdList.end(), pipeControlIt);

    // Walk every PIPE_CONTROL so that lastPipeControl ends up on the final one.
    PIPE_CONTROL *lastPipeControl = nullptr;
    for (; pipeControlIt != genCmdList.end(); pipeControlIt = find<PIPE_CONTROL *>(++pipeControlIt, genCmdList.end())) {
        lastPipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
    }

    EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), lastPipeControl->getDcFlushEnable());
}
HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, Platforms) {
MockCommandListHw<gfxCoreFamily> cmdList;
cmdList.initialize(device, NEO::EngineGroupType::RenderCompute);
@@ -1078,75 +857,6 @@ HWTEST2_F(CommandListCreate, givenPitchAndSlicePitchWhenMemoryCopyRegionCalledTh
EXPECT_EQ(cmdList.srcSize.x, pitch);
EXPECT_EQ(cmdList.srcSize.y, slicePitch / pitch);
}
// Command list mock for memory-fill tests: allocation lookup is stubbed out and
// blit copies are only counted, never encoded.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
  public:
    MockCommandListForMemFill() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}

    // Always hands back the same fixed placeholder, regardless of the arguments.
    AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
        return {0, 0, nullptr, true};
    }

    // Counts the call and reports success without touching the command stream.
    ze_result_t appendMemoryCopyBlit(NEO::GraphicsAllocation *dstPtrAlloc,
                                     uint64_t dstOffset,
                                     NEO::GraphicsAllocation *srcPtrAlloc,
                                     uint64_t srcOffset, uint64_t size) override {
        ++appendMemoryCopyBlitCalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    // Number of appendMemoryCopyBlit invocations seen so far.
    uint32_t appendMemoryCopyBlitCalledTimes = 0;
};
// Copy-engine command list with a kernel-timestamp event attached to the copy.
// Expected stream layout: two MI_STORE_REGISTER_MEM (global timestamp, then
// context timestamp), an MI_FLUSH_DW, then the same two MI_STORE_REGISTER_MEM
// again as the very last commands.
//
// Every iterator / cast result is guarded with a fatal ASSERT_* before being
// dereferenced, so an unexpected stream produces a test failure instead of an
// end-iterator or null-pointer dereference.
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, Platforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::Copy);
    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    eventPoolDesc.count = 1;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = 0;
    eventDesc.wait = 0;

    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc));
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
    EXPECT_GT(commandList.appendMemoryCopyBlitCalledTimes, 1u);
    EXPECT_EQ(1u, event->getPacketsInUse());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), commandList.commandContainer.getCommandStream()->getUsed()));

    // Profiling "before": global timestamp followed directly by context timestamp.
    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    itor++;
    ASSERT_NE(cmdList.end(), itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);

    // An MI_FLUSH_DW must follow before the "after" timestamps are written.
    itor = find<MI_FLUSH_DW *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    // Profiling "after": the same register pair, and nothing else behind it.
    itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    itor++;
    ASSERT_NE(cmdList.end(), itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    itor++;
    EXPECT_EQ(cmdList.end(), itor);
}
using SupportedPlatforms = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;
HWTEST2_F(CommandListCreate, givenCommandListThenSshCorrectlyReserved, SupportedPlatforms) {
@@ -1156,86 +866,5 @@ HWTEST2_F(CommandListCreate, givenCommandListThenSshCorrectlyReserved, Supported
auto size = helper.getRenderSurfaceStateSize();
EXPECT_EQ(commandList.getReserveSshSize(), size);
}
// Render/compute command list with a kernel-timestamp event attached to the copy.
// Expected stream layout: MI_LOAD_REGISTER_REG for the global and context
// timestamps, a PIPE_CONTROL without DC flush, the same MI_LOAD_REGISTER_REG pair
// again, and finally exactly one more PIPE_CONTROL whose DC flush setting equals
// isDcFlushAllowed().
//
// Every search result is guarded with a fatal ASSERT_NE before it is
// dereferenced, so an unexpected stream produces a test failure instead of an
// end-iterator dereference.
HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, SupportedPlatforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);
    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc));
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
    EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalledTimes, 0u);
    EXPECT_EQ(commandList.appendMemoryCopyBlitCalledTimes, 0u);
    EXPECT_EQ(1u, event->getPacketsInUse());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    // Profiling "before": global timestamp, then context timestamp.
    auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    }
    itor++;

    itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    }
    itor++;

    // PIPE_CONTROL between the two profiling sections must not flush DC.
    itor = find<PIPE_CONTROL *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
        EXPECT_FALSE(cmd->getDcFlushEnable());
    }
    itor++;

    // Profiling "after": the same register pair again.
    itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    }
    itor++;

    itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    }

    auto temp = itor;
    auto numPCs = findAll<PIPE_CONTROL *>(temp, cmdList.end());
    // Only one PIPE_CONTROL may remain after the last timestamp read: the one carrying the DC flush.
    ASSERT_EQ(1u, numPCs.size());

    itor = find<PIPE_CONTROL *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), cmd->getDcFlushEnable());
    }
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,808 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "test.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_builtin_functions_lib_impl_timestamps.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device_for_spirv.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
namespace ult {
using CommandListCreate = Test<DeviceFixture>;

// Minimal L0::Event stand-in: it owns a MockGraphicsAllocation and answers every
// query with a fixed value, so tests can pass it wherever an event handle is
// required without creating an event pool.
class MockEvent : public ::L0::Event {
  public:
    // Creates the backing mock allocation and copies its GPU address into gpuAddress.
    MockEvent() {
        mockAllocation.reset(new NEO::MockGraphicsAllocation(0, NEO::GraphicsAllocation::AllocationType::INTERNAL_HOST_MEMORY,
                                                             reinterpret_cast<void *>(0x1234), 0x1000, 0, sizeof(uint32_t),
                                                             MemoryPool::System4KBPages));
        gpuAddress = mockAllocation->getGpuAddress();
    }

    // The device argument is ignored; the single mock allocation is always used.
    NEO::GraphicsAllocation &getAllocation(L0::Device *device) override {
        return *mockAllocation;
    }

    uint64_t getGpuAddress(L0::Device *device) override {
        return mockAllocation->getGpuAddress();
    }

    // Host-side operations: all of them report success and do nothing else.
    ze_result_t destroy() override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t hostSignal() override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t hostSynchronize(uint64_t timeout) override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t queryStatus() override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t reset() override {
        return ZE_RESULT_SUCCESS;
    }
    ze_result_t queryKernelTimestamp(ze_kernel_timestamp_result_t *dstptr) override {
        return ZE_RESULT_SUCCESS;
    }

    // Fixed timestamp layout reported to callers.
    size_t getTimestampSizeInDw() const override {
        return 1;
    }
    size_t getContextStartOffset() const override { return 0; }
    size_t getContextEndOffset() const override { return 4; }
    size_t getGlobalStartOffset() const override { return 8; }
    size_t getGlobalEndOffset() const override { return 12; }
    size_t getSinglePacketSize() const override { return 16; }

    // Packet bookkeeping is stubbed: one packet is always reported and updates are ignored.
    uint32_t getPacketsInUse() override { return 1; }
    void resetPackets() override {}
    void setPacketsInUse(uint32_t value) override {}
    uint64_t getPacketAddress(L0::Device *) override { return 0; }

    std::unique_ptr<NEO::GraphicsAllocation> mockAllocation;
};
// appendQueryKernelTimestamps called with numWaitEvents == 1 but a null wait
// event array must return ZE_RESULT_ERROR_INVALID_ARGUMENT.
//
// The command list and the device allocation are checked with fatal assertions
// before use, and alloc starts as nullptr, so a setup failure cannot lead to a
// null dereference or to passing an indeterminate pointer around.
HWTEST_F(CommandListCreate, givenCommandListWithInvalidWaitEventArgWhenAppendQueryKernelTimestampsThenProperErrorRetruned) {
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);
    device->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps);

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto eventHandle = event.toHandle();
    result = commandList->appendQueryKernelTimestamps(1u, &eventHandle, alloc, nullptr, nullptr, 1u, nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);

    context->freeMem(alloc);
}
// Snapshot of what a mocked appendLaunchKernel call was given, kept so tests can
// assert on it afterwards. Every member has a defined initial value so that a
// test reading it without the mock having been invoked does not read garbage.
struct CmdListHelper {
    NEO::GraphicsAllocation *isaAllocation = nullptr;
    NEO::ResidencyContainer residencyContainer;
    ze_group_count_t threadGroupDimensions = {};
    const uint32_t *groupSize = nullptr;
    // Stays at the uint32_t maximum until a value is read from a kernel's cross-thread data.
    uint32_t useOnlyGlobalTimestamp = std::numeric_limits<uint32_t>::max();
};
// Command list mock whose appendLaunchKernel does not encode anything: it only
// captures details of the kernel being launched into cmdListHelper and reports
// success.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandListForAppendLaunchKernel : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
  public:
    CmdListHelper cmdListHelper;

    ze_result_t appendLaunchKernel(ze_kernel_handle_t hKernel,
                                   const ze_group_count_t *pThreadGroupDimensions,
                                   ze_event_handle_t hEvent,
                                   uint32_t numWaitEvents,
                                   ze_event_handle_t *phWaitEvents) override {
        const auto kernel = Kernel::fromHandle(hKernel);

        // Captured for every kernel, whatever its name.
        cmdListHelper.isaAllocation = kernel->getIsaAllocation();
        cmdListHelper.residencyContainer = kernel->getResidencyContainer();
        cmdListHelper.groupSize = kernel->getGroupSize();
        cmdListHelper.threadGroupDimensions = *pThreadGroupDimensions;

        // Only the two timestamp-query builtins have an explicit argument that is
        // read back below; it sits at index 2 or 3 depending on the variant.
        const auto &kernelName = kernel->getImmutableData()->getDescriptor().kernelMetadata.kernelName;
        uint32_t flagArgIndex = 0u;
        if (kernelName == "QueryKernelTimestamps") {
            flagArgIndex = 2u;
        } else if (kernelName == "QueryKernelTimestampsWithOffsets") {
            flagArgIndex = 3u;
        } else {
            return ZE_RESULT_SUCCESS;
        }
        NEO::ArgDescriptor arg = kernel->getImmutableData()->getDescriptor().payloadMappings.explicitArgs[flagArgIndex];

        // Read the argument's 32-bit value straight out of the cross-thread data.
        auto crossThreadData = kernel->getCrossThreadData();
        auto element = arg.as<NEO::ArgDescValue>().elements[0];
        auto pDst = ptrOffset(crossThreadData, element.offset);
        cmdListHelper.useOnlyGlobalTimestamp = *reinterpret_cast<const uint32_t *>(pDst);

        return ZE_RESULT_SUCCESS;
    }
};
using AppendQueryKernelTimestamps = CommandListCreate;
using TestPlatforms = IsAtLeastProduct<IGFX_SKYLAKE>;

// appendQueryKernelTimestamps without an offsets buffer must launch the
// QueryKernelTimestamps builtin with group size {2,1,1} and group count {1,1,1},
// and the kernel's residency container must hold both the destination buffer and
// a GPU_TIMESTAMP_DEVICE_BUFFER allocation.
//
// alloc starts as nullptr and the allocation result is checked with a fatal
// assertion, so a failed allocation cannot feed an indeterminate pointer into
// the rest of the test.
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsWithoutOffsetsThenProperBuiltinWasAdded, TestPlatforms) {
    std::unique_ptr<MockDeviceForSpv<false, false>> testDevice = std::unique_ptr<MockDeviceForSpv<false, false>>(new MockDeviceForSpv<false, false>(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get()));
    testDevice->builtins.reset(new MockBuiltinFunctionsLibImplTimestamps(testDevice.get(), testDevice->getNEODevice()->getBuiltIns()));
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps);
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestampsWithOffsets);

    // From here on the fixture's device pointer refers to the test device.
    device = testDevice.get();

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(device->toHandle(), device));
    auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    bool containsDstPtr = false;
    bool gpuTimeStampAlloc = false;
    for (auto &residentGfxAlloc : commandList.cmdListHelper.residencyContainer) {
        if (residentGfxAlloc != nullptr) {
            if (residentGfxAlloc->getGpuAddress() ==
                reinterpret_cast<uint64_t>(alloc)) {
                containsDstPtr = true;
            }
            if (residentGfxAlloc->getAllocationType() ==
                NEO::GraphicsAllocation::AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER) {
                gpuTimeStampAlloc = true;
            }
        }
    }
    EXPECT_TRUE(containsDstPtr);
    EXPECT_TRUE(gpuTimeStampAlloc);

    EXPECT_EQ(testDevice->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps)->getIsaAllocation()->getGpuAddress(), commandList.cmdListHelper.isaAllocation->getGpuAddress());
    EXPECT_EQ(2u, commandList.cmdListHelper.groupSize[0]);
    EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[1]);
    EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[2]);

    EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp);

    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountX);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ);

    context->freeMem(alloc);
}
// appendQueryKernelTimestamps with an offsets buffer must launch the
// QueryKernelTimestampsWithOffsets builtin with group size {2,1,1} and group
// count {1,1,1}, and the kernel's residency container must hold both the
// destination buffer and the offsets buffer.
//
// Both device pointers start as nullptr and each allocation result is checked
// with a fatal assertion, so a failed allocation cannot feed an indeterminate
// pointer into the rest of the test.
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsWithOffsetsThenProperBuiltinWasAdded, TestPlatforms) {
    std::unique_ptr<MockDeviceForSpv<false, false>> testDevice = std::unique_ptr<MockDeviceForSpv<false, false>>(new MockDeviceForSpv<false, false>(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get()));
    testDevice->builtins.reset(new MockBuiltinFunctionsLibImplTimestamps(testDevice.get(), testDevice->getNEODevice()->getBuiltIns()));
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps);
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestampsWithOffsets);

    // From here on the fixture's device pointer refers to the test device.
    device = testDevice.get();

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(device->toHandle(), device));
    auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &alloc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    void *offsetAlloc = nullptr;
    result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &offsetAlloc);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    auto offsetSizes = reinterpret_cast<size_t *>(offsetAlloc);
    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, offsetSizes, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    bool containsDstPtr = false;
    for (auto &a : commandList.cmdListHelper.residencyContainer) {
        if (a != nullptr && a->getGpuAddress() == reinterpret_cast<uint64_t>(alloc)) {
            containsDstPtr = true;
        }
    }
    EXPECT_TRUE(containsDstPtr);

    bool containOffsetPtr = false;
    for (auto &a : commandList.cmdListHelper.residencyContainer) {
        if (a != nullptr && a->getGpuAddress() == reinterpret_cast<uint64_t>(offsetAlloc)) {
            containOffsetPtr = true;
        }
    }
    EXPECT_TRUE(containOffsetPtr);

    EXPECT_EQ(device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets)->getIsaAllocation()->getGpuAddress(), commandList.cmdListHelper.isaAllocation->getGpuAddress());
    EXPECT_EQ(2u, commandList.cmdListHelper.groupSize[0]);
    EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[1]);
    EXPECT_EQ(1u, commandList.cmdListHelper.groupSize[2]);

    EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, commandList.cmdListHelper.useOnlyGlobalTimestamp);

    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountX);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountY);
    EXPECT_EQ(1u, commandList.cmdListHelper.threadGroupDimensions.groupCountZ);

    context->freeMem(alloc);
    context->freeMem(offsetAlloc);
}
// Queries twice as many events as maxWorkItemSizes[0] and checks that the recorded group size
// equals what the QueryKernelTimestamps builtin suggests for that global size, and that the
// group count in X is eventCount / groupSizeX (Y and Z stay 1).
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsWithEventsNumberBiggerThanMaxWorkItemSizeThenProperGroupSizeAndGroupCountIsSet, TestPlatforms) {
    using SpvDevice = MockDeviceForSpv<false, false>;

    // Replace the fixture device with a mock device that uses the timestamps builtin mock.
    auto testDevice = std::make_unique<SpvDevice>(device->getNEODevice(), device->getNEODevice()->getExecutionEnvironment(), driverHandle.get());
    testDevice->builtins.reset(new MockBuiltinFunctionsLibImplTimestamps(testDevice.get(), testDevice->getNEODevice()->getBuiltIns()));
    testDevice->getBuiltinFunctionsLib()->initBuiltinKernel(L0::Builtin::QueryKernelTimestamps);
    device = testDevice.get();

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *timestampsBuffer;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(device->toHandle(), device));
    auto result = context->allocDeviceMem(device, &deviceDesc, 128, 1, &timestampsBuffer);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    // Every slot refers to the same mock event.
    const size_t eventCount = device->getNEODevice()->getDeviceInfo().maxWorkItemSizes[0] * 2u;
    const auto eventCount32 = static_cast<uint32_t>(eventCount);
    auto events = std::make_unique<ze_event_handle_t[]>(eventCount);
    for (size_t index = 0u; index != eventCount; ++index) {
        events[index] = event.toHandle();
    }

    result = commandList.appendQueryKernelTimestamps(eventCount32, events.get(), timestampsBuffer, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    const auto &helper = commandList.cmdListHelper;
    auto *expectedKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps);
    EXPECT_EQ(expectedKernel->getIsaAllocation()->getGpuAddress(), helper.isaAllocation->getGpuAddress());

    // Ask the same builtin for its suggestion; the variables serve as both input and output.
    uint32_t expectedGroupSizeX = eventCount32;
    uint32_t expectedGroupSizeY = 1u;
    uint32_t expectedGroupSizeZ = 1u;
    device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestamps)->suggestGroupSize(expectedGroupSizeX, expectedGroupSizeY, expectedGroupSizeZ, &expectedGroupSizeX, &expectedGroupSizeY, &expectedGroupSizeZ);

    EXPECT_EQ(expectedGroupSizeX, helper.groupSize[0]);
    EXPECT_EQ(expectedGroupSizeY, helper.groupSize[1]);
    EXPECT_EQ(expectedGroupSizeZ, helper.groupSize[2]);

    EXPECT_EQ(NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).useOnlyGlobalTimestamps() ? 1u : 0u, helper.useOnlyGlobalTimestamp);

    EXPECT_EQ(eventCount32 / expectedGroupSizeX, helper.threadGroupDimensions.groupCountX);
    EXPECT_EQ(1u, helper.threadGroupDimensions.groupCountY);
    EXPECT_EQ(1u, helper.threadGroupDimensions.groupCountZ);

    context->freeMem(timestampsBuffer);
}
// Checks that appendQueryKernelTimestamps returns ZE_RESULT_ERROR_UNKNOWN when the builtin
// kernel's suggestGroupSize reports ZE_RESULT_ERROR_UNKNOWN.
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsAndInvalidResultSuggestGroupSizeThenUnknownResultReturned, TestPlatforms) {
    // Kernel stub: suggestGroupSize always fails, the remaining overrides are no-ops.
    class MockQueryKernelTimestampsKernel : public L0::KernelImp {
      public:
        ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY,
                                     uint32_t globalSizeZ, uint32_t *groupSizeX,
                                     uint32_t *groupSizeY, uint32_t *groupSizeZ) override {
            return ZE_RESULT_ERROR_UNKNOWN;
        }
        void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
            return;
        }
        void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
            return;
        }
        std::unique_ptr<Kernel> clone() const override { return nullptr; }
    };

    // Builtin library that returns the stub kernel regardless of the requested builtin.
    // The kernel is owned through unique_ptr, so no hand-written destructor is needed.
    struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
        MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
            tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>();
        }
        MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
            return tmpMockKernel.get();
        }
        std::unique_ptr<MockQueryKernelTimestampsKernel> tmpMockKernel;
    };

    // Device that borrows the NEO device, execution environment and driver handle of an
    // existing device, but serves the mock builtin library above.
    class MockDeviceHandle : public L0::DeviceImp {
      public:
        MockDeviceHandle() {
        }
        void initialize(L0::Device *device) {
            neoDevice = device->getNEODevice();
            // NOTE(review): presumably balanced by a release in DeviceImp's destructor - confirm.
            neoDevice->incRefInternal();
            execEnvironment = device->getExecEnvironment();
            driverHandle = device->getDriverHandle();
            tmpMockBultinLib = std::make_unique<MockBuiltinFunctionsForQueryKernelTimestamps>(nullptr, nullptr);
        }
        MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
            return tmpMockBultinLib.get();
        }
        std::unique_ptr<MockBuiltinFunctionsForQueryKernelTimestamps> tmpMockBultinLib;
    };

    MockDeviceHandle mockDevice;
    mockDevice.initialize(device);

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(&mockDevice, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(mockDevice.toHandle(), &mockDevice));
    auto result = context->allocDeviceMem(&mockDevice, &deviceDesc, 128, 1, &alloc);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result);

    context->freeMem(alloc);
}
// Checks that appendQueryKernelTimestamps returns ZE_RESULT_ERROR_UNKNOWN when the builtin
// kernel's suggestGroupSize succeeds but its setGroupSize reports ZE_RESULT_ERROR_UNKNOWN.
HWTEST2_F(AppendQueryKernelTimestamps, givenCommandListWhenAppendQueryKernelTimestampsAndInvalidResultSetGroupSizeThenUnknownResultReturned, TestPlatforms) {
    // Kernel stub: suggests a 1x1x1 group successfully, then fails in setGroupSize.
    class MockQueryKernelTimestampsKernel : public L0::KernelImp {
      public:
        ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY,
                                     uint32_t globalSizeZ, uint32_t *groupSizeX,
                                     uint32_t *groupSizeY, uint32_t *groupSizeZ) override {
            *groupSizeX = static_cast<uint32_t>(1u);
            *groupSizeY = static_cast<uint32_t>(1u);
            *groupSizeZ = static_cast<uint32_t>(1u);
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
                                 uint32_t groupSizeZ) override {
            return ZE_RESULT_ERROR_UNKNOWN;
        }
        void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
            return;
        }
        void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
            return;
        }
        std::unique_ptr<Kernel> clone() const override { return nullptr; }
    };

    // Builtin library that returns the stub kernel regardless of the requested builtin.
    // The kernel is owned through unique_ptr, so no hand-written destructor is needed.
    struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
        MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
            tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>();
        }
        MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
            return tmpMockKernel.get();
        }
        std::unique_ptr<MockQueryKernelTimestampsKernel> tmpMockKernel;
    };

    // Device that borrows the NEO device, execution environment and driver handle of an
    // existing device, but serves the mock builtin library above.
    class MockDeviceHandle : public L0::DeviceImp {
      public:
        MockDeviceHandle() {
        }
        void initialize(L0::Device *device) {
            neoDevice = device->getNEODevice();
            // NOTE(review): presumably balanced by a release in DeviceImp's destructor - confirm.
            neoDevice->incRefInternal();
            execEnvironment = device->getExecEnvironment();
            driverHandle = device->getDriverHandle();
            tmpMockBultinLib = std::make_unique<MockBuiltinFunctionsForQueryKernelTimestamps>(nullptr, nullptr);
        }
        MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
            return tmpMockBultinLib.get();
        }
        std::unique_ptr<MockBuiltinFunctionsForQueryKernelTimestamps> tmpMockBultinLib;
    };

    MockDeviceHandle mockDevice;
    mockDevice.initialize(device);

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(&mockDevice, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(mockDevice.toHandle(), &mockDevice));
    auto result = context->allocDeviceMem(&mockDevice, &deviceDesc, 128, 1, &alloc);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, result);

    context->freeMem(alloc);
}
// Checks the contents of the buffer bound to kernel argument 0 by appendQueryKernelTimestamps:
// for each queried event it must hold the event's GPU address, packets in use and timestamp
// size in dwords.
HWTEST2_F(AppendQueryKernelTimestamps, givenEventWhenAppendQueryIsCalledThenSetAllEventData, TestPlatforms) {
    // Kernel stub that remembers the allocation passed for argument index 0.
    class MockQueryKernelTimestampsKernel : public L0::KernelImp {
      public:
        MockQueryKernelTimestampsKernel(L0::Module *module) : KernelImp(module) {
            mockKernelImmutableData.kernelDescriptor = &mockKernelDescriptor;
            this->kernelImmData = &mockKernelImmutableData;
        }
        ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override {
            if (argIndex == 0) {
                index0Allocation = allocation;
            }
            return ZE_RESULT_SUCCESS;
        }
        void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
            return;
        }
        void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
            return;
        }
        std::unique_ptr<Kernel> clone() const override { return nullptr; }
        NEO::GraphicsAllocation *index0Allocation = nullptr;
        KernelDescriptor mockKernelDescriptor = {};
        WhiteBox<::L0::KernelImmutableData> mockKernelImmutableData = {};
    };

    // Builtin library that returns the recording kernel regardless of the requested builtin.
    struct MockBuiltinFunctionsForQueryKernelTimestamps : BuiltinFunctionsLibImpl {
        MockBuiltinFunctionsForQueryKernelTimestamps(L0::Device *device, NEO::BuiltIns *builtInsLib) : BuiltinFunctionsLibImpl(device, builtInsLib) {
            tmpModule = std::make_unique<MockModule>(device, nullptr, ModuleType::Builtin);
            tmpMockKernel = std::make_unique<MockQueryKernelTimestampsKernel>(static_cast<L0::ModuleImp *>(tmpModule.get()));
        }
        MockQueryKernelTimestampsKernel *getFunction(Builtin func) override {
            return tmpMockKernel.get();
        }
        std::unique_ptr<MockModule> tmpModule;
        std::unique_ptr<MockQueryKernelTimestampsKernel> tmpMockKernel;
    };

    // Device that borrows the NEO device, execution environment and driver handle of an
    // existing device, but serves the mock builtin library above.
    class MockDeviceHandle : public L0::DeviceImp {
      public:
        MockDeviceHandle() {
        }
        void initialize(L0::Device *device) {
            neoDevice = device->getNEODevice();
            // NOTE(review): presumably balanced by a release in DeviceImp's destructor - confirm.
            neoDevice->incRefInternal();
            execEnvironment = device->getExecEnvironment();
            driverHandle = device->getDriverHandle();
            tmpMockBultinLib = std::make_unique<MockBuiltinFunctionsForQueryKernelTimestamps>(this, nullptr);
        }
        MockBuiltinFunctionsForQueryKernelTimestamps *getBuiltinFunctionsLib() override {
            return tmpMockBultinLib.get();
        }
        std::unique_ptr<MockBuiltinFunctionsForQueryKernelTimestamps> tmpMockBultinLib;
    };

    MockDeviceHandle mockDevice;
    mockDevice.initialize(device);

    MockCommandListForAppendLaunchKernel<gfxCoreFamily> commandList;
    commandList.initialize(&mockDevice, NEO::EngineGroupType::RenderCompute);

    MockEvent event;
    ze_event_handle_t events[2] = {event.toHandle(), event.toHandle()};
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;

    void *alloc = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->getDevices().insert(std::make_pair(mockDevice.toHandle(), &mockDevice));
    auto result = context->allocDeviceMem(&mockDevice, &deviceDesc, 128, 1, &alloc);
    EXPECT_EQ(result, ZE_RESULT_SUCCESS);

    result = commandList.appendQueryKernelTimestamps(2u, events, alloc, nullptr, nullptr, 0u, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto index0Allocation = mockDevice.tmpMockBultinLib->tmpMockKernel->index0Allocation;
    // Fatal assertion: the allocation is dereferenced right below, so a null pointer must stop
    // the test here instead of crashing the whole test binary. Free the buffer first, because a
    // fatal failure returns before the freeMem call at the end of the test.
    if (index0Allocation == nullptr) {
        context->freeMem(alloc);
    }
    ASSERT_NE(nullptr, index0Allocation);

    // Both entries describe the same event, since the event list holds it twice.
    EventData *eventData = reinterpret_cast<EventData *>(index0Allocation->getUnderlyingBuffer());
    EXPECT_EQ(eventData[0].address, event.getGpuAddress(&mockDevice));
    EXPECT_EQ(eventData[0].packetsInUse, event.getPacketsInUse());
    EXPECT_EQ(eventData[0].timestampSizeInDw, event.getTimestampSizeInDw());
    EXPECT_EQ(eventData[1].address, event.getGpuAddress(&mockDevice));
    EXPECT_EQ(eventData[1].packetsInUse, event.getPacketsInUse());
    EXPECT_EQ(eventData[1].timestampSizeInDw, event.getTimestampSizeInDw());

    context->freeMem(alloc);
}
// Appends a signal of a host-scoped event to a command list created for the Copy engine group
// and expects an MI_FLUSH_DW in the resulting command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendSignalEventThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, returnValue));

    MockEvent event;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    commandList->appendSignalEvent(event.toHandle());

    // Decode everything that has been written to the command stream.
    auto &container = commandList->commandContainer;
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    auto flushDw = find<MI_FLUSH_DW *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), flushDw);
}
// Appends a signal of a host-scoped event to a RenderCompute command list and expects a
// PIPE_CONTROL in the resulting command stream.
HWTEST_F(CommandListCreate, givenCommandListWhenAppendSignalEventWithScopeThenPipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));

    MockEvent event;
    event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    commandList->appendSignalEvent(event.toHandle());

    // Decode everything that has been written to the command stream.
    auto &container = commandList->commandContainer;
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    auto pipeControl = find<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), pipeControl);
}
// Waits on a single event (signalScope 0, host wait scope) on a command list created for the
// Copy engine group and expects an MI_FLUSH_DW in the resulting command stream.
HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWithDcFlushThenMiFlushDWIsProgrammed) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, returnValue));

    MockEvent event;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    auto waitHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &waitHandle);

    // Decode everything that has been written to the command stream.
    auto &container = commandList->commandContainer;
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    auto flushDw = find<MI_FLUSH_DW *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), flushDw);
}
// Waits on a single event (signalScope 0, host wait scope) on a RenderCompute command list and
// expects a PIPE_CONTROL in the resulting command stream.
// NOTE(review): "CommandListy" in the test name looks like a typo; it is kept as-is so that
// existing test filters continue to match.
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammed) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));

    MockEvent event;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    auto waitHandle = event.toHandle();
    commandList->appendWaitOnEvents(1, &waitHandle);

    // Decode everything that has been written to the command stream.
    auto &container = commandList->commandContainer;
    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    auto pipeControl = find<PIPE_CONTROL *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_NE(parsedCommands.end(), pipeControl);
}
// Waits on two host-wait-scoped events on a RenderCompute command list and expects a
// PIPE_CONTROL followed (somewhere later in the stream) by an MI_SEMAPHORE_WAIT.
// NOTE(review): despite the "OnlyOnce" in the name, the body does not count PIPE_CONTROLs.
HWTEST_F(CommandListCreate, givenCommandListWhenAppendWaitEventsWithDcFlushThenPipeControlIsProgrammedOnlyOnce) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, returnValue));
    auto &commandContainer = commandList->commandContainer;

    MockEvent event, event2;
    event.signalScope = 0;
    event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    event2.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    ze_event_handle_t events[] = {&event, &event2};

    commandList->appendWaitOnEvents(2, events);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    auto itor = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    // Fatal assertion: the iterator is advanced below, and advancing end() is undefined
    // behaviour, so a missing PIPE_CONTROL must stop the test here.
    ASSERT_NE(cmdList.end(), itor);
    ++itor;

    // Search for the semaphore only in the commands that follow the PIPE_CONTROL.
    auto itor2 = find<SEMAPHORE_WAIT *>(itor, cmdList.end());
    EXPECT_NE(cmdList.end(), itor2);
}
// Immediate RenderCompute command list on an asynchronous queue: after waiting on two
// host-wait-scoped events, the command list's own stream must contain no MI_SEMAPHORE_WAIT and
// its used size must be unchanged.
// NOTE(review): per the test name the commands are expected to go through flushTask instead;
// that path is not inspected here.
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendWaitEventsWithHostScopeThenPipeControlAndSemWaitAreAddedViaFlushTask) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    MockEvent firstEvent, secondEvent;
    firstEvent.signalScope = 0;
    firstEvent.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    secondEvent.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    ze_event_handle_t waitList[] = {&firstEvent, &secondEvent};

    auto &container = commandList->commandContainer;
    auto usedBefore = container.getCommandStream()->getUsed();
    commandList->appendWaitOnEvents(2, waitList);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    auto semaphore = find<SEMAPHORE_WAIT *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.end(), semaphore);
    EXPECT_EQ(usedBefore, container.getCommandStream()->getUsed());
}
// Immediate RenderCompute command list on an asynchronous queue: after waiting on two events
// with wait scope 0, both events must have updateTaskCountEnabled set, the command list's own
// stream must contain no MI_SEMAPHORE_WAIT and its used size must be unchanged.
// NOTE(review): per the test name the commands are expected to go through flushTask instead;
// that path is not inspected here.
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenPipeControlAndSemWaitAreAddedViaFlushTask) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    MockEvent firstEvent, secondEvent;
    firstEvent.signalScope = 0;
    firstEvent.waitScope = 0;
    secondEvent.waitScope = 0;
    ze_event_handle_t waitList[] = {&firstEvent, &secondEvent};
    auto firstEventObject = L0::Event::fromHandle(waitList[0]);
    auto secondEventObject = L0::Event::fromHandle(waitList[1]);

    auto &container = commandList->commandContainer;
    auto usedBefore = container.getCommandStream()->getUsed();
    commandList->appendWaitOnEvents(2, waitList);

    EXPECT_EQ(true, firstEventObject->updateTaskCountEnabled);
    EXPECT_EQ(true, secondEventObject->updateTaskCountEnabled);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    auto semaphore = find<SEMAPHORE_WAIT *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.end(), semaphore);
    EXPECT_EQ(usedBefore, container.getCommandStream()->getUsed());
}
// Immediate Copy-engine command list on an asynchronous queue: after waiting on two events
// (the first with host wait scope, the second with wait scope 0), the command list's own stream
// must contain no MI_SEMAPHORE_WAIT and its used size must be unchanged.
// NOTE(review): per the test name the commands are expected to go through flushTask instead;
// that path is not inspected here.
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithHostScopeThenMiFlushAndSemWaitAreAddedViaFlushTask) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    MockEvent firstEvent, secondEvent;
    firstEvent.signalScope = 0;
    firstEvent.waitScope = ZE_EVENT_SCOPE_FLAG_HOST;
    secondEvent.waitScope = 0;
    ze_event_handle_t waitList[] = {&firstEvent, &secondEvent};

    auto &container = commandList->commandContainer;
    auto usedBefore = container.getCommandStream()->getUsed();
    commandList->appendWaitOnEvents(2, waitList);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    auto semaphore = find<SEMAPHORE_WAIT *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.end(), semaphore);
    EXPECT_EQ(usedBefore, container.getCommandStream()->getUsed());
}
// Immediate Copy-engine command list on an asynchronous queue: after waiting on two events with
// wait scope 0, both events must have updateTaskCountEnabled set, the command list's own stream
// must contain no MI_SEMAPHORE_WAIT and its used size must be unchanged.
// NOTE(review): per the test name the commands are expected to go through flushTask instead;
// that path is not inspected here.
HWTEST_F(CommandListCreate, givenAsyncCmdQueueAndCopyOnlyImmediateCommandListWhenAppendWaitEventsWithSubdeviceScopeThenMiFlushAndSemWaitAreAddedViaFlushTask) {
    using SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::Copy, returnValue));
    ASSERT_NE(nullptr, commandList);

    EXPECT_EQ(device, commandList->device);
    EXPECT_EQ(1u, commandList->cmdListType);
    EXPECT_NE(nullptr, commandList->cmdQImmediate);

    MockEvent firstEvent, secondEvent;
    firstEvent.signalScope = 0;
    firstEvent.waitScope = 0;
    secondEvent.waitScope = 0;
    ze_event_handle_t waitList[] = {&firstEvent, &secondEvent};
    auto firstEventObject = L0::Event::fromHandle(waitList[0]);
    auto secondEventObject = L0::Event::fromHandle(waitList[1]);

    auto &container = commandList->commandContainer;
    auto usedBefore = container.getCommandStream()->getUsed();
    commandList->appendWaitOnEvents(2, waitList);

    EXPECT_EQ(true, firstEventObject->updateTaskCountEnabled);
    EXPECT_EQ(true, secondEventObject->updateTaskCountEnabled);

    GenCmdList parsedCommands;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommands, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    auto semaphore = find<SEMAPHORE_WAIT *>(parsedCommands.begin(), parsedCommands.end());
    EXPECT_EQ(parsedCommands.end(), semaphore);
    EXPECT_EQ(usedBefore, container.getCommandStream()->getUsed());
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,470 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
namespace L0 {
namespace ult {
// Test fixture for the command-list tests in this file: the generic Test<> wrapper around DeviceFixture.
using CommandListCreate = Test<DeviceFixture>;
// Command-list mock that replaces the memory-copy / blit / fill back ends with stubs.
// Each stub bumps its own call counter and reports ZE_RESULT_SUCCESS; appendCopyImageBlit
// additionally records the copy size and the source/destination origins it was given.
// getAlignedAllocation is stubbed to return a fixed {0, 0, nullptr, true} value.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>> {
  public:
    MockCommandListHw() : WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>() {}

    AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
        return {0, 0, nullptr, true};
    }

    // Counts every call; stateless calls are counted a second time in a dedicated counter.
    ze_result_t appendMemoryCopyKernelWithGA(void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset,
                                             void *srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset,
                                             uint64_t size, uint64_t elementSize, Builtin builtin,
                                             ze_event_handle_t hSignalEvent, bool isStateless) override {
        ++appendMemoryCopyKernelWithGACalledTimes;
        if (isStateless) {
            ++appendMemoryCopyKernelWithGAStatelessCalledTimes;
        }
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t appendMemoryCopyBlit(uintptr_t dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset,
                                     uintptr_t srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset,
                                     uint64_t size) override {
        ++appendMemoryCopyBlitCalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation, NEO::GraphicsAllocation *dstAllocation,
                                           size_t srcOffset, size_t dstOffset,
                                           ze_copy_region_t srcRegion, ze_copy_region_t dstRegion,
                                           Vec3<size_t> copySize,
                                           size_t srcRowPitch, size_t srcSlicePitch,
                                           size_t dstRowPitch, size_t dstSlicePitch,
                                           Vec3<size_t> srcSize, Vec3<size_t> dstSize,
                                           ze_event_handle_t hSignalEvent,
                                           uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        ++appendMemoryCopyBlitRegionCalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin,
                                         const ze_copy_region_t *dstRegion, uint32_t dstPitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch, size_t srcOffset,
                                         ze_event_handle_t hSignalEvent,
                                         uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        ++appendMemoryCopyKernel2dCalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin,
                                         const ze_copy_region_t *dstRegion, uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch, uint32_t srcSlicePitch, size_t srcOffset,
                                         ze_event_handle_t hSignalEvent,
                                         uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        ++appendMemoryCopyKernel3dCalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    ze_result_t appendBlitFill(void *ptr, const void *pattern, size_t patternSize, size_t size,
                               ze_event_handle_t hSignalEvent,
                               uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        ++appendBlitFillCalledTimes;
        return ZE_RESULT_SUCCESS;
    }

    // Besides counting, keeps the last copy size and origins so tests can inspect them.
    ze_result_t appendCopyImageBlit(NEO::GraphicsAllocation *src, NEO::GraphicsAllocation *dst,
                                    Vec3<size_t> srcOffsets, Vec3<size_t> dstOffsets,
                                    size_t srcRowPitch, size_t srcSlicePitch,
                                    size_t dstRowPitch, size_t dstSlicePitch,
                                    size_t bytesPerPixel, Vec3<size_t> copySize,
                                    Vec3<size_t> srcSize, Vec3<size_t> dstSize,
                                    ze_event_handle_t hSignalEvent) override {
        ++appendCopyImageBlitCalledTimes;
        appendImageRegionCopySize = copySize;
        appendImageRegionSrcOrigin = srcOffsets;
        appendImageRegionDstOrigin = dstOffsets;
        return ZE_RESULT_SUCCESS;
    }

    // Per-stub call counters.
    uint32_t appendMemoryCopyKernelWithGACalledTimes = 0;
    uint32_t appendMemoryCopyKernelWithGAStatelessCalledTimes = 0;
    uint32_t appendMemoryCopyBlitCalledTimes = 0;
    uint32_t appendMemoryCopyBlitRegionCalledTimes = 0;
    uint32_t appendMemoryCopyKernel2dCalledTimes = 0;
    uint32_t appendMemoryCopyKernel3dCalledTimes = 0;
    uint32_t appendBlitFillCalledTimes = 0;
    uint32_t appendCopyImageBlitCalledTimes = 0;

    // Values captured by appendCopyImageBlit; the origins start at {9, 9, 9}, the size at {0, 0, 0}.
    Vec3<size_t> appendImageRegionCopySize = {0, 0, 0};
    Vec3<size_t> appendImageRegionSrcOrigin = {9, 9, 9};
    Vec3<size_t> appendImageRegionDstOrigin = {9, 9, 9};
};
// Product matcher passed to the HWTEST2_F cases below (IsAtLeastProduct instantiated with IGFX_SKYLAKE).
using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;
// The memory-copy tests reuse the CommandListCreate fixture under their own suite name.
using AppendMemoryCopy = CommandListCreate;
// Spy on top of MockCommandListHw: instead of stubbing, it forwards getAlignedAllocation and the
// 2D/3D kernel copy and blit-region copy paths to the real CommandListCoreFamily implementation,
// recording the aligned source/destination pointers and the blit-region offsets on the way.
template <GFXCORE_FAMILY gfxCoreFamily>
class MockAppendMemoryCopy : public MockCommandListHw<gfxCoreFamily> {
  public:
    AlignedAllocationData getAlignedAllocation(L0::Device *device, const void *buffer, uint64_t bufferSize) override {
        return L0::CommandListCoreFamily<gfxCoreFamily>::getAlignedAllocation(device, buffer, bufferSize);
    }
    ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         size_t srcOffset, ze_event_handle_t hSignalEvent,
                                         uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        // Capture the aligned pointers before delegating to the real implementation.
        srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
        dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
        return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstOffset, srcRegion, srcPitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
    }
    ze_result_t appendMemoryCopyKernel3d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
                                         Builtin builtin, const ze_copy_region_t *dstRegion,
                                         uint32_t dstPitch, uint32_t dstSlicePitch, size_t dstOffset,
                                         const ze_copy_region_t *srcRegion, uint32_t srcPitch,
                                         uint32_t srcSlicePitch, size_t srcOffset,
                                         ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
                                         ze_event_handle_t *phWaitEvents) override {
        // Capture the aligned pointers before delegating to the real implementation.
        srcAlignedPtr = srcAlignedAllocation->alignedAllocationPtr;
        dstAlignedPtr = dstAlignedAllocation->alignedAllocationPtr;
        return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(dstAlignedAllocation, srcAlignedAllocation, builtin, dstRegion, dstPitch, dstSlicePitch, dstOffset, srcRegion, srcPitch, srcSlicePitch, srcOffset, hSignalEvent, numWaitEvents, phWaitEvents);
    }
    ze_result_t appendMemoryCopyBlitRegion(NEO::GraphicsAllocation *srcAllocation,
                                           NEO::GraphicsAllocation *dstAllocation,
                                           size_t srcOffset,
                                           size_t dstOffset,
                                           ze_copy_region_t srcRegion,
                                           ze_copy_region_t dstRegion, Vec3<size_t> copySize,
                                           size_t srcRowPitch, size_t srcSlicePitch,
                                           size_t dstRowPitch, size_t dstSlicePitch,
                                           Vec3<size_t> srcSize, Vec3<size_t> dstSize, ze_event_handle_t hSignalEvent,
                                           uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override {
        // Capture the offsets before delegating to the real implementation.
        srcBlitCopyRegionOffset = srcOffset;
        dstBlitCopyRegionOffset = dstOffset;
        return L0::CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(srcAllocation, dstAllocation, srcOffset, dstOffset, srcRegion, dstRegion, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, srcSize, dstSize, hSignalEvent, numWaitEvents, phWaitEvents);
    }
    // Zero-initialized so a test reading them never touches indeterminate values when the
    // corresponding copy path was not taken.
    uintptr_t srcAlignedPtr = 0;
    uintptr_t dstAlignedPtr = 0;
    size_t srcBlitCopyRegionOffset = 0;
    size_t dstBlitCopyRegionOffset = 0;
};
// Copies a region between two raw host pointers on a RenderCompute command list and expects the
// command list's hostPtrMap to hold two entries afterwards.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenTwoNewAllocationAreAddedToHostMapPtr, Platforms) {
    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);

    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t sourceRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t destinationRegion = {4, 4, 4, 2, 2, 2};

    commandList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0, source, &sourceRegion, 0, 0, nullptr, 0, nullptr);

    EXPECT_EQ(commandList.hostPtrMap.size(), 2u);
}
// Copies a region with zero depth between two raw host pointers on a RenderCompute command
// list and expects the aligned source/destination pointers captured by the mock to be unchanged
// by the surface-base-address alignment mask.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCopyRegion2DCalledThenSrcDstPointersArePageAligned, Platforms) {
    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);

    auto *source = reinterpret_cast<void *>(0x1234);
    auto *destination = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t sourceRegion = {4, 4, 0, 2, 2, 0};
    ze_copy_region_t destinationRegion = {4, 4, 0, 2, 2, 0};

    commandList.appendMemoryCopyRegion(destination, &destinationRegion, 0, 0, source, &sourceRegion, 0, 0, nullptr, 0, nullptr);

    // Masking an already aligned address must leave it untouched.
    auto alignmentMask = NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignmentMask();
    EXPECT_TRUE((commandList.srcAlignedPtr & alignmentMask) == commandList.srcAlignedPtr);
    EXPECT_TRUE((commandList.dstAlignedPtr & alignmentMask) == commandList.dstAlignedPtr);
}
// Region copy with non-zero Z origin and depth (the "3D" case per the test name)
// between two host pointers on a RenderCompute command list. Expects the
// srcAlignedPtr / dstAlignedPtr values exposed by the mock to be unchanged when
// masked with the surface-state base address alignment mask.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenMemoryCopyRegion3DCalledThenSrcDstPointersArePageAligned, Platforms) {
    auto *hostSrc = reinterpret_cast<void *>(0x1234);
    auto *hostDst = reinterpret_cast<void *>(0x2345);

    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};

    MockAppendMemoryCopy<gfxCoreFamily> mockCmdList;
    mockCmdList.initialize(device, NEO::EngineGroupType::RenderCompute);
    mockCmdList.appendMemoryCopyRegion(hostDst, &dstRegion, 0, 0,
                                       hostSrc, &srcRegion, 0, 0,
                                       nullptr, 0, nullptr);

    // A pointer is aligned when masking it changes nothing.
    auto alignmentMask = NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignmentMask();
    EXPECT_TRUE((mockCmdList.srcAlignedPtr & alignmentMask) == mockCmdList.srcAlignedPtr);
    EXPECT_TRUE((mockCmdList.dstAlignedPtr & alignmentMask) == mockCmdList.dstAlignedPtr);
}
// Region copy with zero Z origin and zero depth on a Copy-engine command list,
// using host pointers 0x1233 and 0x2345. Expects both offsets captured by the
// mock's appendMemoryCopyBlitRegion override to be greater than zero.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemoryCopyRegion2DCalledThenSrcDstNotZeroOffsetsArePassed, Platforms) {
    auto *hostSrc = reinterpret_cast<void *>(0x1233);
    auto *hostDst = reinterpret_cast<void *>(0x2345);

    ze_copy_region_t srcRegion = {4, 4, 0, 2, 2, 0};
    ze_copy_region_t dstRegion = {4, 4, 0, 2, 2, 0};

    MockAppendMemoryCopy<gfxCoreFamily> mockCmdList;
    mockCmdList.initialize(device, NEO::EngineGroupType::Copy);
    mockCmdList.appendMemoryCopyRegion(hostDst, &dstRegion, 0, 0,
                                       hostSrc, &srcRegion, 0, 0,
                                       nullptr, 0, nullptr);

    EXPECT_GT(mockCmdList.srcBlitCopyRegionOffset, 0u);
    EXPECT_GT(mockCmdList.dstBlitCopyRegionOffset, 0u);
}
// Region copy with non-zero Z origin and depth on a Copy-engine command list,
// using host pointers 0x1233 and 0x2345. Expects both offsets captured by the
// mock's appendMemoryCopyBlitRegion override to be greater than zero.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndUnalignedHostPointersWhenBlitMemoryCopyRegion3DCalledThenSrcDstNotZeroOffsetsArePassed, Platforms) {
    auto *hostSrc = reinterpret_cast<void *>(0x1233);
    auto *hostDst = reinterpret_cast<void *>(0x2345);

    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};

    MockAppendMemoryCopy<gfxCoreFamily> mockCmdList;
    mockCmdList.initialize(device, NEO::EngineGroupType::Copy);
    mockCmdList.appendMemoryCopyRegion(hostDst, &dstRegion, 0, 0,
                                       hostSrc, &srcRegion, 0, 0,
                                       nullptr, 0, nullptr);

    EXPECT_GT(mockCmdList.srcBlitCopyRegionOffset, 0u);
    EXPECT_GT(mockCmdList.dstBlitCopyRegionOffset, 0u);
}
// Counterpart of the unaligned blit tests: both host pointers are first aligned
// down to the surface-state base address alignment, and the offsets captured by
// the mock's appendMemoryCopyBlitRegion override are then expected to be zero.
HWTEST2_F(AppendMemoryCopy, givenCommandListAndAlignedHostPointersWhenBlitMemoryCopyRegion3DCalledThenSrcDstZeroOffsetsArePassed, Platforms) {
    void *hostSrc = alignDown(reinterpret_cast<void *>(0x1233),
                              NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignment());
    void *hostDst = alignDown(reinterpret_cast<void *>(0x2345),
                              NEO::EncodeSurfaceState<FamilyType>::getSurfaceBaseAddressAlignment());

    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};

    MockAppendMemoryCopy<gfxCoreFamily> mockCmdList;
    mockCmdList.initialize(device, NEO::EngineGroupType::Copy);
    mockCmdList.appendMemoryCopyRegion(hostDst, &dstRegion, 0, 0,
                                       hostSrc, &srcRegion, 0, 0,
                                       nullptr, 0, nullptr);

    EXPECT_EQ(mockCmdList.srcBlitCopyRegionOffset, 0u);
    EXPECT_EQ(mockCmdList.dstBlitCopyRegionOffset, 0u);
}
// Appends a region copy between host pointers on a RenderCompute command list,
// parses the resulting command stream, and checks the LAST PIPE_CONTROL in it:
// its DC-flush bit must equal MemorySynchronizationCommands::isDcFlushAllowed().
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionCalledThenPipeControlWithDcFlushAdded, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto *hostSrc = reinterpret_cast<void *>(0x1234);
    auto *hostDst = reinterpret_cast<void *>(0x2345);
    ze_copy_region_t srcRegion = {4, 4, 4, 2, 2, 2};
    ze_copy_region_t dstRegion = {4, 4, 4, 2, 2, 2};

    MockAppendMemoryCopy<gfxCoreFamily> mockCmdList;
    mockCmdList.initialize(device, NEO::EngineGroupType::RenderCompute);
    mockCmdList.appendMemoryCopyRegion(hostDst, &dstRegion, 0, 0,
                                       hostSrc, &srcRegion, 0, 0,
                                       nullptr, 0, nullptr);

    auto &container = mockCmdList.commandContainer;
    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    // At least one PIPE_CONTROL must exist; the loop then advances to the last one.
    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(parsedCmds.end(), pipeControlIt);

    PIPE_CONTROL *lastPipeControl = nullptr;
    for (; pipeControlIt != parsedCmds.end(); pipeControlIt = find<PIPE_CONTROL *>(++pipeControlIt, parsedCmds.end())) {
        lastPipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
    }

    EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), lastPipeControl->getDcFlushEnable());
}
// Turns a white-box command list into an immediate one backed by a mocked
// command queue, appends a memory copy, and expects ZE_RESULT_SUCCESS with
// executeCommandLists and synchronize each invoked exactly once on the queue.
HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, Platforms) {
    Mock<CommandQueue> cmdQueue;
    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute);
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);

    commandList->device = device;
    commandList->cmdQImmediate = &cmdQueue;
    commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;

    EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS));
    EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS));

    auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 0, nullptr);

    // Detach the stack-allocated mock queue BEFORE the fatal assertion: a failing
    // ASSERT_* returns immediately, which would otherwise skip this reset.
    // NOTE(review): presumably the command list acts on cmdQImmediate during
    // teardown, which is why the pointer has to be cleared - confirm.
    commandList->cmdQImmediate = nullptr;

    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
// Turns a white-box command list into an immediate one backed by a mocked
// command queue, then appends a memory copy that claims one wait event but
// passes a null wait-event array. Expects ZE_RESULT_ERROR_INVALID_ARGUMENT.
HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyWithInvalidEventThenInvalidArgumentErrorIsReturned, Platforms) {
    Mock<CommandQueue> cmdQueue;
    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
    ASSERT_NE(nullptr, commandList);
    ze_result_t ret = commandList->initialize(device, NEO::EngineGroupType::RenderCompute);
    ASSERT_EQ(ZE_RESULT_SUCCESS, ret);

    commandList->device = device;
    commandList->cmdQImmediate = &cmdQueue;
    commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;

    // numWaitEvents == 1 with phWaitEvents == nullptr is the invalid combination.
    auto result = commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, 1, nullptr);

    // Detach the stack-allocated mock queue BEFORE the fatal assertion: a failing
    // ASSERT_* returns immediately, which would otherwise skip this reset.
    // NOTE(review): presumably the command list acts on cmdQImmediate during
    // teardown, which is why the pointer has to be cleared - confirm.
    commandList->cmdQImmediate = nullptr;

    ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
// Appends an 8-byte memory copy between host pointers on a RenderCompute command
// list, parses the resulting command stream, and checks the LAST PIPE_CONTROL:
// its DC-flush bit must equal MemorySynchronizationCommands::isDcFlushAllowed().
HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledThenPipeControlWithDcFlushAdded, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto *hostSrc = reinterpret_cast<void *>(0x1234);
    auto *hostDst = reinterpret_cast<void *>(0x2345);

    MockAppendMemoryCopy<gfxCoreFamily> mockCmdList;
    mockCmdList.initialize(device, NEO::EngineGroupType::RenderCompute);
    mockCmdList.appendMemoryCopy(hostDst, hostSrc, 8, nullptr, 0, nullptr);

    auto &container = mockCmdList.commandContainer;
    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds, ptrOffset(container.getCommandStream()->getCpuBase(), 0), container.getCommandStream()->getUsed()));

    // At least one PIPE_CONTROL must exist; the loop then advances to the last one.
    auto pipeControlIt = find<PIPE_CONTROL *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(parsedCmds.end(), pipeControlIt);

    PIPE_CONTROL *lastPipeControl = nullptr;
    for (; pipeControlIt != parsedCmds.end(); pipeControlIt = find<PIPE_CONTROL *>(++pipeControlIt, parsedCmds.end())) {
        lastPipeControl = genCmdCast<PIPE_CONTROL *>(*pipeControlIt);
    }

    EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), lastPipeControl->getDcFlushEnable());
}
// Appends a 0x100-byte memory copy with a kernel-timestamp event on a Copy-engine
// command list and verifies the profiling commands around the copy. Expected
// order in the parsed stream:
//   1. MI_STORE_REGISTER_MEM of REG_GLOBAL_TIMESTAMP_LDW, immediately followed by
//      MI_STORE_REGISTER_MEM of GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW,
//   2. an MI_FLUSH_DW somewhere after that pair,
//   3. the same MI_STORE_REGISTER_MEM pair again, with nothing after it.
// Every iterator and cast result is checked with a fatal ASSERT_* before it is
// dereferenced, so a missing command fails the test instead of dereferencing the
// end iterator or a null pointer.
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, Platforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    using MI_FLUSH_DW = typename GfxFamily::MI_FLUSH_DW;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::Copy);
    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    eventPoolDesc.count = 1;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;
    eventDesc.signal = 0;
    eventDesc.wait = 0;

    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc));
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
    EXPECT_GT(commandList.appendMemoryCopyBlitCalledTimes, 1u);
    EXPECT_EQ(1u, event->getPacketsInUse());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), commandList.commandContainer.getCommandStream()->getUsed()));

    // Timestamp pair written before the copy.
    auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    ++itor;
    ASSERT_NE(cmdList.end(), itor);
    // The next command is reached by plain increment, so the cast may yield null.
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);

    itor = find<MI_FLUSH_DW *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    // Timestamp pair written after the copy; it must close the stream.
    itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    ++itor;
    ASSERT_NE(cmdList.end(), itor);
    cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
    ASSERT_NE(nullptr, cmd);
    EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    ++itor;
    EXPECT_EQ(cmdList.end(), itor);
}
// Product filter for the test below.
// NOTE(review): presumably matches products from IGFX_SKYLAKE through IGFX_DG1 - confirm bounds.
using SupportedPlatforms = IsWithinProducts<IGFX_SKYLAKE, IGFX_DG1>;

// Appends a 0x100-byte memory copy with a kernel-timestamp event on a
// RenderCompute command list and verifies the profiling commands around the
// copy. Expected order in the parsed stream:
//   1. MI_LOAD_REGISTER_REG sourcing REG_GLOBAL_TIMESTAMP_LDW, then one sourcing
//      GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW,
//   2. a PIPE_CONTROL with DC flush disabled,
//   3. the same MI_LOAD_REGISTER_REG pair again,
//   4. exactly one PIPE_CONTROL after that pair, whose DC-flush bit equals
//      MemorySynchronizationCommands::isDcFlushAllowed().
// The kernel path must be taken (appendMemoryCopyKernelWithGACalledTimes > 0)
// and the blit path must not (appendMemoryCopyBlitCalledTimes == 0).
// Every iterator is checked with a fatal ASSERT_NE before it is dereferenced,
// so a missing command fails the test instead of dereferencing the end iterator.
HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, SupportedPlatforms) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    MockAppendMemoryCopy<gfxCoreFamily> commandList;
    commandList.initialize(device, NEO::EngineGroupType::RenderCompute);
    void *srcPtr = reinterpret_cast<void *>(0x1234);
    void *dstPtr = reinterpret_cast<void *>(0x2345);

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.count = 1;
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc));
    ASSERT_NE(nullptr, eventPool);
    auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event);

    commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr);
    EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalledTimes, 0u);
    EXPECT_EQ(commandList.appendMemoryCopyBlitCalledTimes, 0u);
    EXPECT_EQ(1u, event->getPacketsInUse());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList.commandContainer.getCommandStream()->getUsed()));

    // Timestamp pair captured before the copy.
    auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    }
    ++itor;
    itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    }
    ++itor;

    // PIPE_CONTROL between the two timestamp pairs must not flush the data cache.
    itor = find<PIPE_CONTROL *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
        EXPECT_FALSE(cmd->getDcFlushEnable());
    }
    ++itor;

    // Timestamp pair captured after the copy.
    itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
    }
    ++itor;
    itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    }

    auto temp = itor;
    auto numPCs = findAll<PIPE_CONTROL *>(temp, cmdList.end());
    // We should have only one PIPE_CONTROL after the second timestamp pair.
    ASSERT_EQ(1u, numPCs.size());

    itor = find<PIPE_CONTROL *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    {
        auto cmd = genCmdCast<PIPE_CONTROL *>(*itor);
        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::isDcFlushAllowed(), cmd->getDcFlushEnable());
    }
}
} // namespace ult
} // namespace L0