mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Add copy tracing black box test
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
e55cd5961b
commit
b04a337b3f
@ -12,6 +12,7 @@ set(TEST_TARGETS
|
||||
zello_copy_image
|
||||
zello_copy_kernel_printf
|
||||
zello_copy_only
|
||||
zello_copy_tracing
|
||||
zello_debug_info
|
||||
zello_dynamic_link
|
||||
zello_dyn_local_arg
|
||||
|
799
level_zero/core/test/black_box_tests/zello_copy_tracing.cpp
Normal file
799
level_zero/core/test/black_box_tests/zello_copy_tracing.cpp
Normal file
@ -0,0 +1,799 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
#include <level_zero/ze_ddi.h>
|
||||
#include <level_zero/zet_api.h>
|
||||
|
||||
#include "zello_common.h"
|
||||
|
||||
#include <ctime>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
|
||||
#ifdef _WIN64
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
bool verbose = false;
|
||||
|
||||
struct UserTracerData {
|
||||
uint32_t tracerData;
|
||||
};
|
||||
|
||||
UserTracerData tracerData0 = {};
|
||||
|
||||
struct UserInstanceData {
|
||||
std::clock_t startTime;
|
||||
uint32_t allocCount;
|
||||
};
|
||||
|
||||
uint32_t initCount;
|
||||
uint32_t initPrologCount;
|
||||
uint32_t initEpilogCount;
|
||||
|
||||
struct TmpInitParams {
|
||||
ze_init_flag_t flags;
|
||||
};
|
||||
|
||||
TmpInitParams initParams;
|
||||
|
||||
void setInitParams(ze_init_flag_t flags) {
|
||||
initParams.flags = flags;
|
||||
initCount++;
|
||||
}
|
||||
|
||||
void checkInitParams(ze_init_params_t *traceParams, TmpInitParams *checkParams) {
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->pflags) == checkParams->flags);
|
||||
}
|
||||
|
||||
void onEnterInit(
|
||||
ze_init_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
checkInitParams(tracerParams, &initParams);
|
||||
UserInstanceData *instanceData = new UserInstanceData;
|
||||
instanceData->startTime = clock();
|
||||
instanceData->allocCount = initCount;
|
||||
*tracerInstanceUserData = reinterpret_cast<void *>(instanceData);
|
||||
initPrologCount++;
|
||||
}
|
||||
|
||||
void onExitInit(
|
||||
ze_init_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
clock_t endTime = clock();
|
||||
SUCCESS_OR_WARNING_BOOL(result == ZE_RESULT_SUCCESS);
|
||||
UserInstanceData *instanceData = reinterpret_cast<UserInstanceData *>(*tracerInstanceUserData);
|
||||
SUCCESS_OR_WARNING_BOOL(instanceData->allocCount = initCount);
|
||||
float time = 1000.f * (endTime - instanceData->startTime) / CLOCKS_PER_SEC;
|
||||
if (verbose) {
|
||||
std::cout << "zeInit event " << instanceData->allocCount << " " << time << std::endl;
|
||||
}
|
||||
delete instanceData;
|
||||
checkInitParams(tracerParams, &initParams);
|
||||
initEpilogCount++;
|
||||
}
|
||||
|
||||
uint32_t driverGetCount;
|
||||
uint32_t driverGetPrologCount;
|
||||
uint32_t driverGetEpilogCount;
|
||||
|
||||
struct TmpDriverGetParams {
|
||||
uint32_t *count;
|
||||
ze_driver_handle_t *drivers;
|
||||
};
|
||||
|
||||
TmpDriverGetParams driverGetParams;
|
||||
|
||||
void setDriverGetParams(uint32_t *count, ze_driver_handle_t *drivers) {
|
||||
driverGetParams.count = count;
|
||||
driverGetParams.drivers = drivers;
|
||||
driverGetCount++;
|
||||
}
|
||||
|
||||
void checkDriverGetParams(ze_driver_get_params_t *traceParams, TmpDriverGetParams *checkParams) {
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->ppCount) == checkParams->count);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->pphDrivers) == checkParams->drivers);
|
||||
}
|
||||
|
||||
void onEnterDriverGet(
|
||||
ze_driver_get_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
checkDriverGetParams(tracerParams, &driverGetParams);
|
||||
UserInstanceData *instanceData = new UserInstanceData;
|
||||
instanceData->startTime = clock();
|
||||
instanceData->allocCount = initCount;
|
||||
*tracerInstanceUserData = reinterpret_cast<void *>(instanceData);
|
||||
driverGetPrologCount++;
|
||||
}
|
||||
|
||||
void onExitDriverGet(
|
||||
ze_driver_get_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
clock_t endTime = clock();
|
||||
SUCCESS_OR_WARNING_BOOL(result == ZE_RESULT_SUCCESS);
|
||||
UserInstanceData *instanceData = reinterpret_cast<UserInstanceData *>(*tracerInstanceUserData);
|
||||
SUCCESS_OR_WARNING_BOOL(instanceData->allocCount = initCount);
|
||||
float time = 1000.f * (endTime - instanceData->startTime) / CLOCKS_PER_SEC;
|
||||
if (verbose) {
|
||||
std::cout << "zeDriverGet event " << instanceData->allocCount << " " << time << std::endl;
|
||||
}
|
||||
delete instanceData;
|
||||
checkDriverGetParams(tracerParams, &driverGetParams);
|
||||
driverGetEpilogCount++;
|
||||
}
|
||||
|
||||
uint32_t memAllocDeviceCount;
|
||||
uint32_t memAllocDevicePrologCount;
|
||||
uint32_t memAllocDeviceEpilogCount;
|
||||
|
||||
struct TmpMemAllocDeviceParams {
|
||||
ze_context_handle_t context;
|
||||
ze_device_mem_alloc_desc_t *deviceDesc;
|
||||
size_t size;
|
||||
size_t alignment;
|
||||
ze_device_handle_t device;
|
||||
void *buffer;
|
||||
};
|
||||
|
||||
TmpMemAllocDeviceParams allocMemDeviceParams;
|
||||
|
||||
void setMemAllocDeviceParams(ze_context_handle_t context, ze_device_mem_alloc_desc_t *deviceDesc,
|
||||
size_t allocSize, size_t alignment, ze_device_handle_t device, void *buffer) {
|
||||
allocMemDeviceParams.context = context;
|
||||
allocMemDeviceParams.deviceDesc = deviceDesc;
|
||||
allocMemDeviceParams.size = allocSize;
|
||||
allocMemDeviceParams.alignment = alignment;
|
||||
allocMemDeviceParams.device = device;
|
||||
allocMemDeviceParams.buffer = buffer;
|
||||
memAllocDeviceCount++;
|
||||
}
|
||||
|
||||
void checkMemAllocDeviceParams(ze_mem_alloc_device_params_t *traceParams, TmpMemAllocDeviceParams *checkParams) {
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->phContext) == checkParams->context);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->pdevice_desc) == checkParams->deviceDesc);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->psize) == checkParams->size);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->palignment) == checkParams->alignment);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->phDevice) == checkParams->device);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->ppptr) == checkParams->buffer);
|
||||
}
|
||||
|
||||
void onEnterMemAllocDevice(
|
||||
ze_mem_alloc_device_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
checkMemAllocDeviceParams(tracerParams, &allocMemDeviceParams);
|
||||
UserInstanceData *instanceData = new UserInstanceData;
|
||||
instanceData->startTime = clock();
|
||||
instanceData->allocCount = memAllocDeviceCount;
|
||||
*tracerInstanceUserData = reinterpret_cast<void *>(instanceData);
|
||||
memAllocDevicePrologCount++;
|
||||
}
|
||||
|
||||
void onExitMemAllocDevice(
|
||||
ze_mem_alloc_device_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
clock_t endTime = clock();
|
||||
SUCCESS_OR_WARNING_BOOL(result == ZE_RESULT_SUCCESS);
|
||||
UserInstanceData *instanceData = reinterpret_cast<UserInstanceData *>(*tracerInstanceUserData);
|
||||
SUCCESS_OR_WARNING_BOOL(instanceData->allocCount == memAllocDeviceCount);
|
||||
float time = 1000.f * (endTime - instanceData->startTime) / CLOCKS_PER_SEC;
|
||||
if (verbose) {
|
||||
std::cout << "zeDriverAllocDeviceMem event " << instanceData->allocCount << " " << time << std::endl;
|
||||
}
|
||||
delete instanceData;
|
||||
checkMemAllocDeviceParams(tracerParams, &allocMemDeviceParams);
|
||||
memAllocDeviceEpilogCount++;
|
||||
}
|
||||
|
||||
uint32_t memAllocHostCount;
|
||||
uint32_t memAllocHostPrologCount;
|
||||
uint32_t memAllocHostEpilogCount;
|
||||
|
||||
struct TmpMemAllocHostParams {
|
||||
ze_context_handle_t context;
|
||||
ze_host_mem_alloc_desc_t *hostDesc;
|
||||
size_t size;
|
||||
size_t alignment;
|
||||
void *buffer;
|
||||
};
|
||||
|
||||
TmpMemAllocHostParams memAllocHostParams;
|
||||
|
||||
void setMemAllocHostParams(ze_context_handle_t context, ze_host_mem_alloc_desc_t *hostDesc,
|
||||
size_t allocSize, size_t alignment, void *buffer) {
|
||||
memAllocHostParams.context = context;
|
||||
memAllocHostParams.hostDesc = hostDesc;
|
||||
memAllocHostParams.size = allocSize;
|
||||
memAllocHostParams.alignment = alignment;
|
||||
memAllocHostParams.buffer = buffer;
|
||||
memAllocHostCount++;
|
||||
}
|
||||
|
||||
void checkMemAllocHostParams(ze_mem_alloc_host_params_t *traceParams, TmpMemAllocHostParams *checkParams) {
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->phContext) == checkParams->context);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->phost_desc) == checkParams->hostDesc);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->psize) == checkParams->size);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->palignment) == checkParams->alignment);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->ppptr) == checkParams->buffer);
|
||||
}
|
||||
|
||||
void onEnterMemAllocHost(
|
||||
ze_mem_alloc_host_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
checkMemAllocHostParams(tracerParams, &memAllocHostParams);
|
||||
UserInstanceData *instanceData = new UserInstanceData;
|
||||
instanceData->startTime = clock();
|
||||
instanceData->allocCount = memAllocHostCount;
|
||||
*tracerInstanceUserData = reinterpret_cast<void *>(instanceData);
|
||||
memAllocHostPrologCount++;
|
||||
}
|
||||
|
||||
void onExitMemAllocHost(
|
||||
ze_mem_alloc_host_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
clock_t endTime = clock();
|
||||
SUCCESS_OR_WARNING_BOOL(result == ZE_RESULT_SUCCESS);
|
||||
UserInstanceData *instanceData = reinterpret_cast<UserInstanceData *>(*tracerInstanceUserData);
|
||||
SUCCESS_OR_WARNING_BOOL(instanceData->allocCount == memAllocHostCount);
|
||||
float time = 1000.f * (endTime - instanceData->startTime) / CLOCKS_PER_SEC;
|
||||
if (verbose) {
|
||||
std::cout << "zeMemAllocHost event " << instanceData->allocCount << " " << time << std::endl;
|
||||
}
|
||||
delete instanceData;
|
||||
checkMemAllocHostParams(tracerParams, &memAllocHostParams);
|
||||
memAllocHostEpilogCount++;
|
||||
}
|
||||
|
||||
uint32_t memAllocSharedCount = 0;
|
||||
uint32_t memAllocSharedPrologCount = 0;
|
||||
uint32_t memAllocSharedEpilogCount = 0;
|
||||
|
||||
struct tmpMemAllocSharedParams {
|
||||
ze_context_handle_t context;
|
||||
ze_device_mem_alloc_desc_t *deviceDesc;
|
||||
ze_host_mem_alloc_desc_t *hostDesc;
|
||||
size_t size;
|
||||
size_t alignment;
|
||||
ze_device_handle_t device;
|
||||
void *buffer;
|
||||
};
|
||||
|
||||
tmpMemAllocSharedParams memAllocSharedParams;
|
||||
|
||||
void setMemAllocSharedParams(ze_context_handle_t context, ze_device_mem_alloc_desc_t *deviceDesc,
|
||||
ze_host_mem_alloc_desc_t *hostDesc, size_t allocSize, size_t alignment,
|
||||
ze_device_handle_t device, void *buffer) {
|
||||
memAllocSharedParams.context = context;
|
||||
memAllocSharedParams.deviceDesc = deviceDesc;
|
||||
memAllocSharedParams.hostDesc = hostDesc;
|
||||
memAllocSharedParams.size = allocSize;
|
||||
memAllocSharedParams.alignment = alignment;
|
||||
memAllocSharedParams.device = device;
|
||||
memAllocSharedParams.buffer = buffer;
|
||||
memAllocSharedCount++;
|
||||
}
|
||||
|
||||
void checkMemAllocShared(ze_mem_alloc_shared_params_t *traceParams, tmpMemAllocSharedParams *checkParams) {
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->phContext) == checkParams->context);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->pdevice_desc) == checkParams->deviceDesc);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->phost_desc) == checkParams->hostDesc);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->psize) == checkParams->size);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->palignment) == checkParams->alignment);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->phDevice) == checkParams->device);
|
||||
SUCCESS_OR_WARNING_BOOL(*(traceParams->ppptr) == checkParams->buffer);
|
||||
}
|
||||
|
||||
void onEnterMemAllocShared(
|
||||
ze_mem_alloc_shared_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
checkMemAllocShared(tracerParams, &memAllocSharedParams);
|
||||
UserInstanceData *instanceData = new UserInstanceData;
|
||||
instanceData->startTime = clock();
|
||||
instanceData->allocCount = memAllocSharedCount;
|
||||
*tracerInstanceUserData = reinterpret_cast<void *>(instanceData);
|
||||
memAllocSharedPrologCount++;
|
||||
}
|
||||
|
||||
void onExitMemAllocShared(
|
||||
ze_mem_alloc_shared_params_t *tracerParams,
|
||||
ze_result_t result,
|
||||
void *traceUserData,
|
||||
void **tracerInstanceUserData) {
|
||||
|
||||
clock_t endTime = clock();
|
||||
SUCCESS_OR_WARNING_BOOL(result == ZE_RESULT_SUCCESS);
|
||||
UserInstanceData *instanceData = reinterpret_cast<UserInstanceData *>(*tracerInstanceUserData);
|
||||
SUCCESS_OR_WARNING_BOOL(instanceData->allocCount == memAllocSharedCount);
|
||||
float time = 1000.f * (endTime - instanceData->startTime) / CLOCKS_PER_SEC;
|
||||
if (verbose) {
|
||||
std::cout << "zeMemAllocShared event " << instanceData->allocCount << " " << time << std::endl;
|
||||
}
|
||||
delete instanceData;
|
||||
checkMemAllocShared(tracerParams, &memAllocSharedParams);
|
||||
memAllocSharedEpilogCount++;
|
||||
}
|
||||
|
||||
void testAppendMemoryCopy0(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet,
|
||||
ze_device_dditable_t &deviceDdiTable,
|
||||
ze_command_queue_dditable_t cmdQueueDdiTable,
|
||||
ze_command_list_dditable_t &cmdListDdiTable,
|
||||
ze_mem_dditable_t &memDdiTable) {
|
||||
const size_t allocSize = 4096 + 7; // +7 to brake alignment and make it harder
|
||||
char *heapBuffer = new char[allocSize];
|
||||
void *zeBuffer = nullptr;
|
||||
char stackBuffer[allocSize];
|
||||
|
||||
// Create command queue
|
||||
uint32_t numQueueGroups = 0;
|
||||
SUCCESS_OR_TERMINATE(deviceDdiTable.pfnGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
|
||||
if (numQueueGroups == 0) {
|
||||
std::cout << "No queue groups found!\n";
|
||||
std::terminate();
|
||||
}
|
||||
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
|
||||
SUCCESS_OR_TERMINATE(deviceDdiTable.pfnGetCommandQueueGroupProperties(device, &numQueueGroups,
|
||||
queueProperties.data()));
|
||||
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_queue_desc_t cmdQueueDesc = {
|
||||
ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
|
||||
nullptr,
|
||||
0,
|
||||
0,
|
||||
ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY,
|
||||
ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
|
||||
ZE_COMMAND_QUEUE_PRIORITY_NORMAL};
|
||||
|
||||
for (uint32_t i = 0; i < numQueueGroups; i++) {
|
||||
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
|
||||
cmdQueueDesc.ordinal = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnCreate(context, device, &cmdQueueDesc, &cmdQueue));
|
||||
|
||||
ze_command_list_handle_t cmdList;
|
||||
ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, nullptr};
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnCreate(context, device, &cmdListDesc, &cmdList));
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
|
||||
|
||||
setMemAllocDeviceParams(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer);
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer));
|
||||
|
||||
for (size_t i = 0; i < allocSize; ++i) {
|
||||
heapBuffer[i] = static_cast<char>(i + 1);
|
||||
}
|
||||
memset(stackBuffer, 0, allocSize);
|
||||
|
||||
// Copy from heap to device-allocated memory
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendMemoryCopy(cmdList, zeBuffer, heapBuffer, allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendBarrier(cmdList, nullptr, 0, nullptr));
|
||||
|
||||
// Copy from device-allocated memory to stack
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||
|
||||
// Validate stack and xe buffers have the original data from heapBuffer
|
||||
validRet = (0 == memcmp(heapBuffer, stackBuffer, allocSize));
|
||||
|
||||
delete[] heapBuffer;
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnFree(context, zeBuffer));
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
void testAppendMemoryCopy1(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet,
|
||||
ze_device_dditable_t &deviceDdiTable,
|
||||
ze_command_queue_dditable_t cmdQueueDdiTable,
|
||||
ze_command_list_dditable_t cmdListDdiTable,
|
||||
ze_mem_dditable_t &memDdiTable) {
|
||||
const size_t allocSize = 4096 + 7; // +7 to brake alignment and make it harder
|
||||
char *hostBuffer;
|
||||
void *zeBuffer = nullptr;
|
||||
char stackBuffer[allocSize];
|
||||
|
||||
// Create command queue
|
||||
uint32_t numQueueGroups = 0;
|
||||
SUCCESS_OR_TERMINATE(deviceDdiTable.pfnGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
|
||||
if (numQueueGroups == 0) {
|
||||
std::cout << "No queue groups found!\n";
|
||||
std::terminate();
|
||||
}
|
||||
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
|
||||
SUCCESS_OR_TERMINATE(deviceDdiTable.pfnGetCommandQueueGroupProperties(device, &numQueueGroups,
|
||||
queueProperties.data()));
|
||||
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_queue_desc_t cmdQueueDesc = {
|
||||
ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
|
||||
nullptr,
|
||||
0,
|
||||
0,
|
||||
ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY,
|
||||
ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
|
||||
ZE_COMMAND_QUEUE_PRIORITY_NORMAL};
|
||||
|
||||
for (uint32_t i = 0; i < numQueueGroups; i++) {
|
||||
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
|
||||
cmdQueueDesc.ordinal = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnCreate(context, device, &cmdQueueDesc, &cmdQueue));
|
||||
|
||||
ze_command_list_handle_t cmdList;
|
||||
ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, nullptr};
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnCreate(context, device, &cmdListDesc, &cmdList));
|
||||
|
||||
ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
|
||||
|
||||
setMemAllocHostParams(context, &hostDesc, allocSize, 1, (void **)(&hostBuffer));
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnAllocHost(context, &hostDesc, allocSize, 1, (void **)(&hostBuffer)));
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
|
||||
|
||||
setMemAllocDeviceParams(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer);
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer));
|
||||
|
||||
for (size_t i = 0; i < allocSize; ++i) {
|
||||
hostBuffer[i] = static_cast<char>(i + 1);
|
||||
}
|
||||
memset(stackBuffer, 0, allocSize);
|
||||
|
||||
// Copy from host-allocated to device-allocated memory
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendMemoryCopy(cmdList, zeBuffer, hostBuffer, allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendBarrier(cmdList, nullptr, 0, nullptr));
|
||||
|
||||
// Copy from device-allocated memory to stack
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||
|
||||
// Validate stack and xe buffers have the original data from hostBuffer
|
||||
validRet = (0 == memcmp(hostBuffer, stackBuffer, allocSize));
|
||||
|
||||
//delete[] heapBuffer;
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnFree(context, hostBuffer));
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnFree(context, zeBuffer));
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
void testAppendMemoryCopy2(ze_context_handle_t &context, ze_device_handle_t &device, bool &validRet,
|
||||
ze_device_dditable_t &deviceDdiTable,
|
||||
ze_command_queue_dditable_t cmdQueueDdiTable,
|
||||
ze_command_list_dditable_t cmdListDdiTable,
|
||||
ze_mem_dditable_t &memDdiTable) {
|
||||
validRet = true;
|
||||
|
||||
// Create command queue
|
||||
uint32_t numQueueGroups = 0;
|
||||
SUCCESS_OR_TERMINATE(deviceDdiTable.pfnGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
|
||||
if (numQueueGroups == 0) {
|
||||
std::cout << "No queue groups found!\n";
|
||||
std::terminate();
|
||||
}
|
||||
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
|
||||
SUCCESS_OR_TERMINATE(deviceDdiTable.pfnGetCommandQueueGroupProperties(device, &numQueueGroups,
|
||||
queueProperties.data()));
|
||||
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_queue_desc_t cmdQueueDesc = {
|
||||
ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC,
|
||||
nullptr,
|
||||
0,
|
||||
0,
|
||||
ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY,
|
||||
ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS,
|
||||
ZE_COMMAND_QUEUE_PRIORITY_NORMAL};
|
||||
|
||||
for (uint32_t i = 0; i < numQueueGroups; i++) {
|
||||
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
|
||||
cmdQueueDesc.ordinal = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnCreate(context, device, &cmdQueueDesc, &cmdQueue));
|
||||
|
||||
ze_command_list_handle_t cmdList;
|
||||
ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC, nullptr};
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnCreate(context, device, &cmdListDesc, &cmdList));
|
||||
|
||||
void *dstBuffer = nullptr;
|
||||
uint32_t dstWidth = verbose ? 16 : 1024; // width of the dst 2D buffer in bytes
|
||||
uint32_t dstHeight = verbose ? 32 : 512; // height of the dst 2D buffer in bytes
|
||||
uint32_t dstOriginX = verbose ? 8 : 128; // Offset in bytes
|
||||
uint32_t dstOriginY = verbose ? 8 : 144; // Offset in rows
|
||||
uint32_t dstSize = dstHeight * dstWidth; // Size of the dst buffer
|
||||
|
||||
void *srcBuffer = nullptr;
|
||||
uint32_t srcWidth = verbose ? 24 : 256; // width of the src 2D buffer in bytes
|
||||
uint32_t srcHeight = verbose ? 16 : 384; // height of the src 2D buffer in bytes
|
||||
uint32_t srcOriginX = verbose ? 4 : 64; // Offset in bytes
|
||||
uint32_t srcOriginY = verbose ? 4 : 128; // Offset in rows
|
||||
uint32_t srcSize = srcHeight * srcWidth; // Size of the src buffer
|
||||
|
||||
uint32_t width = verbose ? 8 : 144; // width of the region to copy
|
||||
uint32_t height = verbose ? 12 : 96; // height of the region to copy
|
||||
const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, 0, width, height, 0};
|
||||
const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, 0, width, height, 0};
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc0 = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
|
||||
ze_host_mem_alloc_desc_t hostDesc0 = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
|
||||
setMemAllocSharedParams(context, &deviceDesc0, &hostDesc0, srcSize, 1, device, &srcBuffer);
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnAllocShared(context, &deviceDesc0, &hostDesc0, srcSize, 1, device, &srcBuffer));
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc1 = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
|
||||
ze_host_mem_alloc_desc_t hostDesc1 = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
|
||||
setMemAllocSharedParams(context, &deviceDesc1, &hostDesc1, dstSize, 1, device, &dstBuffer);
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnAllocShared(context, &deviceDesc1, &hostDesc1, dstSize, 1, device, &dstBuffer));
|
||||
|
||||
// Initialize buffers
|
||||
uint8_t *stackBuffer = new uint8_t[srcSize];
|
||||
for (uint32_t i = 0; i < srcHeight; i++) {
|
||||
for (uint32_t j = 0; j < srcWidth; j++) {
|
||||
stackBuffer[i * srcWidth + j] = static_cast<uint8_t>(i * srcWidth + j);
|
||||
}
|
||||
}
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendMemoryCopy(cmdList, srcBuffer, stackBuffer, srcSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
int value = 0;
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast<void *>(&value),
|
||||
sizeof(value), dstSize, nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendBarrier(cmdList, nullptr, 0, nullptr));
|
||||
|
||||
// Perform the copy
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, 0,
|
||||
const_cast<const void *>(srcBuffer), &srcRegion, srcWidth, 0,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||
|
||||
uint8_t *dstBufferChar = reinterpret_cast<uint8_t *>(dstBuffer);
|
||||
if (verbose) {
|
||||
std::cout << "stackBuffer\n";
|
||||
for (uint32_t i = 0; i < srcHeight; i++) {
|
||||
for (uint32_t j = 0; j < srcWidth; j++) {
|
||||
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(stackBuffer[i * srcWidth + j]) << " ";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
std::cout << "dstBuffer\n";
|
||||
for (uint32_t i = 0; i < dstHeight; i++) {
|
||||
for (uint32_t j = 0; j < dstWidth; j++) {
|
||||
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(dstBufferChar[i * dstWidth + j]) << " ";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth;
|
||||
uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth;
|
||||
for (uint32_t i = 0; i < height; i++) {
|
||||
for (uint32_t j = 0; j < width; j++) {
|
||||
uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth) + j];
|
||||
uint8_t srcVal = stackBuffer[srcOffset + (i * srcWidth) + j];
|
||||
if (dstVal != srcVal) {
|
||||
validRet = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete[] stackBuffer;
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnFree(context, srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(memDdiTable.pfnFree(context, dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(cmdListDdiTable.pfnDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(cmdQueueDdiTable.pfnDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
void setEnvironmentVariable(const char *variableName, const char *variableValue) {
|
||||
#ifdef _WIN64
|
||||
SetEnvironmentVariableA(variableName, variableValue);
|
||||
#else
|
||||
setenv(variableName, variableValue, 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
verbose = isVerbose(argc, argv);
|
||||
|
||||
setEnvironmentVariable("ZET_ENABLE_API_TRACING_EXP", "1");
|
||||
|
||||
ze_api_version_t apiVersion = ZE_API_VERSION_CURRENT;
|
||||
|
||||
ze_global_dditable_t globalDdiTable;
|
||||
SUCCESS_OR_TERMINATE(zeGetGlobalProcAddrTable(apiVersion, &globalDdiTable));
|
||||
|
||||
ze_driver_dditable_t driverDdiTable;
|
||||
SUCCESS_OR_TERMINATE(zeGetDriverProcAddrTable(apiVersion, &driverDdiTable));
|
||||
|
||||
ze_device_dditable_t deviceDdiTable;
|
||||
SUCCESS_OR_TERMINATE(zeGetDeviceProcAddrTable(apiVersion, &deviceDdiTable));
|
||||
|
||||
ze_context_dditable_t contextDdiTable;
|
||||
SUCCESS_OR_TERMINATE(zeGetContextProcAddrTable(apiVersion, &contextDdiTable));
|
||||
|
||||
ze_command_queue_dditable_t cmdQueueDdiTable;
|
||||
SUCCESS_OR_TERMINATE(zeGetCommandQueueProcAddrTable(apiVersion, &cmdQueueDdiTable));
|
||||
|
||||
ze_command_list_dditable_t cmdListDdiTable;
|
||||
SUCCESS_OR_TERMINATE(zeGetCommandListProcAddrTable(apiVersion, &cmdListDdiTable));
|
||||
|
||||
ze_mem_dditable_t memDdiTable;
|
||||
SUCCESS_OR_TERMINATE(zeGetMemProcAddrTable(apiVersion, &memDdiTable));
|
||||
|
||||
SUCCESS_OR_TERMINATE(globalDdiTable.pfnInit(ZE_INIT_FLAG_GPU_ONLY));
|
||||
|
||||
uint32_t driverCount = 0;
|
||||
SUCCESS_OR_TERMINATE(driverDdiTable.pfnGet(&driverCount, nullptr));
|
||||
if (driverCount == 0)
|
||||
std::terminate();
|
||||
|
||||
ze_driver_handle_t driver;
|
||||
driverCount = 1;
|
||||
SUCCESS_OR_TERMINATE(driverDdiTable.pfnGet(&driverCount, &driver));
|
||||
|
||||
uint32_t deviceCount = 0;
|
||||
SUCCESS_OR_TERMINATE(deviceDdiTable.pfnGet(driver, &deviceCount, nullptr));
|
||||
if (deviceCount == 0)
|
||||
std::terminate();
|
||||
|
||||
ze_device_handle_t device;
|
||||
deviceCount = 1;
|
||||
SUCCESS_OR_TERMINATE(deviceDdiTable.pfnGet(driver, &deviceCount, &device));
|
||||
|
||||
ze_context_handle_t context;
|
||||
ze_context_desc_t contextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
|
||||
SUCCESS_OR_TERMINATE(contextDdiTable.pfnCreate(driver, &contextDesc, &context));
|
||||
|
||||
zet_tracer_exp_desc_t tracerDesc = {ZET_STRUCTURE_TYPE_TRACER_EXP_DESC, nullptr, &tracerData0};
|
||||
zet_tracer_exp_handle_t tracer;
|
||||
SUCCESS_OR_TERMINATE(zetTracerExpCreate(context, &tracerDesc, &tracer));
|
||||
|
||||
ze_callbacks_t prologCbs = {};
|
||||
prologCbs.Global.pfnInitCb = onEnterInit;
|
||||
prologCbs.Driver.pfnGetCb = onEnterDriverGet;
|
||||
prologCbs.Mem.pfnAllocDeviceCb = onEnterMemAllocDevice;
|
||||
prologCbs.Mem.pfnAllocHostCb = onEnterMemAllocHost;
|
||||
prologCbs.Mem.pfnAllocSharedCb = onEnterMemAllocShared;
|
||||
SUCCESS_OR_TERMINATE(zetTracerExpSetPrologues(tracer, &prologCbs));
|
||||
|
||||
ze_callbacks_t epilogCbs = {};
|
||||
epilogCbs.Global.pfnInitCb = onExitInit;
|
||||
epilogCbs.Driver.pfnGetCb = onExitDriverGet;
|
||||
epilogCbs.Mem.pfnAllocDeviceCb = onExitMemAllocDevice;
|
||||
epilogCbs.Mem.pfnAllocHostCb = onExitMemAllocHost;
|
||||
epilogCbs.Mem.pfnAllocSharedCb = onExitMemAllocShared;
|
||||
SUCCESS_OR_TERMINATE(zetTracerExpSetEpilogues(tracer, &epilogCbs));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zetTracerExpSetEnabled(tracer, true));
|
||||
|
||||
setInitParams(ZE_INIT_FLAG_GPU_ONLY);
|
||||
SUCCESS_OR_TERMINATE(globalDdiTable.pfnInit(ZE_INIT_FLAG_GPU_ONLY));
|
||||
|
||||
ze_driver_handle_t driverTest;
|
||||
setDriverGetParams(&driverCount, &driverTest);
|
||||
SUCCESS_OR_TERMINATE(driverDdiTable.pfnGet(&driverCount, &driverTest));
|
||||
|
||||
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
|
||||
SUCCESS_OR_TERMINATE(deviceDdiTable.pfnGetProperties(device, &deviceProperties));
|
||||
printDeviceProperties(deviceProperties);
|
||||
|
||||
bool outputValidationSuccessful;
|
||||
testAppendMemoryCopy0(context, device, outputValidationSuccessful,
|
||||
deviceDdiTable, cmdQueueDdiTable, cmdListDdiTable, memDdiTable);
|
||||
if (outputValidationSuccessful) {
|
||||
testAppendMemoryCopy1(context, device, outputValidationSuccessful,
|
||||
deviceDdiTable, cmdQueueDdiTable, cmdListDdiTable, memDdiTable);
|
||||
}
|
||||
if (outputValidationSuccessful) {
|
||||
testAppendMemoryCopy2(context, device, outputValidationSuccessful,
|
||||
deviceDdiTable, cmdQueueDdiTable, cmdListDdiTable, memDdiTable);
|
||||
}
|
||||
|
||||
/* tear down tracing environemt and test epilog/prolg counts */
|
||||
SUCCESS_OR_TERMINATE(zetTracerExpSetEnabled(tracer, false));
|
||||
SUCCESS_OR_TERMINATE(zetTracerExpDestroy(tracer));
|
||||
|
||||
if (verbose) {
|
||||
std::cout << "initCount: " << initCount
|
||||
<< " initPrologCount: " << initPrologCount
|
||||
<< " initEpilogCount: " << initEpilogCount
|
||||
<< std::endl;
|
||||
|
||||
std::cout << "driverGetCount: " << driverGetCount
|
||||
<< " driverGetPrologCount: " << driverGetPrologCount
|
||||
<< " driverGetEpilogCount: " << driverGetEpilogCount
|
||||
<< std::endl;
|
||||
|
||||
std::cout << "memAllocDeviceCount: " << memAllocDeviceCount
|
||||
<< " memAllocDevicePrologCount: " << memAllocDevicePrologCount
|
||||
<< " memAllocDeviceEpilogCount: " << memAllocDeviceEpilogCount
|
||||
<< std::endl;
|
||||
|
||||
std::cout << "memAllocHostCount: " << memAllocHostCount
|
||||
<< " memAllocHostPrologCount: " << memAllocHostPrologCount
|
||||
<< " memAllocHostEpilogCount: " << memAllocHostEpilogCount
|
||||
<< std::endl;
|
||||
|
||||
std::cout << "memAllocSharedCount: " << memAllocSharedCount
|
||||
<< " memAllocSharedPrologCount: " << memAllocSharedPrologCount
|
||||
<< " memAllocSharedEpilogCount: " << memAllocSharedEpilogCount
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
SUCCESS_OR_TERMINATE_BOOL((initCount == initPrologCount) &&
|
||||
(initCount == initEpilogCount));
|
||||
SUCCESS_OR_TERMINATE_BOOL((driverGetCount == driverGetPrologCount) &&
|
||||
(driverGetCount == driverGetEpilogCount));
|
||||
SUCCESS_OR_TERMINATE_BOOL((memAllocDeviceCount == memAllocDevicePrologCount) &&
|
||||
(memAllocDeviceCount == memAllocDeviceEpilogCount));
|
||||
SUCCESS_OR_TERMINATE_BOOL((memAllocHostCount == memAllocHostPrologCount) &&
|
||||
(memAllocHostCount == memAllocHostEpilogCount));
|
||||
SUCCESS_OR_TERMINATE_BOOL((memAllocSharedCount == memAllocSharedPrologCount) &&
|
||||
(memAllocSharedCount == memAllocSharedEpilogCount));
|
||||
|
||||
SUCCESS_OR_TERMINATE(contextDdiTable.pfnDestroy(context));
|
||||
|
||||
bool aubMode = isAubMode(argc, argv);
|
||||
if (aubMode == false) {
|
||||
std::cout << "\nZello Copy Tracing Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED")
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
int resultOnFailure = aubMode ? 0 : 1;
|
||||
return outputValidationSuccessful ? 0 : resultOnFailure;
|
||||
}
|
Reference in New Issue
Block a user