mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
Add zello_copy black_box test
Signed-off-by: Jitendra Sharma <jitendra.sharma@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
6b2cae1909
commit
7883fa331a
@@ -12,8 +12,11 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||
zello_world_jitc_ocloc
|
||||
zello_ipc_copy_dma_buf
|
||||
zello_ipc_copy_dma_buf_p2p
|
||||
zello_copy
|
||||
)
|
||||
|
||||
include_directories(common)
|
||||
|
||||
foreach(TEST_NAME ${TEST_TARGETS})
|
||||
if(MSVC)
|
||||
if(${TEST_NAME} STREQUAL "zello_ipc_copy_dma_buf")
|
||||
|
||||
137
level_zero/core/test/black_box_tests/common/zello_common.h
Normal file
137
level_zero/core/test/black_box_tests/common/zello_common.h
Normal file
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
extern bool verbose;
|
||||
|
||||
template <bool TerminateOnFailure, typename ResulT>
|
||||
inline void validate(ResulT result, const char *message) {
|
||||
if (result == ZE_RESULT_SUCCESS) {
|
||||
if (verbose) {
|
||||
std::cerr << "SUCCESS : " << message << std::endl;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
if (TerminateOnFailure) {
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
|
||||
#define SUCCESS_OR_TERMINATE(CALL) validate<true>(CALL, #CALL)
|
||||
#define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate<true>(!(FLAG), #FLAG)
|
||||
#define SUCCESS_OR_WARNING(CALL) validate<false>(CALL, #CALL)
|
||||
#define SUCCESS_OR_WARNING_BOOL(FLAG) validate<false>(!(FLAG), #FLAG)
|
||||
|
||||
inline bool isParamEnabled(int argc, char *argv[], const char *shortName, const char *longName) {
|
||||
char **arg = &argv[1];
|
||||
char **argE = &argv[argc];
|
||||
|
||||
for (; arg != argE; ++arg) {
|
||||
if ((0 == strcmp(*arg, shortName)) || (0 == strcmp(*arg, longName))) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
inline bool isVerbose(int argc, char *argv[]) {
|
||||
bool enabled = isParamEnabled(argc, argv, "-v", "--verbose");
|
||||
if (enabled == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::cerr << "Verbose mode detected";
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
uint32_t getCommandQueueOrdinal(ze_device_handle_t &device) {
|
||||
uint32_t numQueueGroups = 0;
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
|
||||
if (numQueueGroups == 0) {
|
||||
std::cout << "No queue groups found!\n";
|
||||
std::terminate();
|
||||
}
|
||||
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups,
|
||||
queueProperties.data()));
|
||||
uint32_t computeQueueGroupOrdinal = numQueueGroups;
|
||||
for (uint32_t i = 0; i < numQueueGroups; i++) {
|
||||
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
|
||||
computeQueueGroupOrdinal = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return computeQueueGroupOrdinal;
|
||||
}
|
||||
|
||||
ze_result_t createCommandQueue(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue) {
|
||||
ze_command_queue_desc_t descriptor = {};
|
||||
descriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
|
||||
|
||||
descriptor.pNext = nullptr;
|
||||
descriptor.flags = 0;
|
||||
descriptor.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT;
|
||||
descriptor.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
|
||||
descriptor.ordinal = getCommandQueueOrdinal(device);
|
||||
descriptor.index = 0;
|
||||
return zeCommandQueueCreate(context, device, &descriptor, &cmdQueue);
|
||||
}
|
||||
|
||||
ze_result_t createCommandList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_list_handle_t &cmdList) {
|
||||
ze_command_list_desc_t descriptor = {};
|
||||
descriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
|
||||
|
||||
descriptor.pNext = nullptr;
|
||||
descriptor.flags = 0;
|
||||
descriptor.commandQueueGroupOrdinal = getCommandQueueOrdinal(device);
|
||||
|
||||
return zeCommandListCreate(context, device, &descriptor, &cmdList);
|
||||
}
|
||||
|
||||
ze_device_handle_t zelloInitContextAndGetDevices(ze_context_handle_t &context) {
|
||||
SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY));
|
||||
|
||||
uint32_t driverCount = 0;
|
||||
SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr));
|
||||
if (driverCount == 0) {
|
||||
std::cout << "No driver handle found!\n";
|
||||
std::terminate();
|
||||
}
|
||||
ze_driver_handle_t driverHandle;
|
||||
SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle));
|
||||
ze_context_desc_t context_desc = {};
|
||||
context_desc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC;
|
||||
SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &context_desc, &context));
|
||||
|
||||
uint32_t deviceCount = 0;
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr));
|
||||
if (deviceCount == 0) {
|
||||
std::cout << "No device found!\n";
|
||||
std::terminate();
|
||||
}
|
||||
std::vector<ze_device_handle_t> devices(deviceCount, nullptr);
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, devices.data()));
|
||||
return devices[0];
|
||||
}
|
||||
474
level_zero/core/test/black_box_tests/zello_copy.cpp
Normal file
474
level_zero/core/test/black_box_tests/zello_copy.cpp
Normal file
@@ -0,0 +1,474 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "zello_common.h"
|
||||
|
||||
#include <iomanip>
|
||||
|
||||
extern bool verbose;
|
||||
bool verbose = false;
|
||||
|
||||
void testAppendMemoryCopyFromHeapToDeviceToStack(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
|
||||
const size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder
|
||||
char *heapBuffer = new char[allocSize];
|
||||
void *zeBuffer = nullptr;
|
||||
char stackBuffer[allocSize];
|
||||
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_list_handle_t cmdList;
|
||||
|
||||
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
|
||||
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
|
||||
deviceDesc.ordinal = 0;
|
||||
deviceDesc.flags = 0;
|
||||
deviceDesc.pNext = nullptr;
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer));
|
||||
|
||||
for (size_t i = 0; i < allocSize; ++i) {
|
||||
heapBuffer[i] = static_cast<char>(i + 1);
|
||||
}
|
||||
memset(stackBuffer, 0, allocSize);
|
||||
|
||||
// Copy from heap to device-allocated memory
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeBuffer, heapBuffer, allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
|
||||
|
||||
// Copy from device-allocated memory to stack
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
|
||||
|
||||
// Validate stack and ze buffers have the original data from heapBuffer
|
||||
validRet = (0 == memcmp(heapBuffer, stackBuffer, allocSize));
|
||||
|
||||
delete[] heapBuffer;
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
void testAppendMemoryCopyFromHostToDeviceToStack(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
|
||||
const size_t allocSize = 4096 + 7; // +7 to brake alignment and make it harder
|
||||
char *hostBuffer;
|
||||
void *zeBuffer = nullptr;
|
||||
char stackBuffer[allocSize];
|
||||
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_list_handle_t cmdList;
|
||||
|
||||
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
|
||||
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
|
||||
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
|
||||
hostDesc.pNext = nullptr;
|
||||
hostDesc.flags = 0;
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, (void **)(&hostBuffer)));
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
|
||||
deviceDesc.ordinal = 0;
|
||||
deviceDesc.flags = 0;
|
||||
deviceDesc.pNext = nullptr;
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer));
|
||||
|
||||
for (size_t i = 0; i < allocSize; ++i) {
|
||||
hostBuffer[i] = static_cast<char>(i + 1);
|
||||
}
|
||||
memset(stackBuffer, 0, allocSize);
|
||||
|
||||
// Copy from host-allocated to device-allocated memory
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeBuffer, hostBuffer, allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
|
||||
|
||||
// Copy from device-allocated memory to stack
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
|
||||
|
||||
// Validate stack and ze buffers have the original data from hostBuffer
|
||||
validRet = (0 == memcmp(hostBuffer, stackBuffer, allocSize));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, hostBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
void testAppendMemoryCopy2DRegion(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
|
||||
validRet = true;
|
||||
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_list_handle_t cmdList;
|
||||
|
||||
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
|
||||
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
|
||||
|
||||
void *dstBuffer = nullptr;
|
||||
uint32_t dstWidth = verbose ? 16 : 256; // width of the dst 2D buffer in bytes
|
||||
uint32_t dstHeight = verbose ? 32 : 128; // height of the dst 2D buffer in bytes
|
||||
uint32_t dstOriginX = verbose ? 8 : 32; // Offset in bytes
|
||||
uint32_t dstOriginY = verbose ? 8 : 64; // Offset in rows
|
||||
uint32_t dstSize = dstHeight * dstWidth; // Size of the dst buffer
|
||||
|
||||
void *srcBuffer = nullptr;
|
||||
uint32_t srcWidth = verbose ? 24 : 128; // width of the src 2D buffer in bytes
|
||||
uint32_t srcHeight = verbose ? 16 : 96; // height of the src 2D buffer in bytes
|
||||
uint32_t srcOriginX = verbose ? 4 : 16; // Offset in bytes
|
||||
uint32_t srcOriginY = verbose ? 4 : 32; // Offset in rows
|
||||
uint32_t srcSize = srcHeight * srcWidth; // Size of the src buffer
|
||||
|
||||
uint32_t width = verbose ? 8 : 64; // width of the region to copy
|
||||
uint32_t height = verbose ? 12 : 32; // height of the region to copy
|
||||
const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, 0, width, height, 0};
|
||||
const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, 0, width, height, 0};
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
|
||||
deviceDesc.pNext = nullptr;
|
||||
deviceDesc.ordinal = 0;
|
||||
deviceDesc.flags = 0;
|
||||
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
|
||||
hostDesc.pNext = nullptr;
|
||||
hostDesc.flags = 0;
|
||||
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeMemAllocShared(context, &deviceDesc, &hostDesc,
|
||||
srcSize, 1, device, &srcBuffer));
|
||||
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeMemAllocShared(context, &deviceDesc, &hostDesc,
|
||||
dstSize, 1, device, &dstBuffer));
|
||||
|
||||
// Initialize buffers
|
||||
// dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device
|
||||
// and there's no need to perform explicit copies
|
||||
uint8_t *srcBufferChar = reinterpret_cast<uint8_t *>(srcBuffer);
|
||||
for (uint32_t i = 0; i < srcHeight; i++) {
|
||||
for (uint32_t j = 0; j < srcWidth; j++) {
|
||||
srcBufferChar[i * srcWidth + j] = static_cast<uint8_t>(i * srcWidth + j);
|
||||
}
|
||||
}
|
||||
|
||||
int value = 0;
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast<void *>(&value),
|
||||
sizeof(value), dstSize, nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
|
||||
|
||||
// Perform the copy
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, 0,
|
||||
const_cast<const void *>(srcBuffer), &srcRegion, srcWidth, 0,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
|
||||
|
||||
uint8_t *dstBufferChar = reinterpret_cast<uint8_t *>(dstBuffer);
|
||||
if (verbose) {
|
||||
std::cout << "srcBufferChar\n";
|
||||
for (uint32_t i = 0; i < srcHeight; i++) {
|
||||
for (uint32_t j = 0; j < srcWidth; j++) {
|
||||
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(srcBufferChar[i * srcWidth + j]) << " ";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
std::cout << "dstBuffer\n";
|
||||
for (uint32_t i = 0; i < dstHeight; i++) {
|
||||
for (uint32_t j = 0; j < dstWidth; j++) {
|
||||
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(dstBufferChar[i * dstWidth + j]) << " ";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth;
|
||||
uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth;
|
||||
for (uint32_t i = 0; i < height; i++) {
|
||||
for (uint32_t j = 0; j < width; j++) {
|
||||
uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth) + j];
|
||||
uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth) + j];
|
||||
if (dstVal != srcVal) {
|
||||
validRet = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
void testAppendMemoryFillWithSomePattern(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
|
||||
const size_t allocSize = 4096 + 7;
|
||||
|
||||
char pattern0 = 5;
|
||||
const size_t pattern1Size = 9;
|
||||
char *pattern1 = new char[pattern1Size];
|
||||
void *zeBuffer0 = nullptr;
|
||||
void *zeBuffer1 = nullptr;
|
||||
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_list_handle_t cmdList;
|
||||
|
||||
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
|
||||
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
|
||||
|
||||
// Initialize buffers
|
||||
// zeBuffer0 and zeBuffer1 are shared allocations, so they have UVA between host and device
|
||||
// and there's no need to perform explicit copies
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
|
||||
deviceDesc.pNext = nullptr;
|
||||
deviceDesc.ordinal = 0;
|
||||
deviceDesc.flags = 0;
|
||||
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
|
||||
hostDesc.pNext = nullptr;
|
||||
hostDesc.flags = 0;
|
||||
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeMemAllocShared(context, &deviceDesc, &hostDesc,
|
||||
allocSize, 1, device, &zeBuffer0));
|
||||
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeMemAllocShared(context, &deviceDesc, &hostDesc,
|
||||
allocSize, 1, device, &zeBuffer1));
|
||||
|
||||
// Fibonacci
|
||||
pattern1[0] = 1;
|
||||
pattern1[1] = 2;
|
||||
for (size_t i = 2; i < pattern1Size; i++) {
|
||||
pattern1[i] = pattern1[i - 1] + pattern1[i - 2];
|
||||
}
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer0, &pattern0, sizeof(pattern0), allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer1, pattern1, pattern1Size, allocSize,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
|
||||
|
||||
validRet = true;
|
||||
uint8_t *zeBufferChar0 = reinterpret_cast<uint8_t *>(zeBuffer0);
|
||||
for (size_t i = 0; i < allocSize; ++i) {
|
||||
if (zeBufferChar0[i] != pattern0) {
|
||||
validRet = false;
|
||||
if (verbose) {
|
||||
std::cout << "dstBufferChar0[" << i << " ] "
|
||||
<< static_cast<unsigned int>(zeBufferChar0[i])
|
||||
<< "!= pattern0 " << pattern0 << "\n";
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (validRet == true) {
|
||||
uint8_t *zeBufferChar1 = reinterpret_cast<uint8_t *>(zeBuffer1);
|
||||
size_t j = 0;
|
||||
for (size_t i = 0; i < allocSize; i++) {
|
||||
if (zeBufferChar1[i] != pattern1[j]) {
|
||||
validRet = false;
|
||||
if (verbose) {
|
||||
std::cout << "dstBufferChar1[" << i << " ] "
|
||||
<< static_cast<unsigned int>(zeBufferChar1[i])
|
||||
<< "!= pattern1[" << j << " ] "
|
||||
<< static_cast<unsigned int>(pattern1[j]) << "\n";
|
||||
}
|
||||
break;
|
||||
}
|
||||
j++;
|
||||
if (j >= pattern1Size) {
|
||||
j = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete[] pattern1;
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer0));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer1));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
void testAppendMemoryCopy3DRegion(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
|
||||
validRet = true;
|
||||
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_list_handle_t cmdList;
|
||||
|
||||
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
|
||||
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
|
||||
|
||||
void *dstBuffer = nullptr;
|
||||
uint32_t dstWidth = verbose ? 8 : 64; // width of the dst 3D buffer in bytes
|
||||
uint32_t dstHeight = verbose ? 8 : 64; // height of the dst 3D buffer in bytes
|
||||
uint32_t dstDepth = verbose ? 2 : 4; // depth of the dst 3D buffer in bytes
|
||||
uint32_t dstOriginX = 0; // Offset in bytes
|
||||
uint32_t dstOriginY = 0; // Offset in rows
|
||||
uint32_t dstOriginZ = 0; // Offset in rows
|
||||
uint32_t dstSize = dstHeight * dstWidth * dstDepth; // Size of the dst buffer
|
||||
|
||||
void *srcBuffer = nullptr;
|
||||
uint32_t srcWidth = verbose ? 8 : 64; // width of the src 3D buffer in bytes
|
||||
uint32_t srcHeight = verbose ? 8 : 64; // height of the src 3D buffer in bytes
|
||||
uint32_t srcDepth = verbose ? 2 : 4; // depth of the src 3D buffer in bytes
|
||||
uint32_t srcOriginX = 0; // Offset in bytes
|
||||
uint32_t srcOriginY = 0; // Offset in rows
|
||||
uint32_t srcOriginZ = 0; // Offset in rows
|
||||
uint32_t srcSize = srcHeight * srcWidth * srcDepth; // Size of the src buffer
|
||||
|
||||
uint32_t width = verbose ? 8 : 64; // width of the region to copy
|
||||
uint32_t height = verbose ? 8 : 64; // height of the region to copy
|
||||
uint32_t depth = verbose ? 2 : 4; // height of the region to copy
|
||||
const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, dstOriginZ, width, height, depth};
|
||||
const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, dstOriginZ, width, height, depth};
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
|
||||
deviceDesc.pNext = nullptr;
|
||||
deviceDesc.ordinal = 0;
|
||||
deviceDesc.flags = 0;
|
||||
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
|
||||
hostDesc.pNext = nullptr;
|
||||
hostDesc.flags = 0;
|
||||
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeMemAllocShared(context, &deviceDesc, &hostDesc,
|
||||
srcSize, 1, device, &srcBuffer));
|
||||
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeMemAllocShared(context, &deviceDesc, &hostDesc,
|
||||
dstSize, 1, device, &dstBuffer));
|
||||
|
||||
// Initialize buffers
|
||||
// dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device
|
||||
// and there's no need to perform explicit copies
|
||||
uint8_t *srcBufferChar = reinterpret_cast<uint8_t *>(srcBuffer);
|
||||
for (uint32_t i = 0; i < srcDepth; i++) {
|
||||
for (uint32_t j = 0; j < srcHeight; j++) {
|
||||
for (uint32_t k = 0; k < srcWidth; k++) {
|
||||
size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k;
|
||||
srcBufferChar[index] = static_cast<uint8_t>(index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int value = 0;
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast<void *>(&value),
|
||||
sizeof(value), dstSize, nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
|
||||
|
||||
// Perform the copy
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, (dstWidth * dstHeight),
|
||||
const_cast<const void *>(srcBuffer), &srcRegion, srcWidth, (srcWidth * srcHeight),
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
|
||||
|
||||
uint8_t *dstBufferChar = reinterpret_cast<uint8_t *>(dstBuffer);
|
||||
if (verbose) {
|
||||
std::cout << "srcBufferChar\n";
|
||||
for (uint32_t i = 0; i < srcDepth; i++) {
|
||||
for (uint32_t j = 0; j < srcHeight; j++) {
|
||||
for (uint32_t k = 0; k < srcWidth; k++) {
|
||||
size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k;
|
||||
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(srcBufferChar[index]) << " ";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
|
||||
std::cout << "dstBuffer\n";
|
||||
for (uint32_t i = 0; i < dstDepth; i++) {
|
||||
for (uint32_t j = 0; j < dstHeight; j++) {
|
||||
for (uint32_t k = 0; k < dstWidth; k++) {
|
||||
size_t index = (i * dstWidth * dstHeight) + (j * dstWidth) + k;
|
||||
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(dstBufferChar[index]) << " ";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
std::cout << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth + dstOriginZ * dstDepth * dstWidth;
|
||||
uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth + srcOriginZ * srcDepth * srcWidth;
|
||||
for (uint32_t i = 0; i < depth; i++) {
|
||||
for (uint32_t j = 0; j < height; j++) {
|
||||
for (uint32_t k = 0; k < width; k++) {
|
||||
uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth * dstHeight) + (j * dstWidth) + k];
|
||||
uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth * srcHeight) + (j * srcWidth) + k];
|
||||
if (dstVal != srcVal) {
|
||||
validRet = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
verbose = isVerbose(argc, argv);
|
||||
|
||||
ze_context_handle_t context = nullptr;
|
||||
auto device = zelloInitContextAndGetDevices(context);
|
||||
bool outputValidationSuccessful = false;
|
||||
|
||||
if (verbose) {
|
||||
ze_device_properties_t deviceProperties = {};
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
|
||||
std::cout << deviceProperties.name << std::endl;
|
||||
}
|
||||
testAppendMemoryCopyFromHeapToDeviceToStack(context, device, outputValidationSuccessful);
|
||||
if (outputValidationSuccessful)
|
||||
testAppendMemoryCopyFromHostToDeviceToStack(context, device, outputValidationSuccessful);
|
||||
if (outputValidationSuccessful)
|
||||
testAppendMemoryCopy2DRegion(context, device, outputValidationSuccessful);
|
||||
if (outputValidationSuccessful)
|
||||
testAppendMemoryFillWithSomePattern(context, device, outputValidationSuccessful);
|
||||
if (outputValidationSuccessful)
|
||||
testAppendMemoryCopy3DRegion(context, device, outputValidationSuccessful);
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
|
||||
std::cout << "\nZello Copy Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";
|
||||
return outputValidationSuccessful;
|
||||
}
|
||||
@@ -5,36 +5,15 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
#include "zello_common.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
|
||||
template <bool TerminateOnFailure, typename ResulT>
|
||||
inline void validate(ResulT result, const char *message) {
|
||||
if (result == ZE_RESULT_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (TerminateOnFailure) {
|
||||
std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result
|
||||
<< std::endl;
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
|
||||
#define SUCCESS_OR_TERMINATE(CALL) validate<true>(CALL, #CALL)
|
||||
#define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate<true>(!(FLAG), #FLAG)
|
||||
#define SUCCESS_OR_WARNING(CALL) validate<false>(CALL, #CALL)
|
||||
#define SUCCESS_OR_WARNING_BOOL(FLAG) validate<false>(!(FLAG), #FLAG)
|
||||
extern bool verbose;
|
||||
bool verbose = false;
|
||||
|
||||
size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder
|
||||
|
||||
@@ -270,6 +249,7 @@ void run_server(int commSocket, bool &validRet) {
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
verbose = isVerbose(argc, argv);
|
||||
bool outputValidationSuccessful;
|
||||
|
||||
int sv[2];
|
||||
|
||||
@@ -5,36 +5,15 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
#include "zello_common.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
|
||||
template <bool TerminateOnFailure, typename ResulT>
|
||||
inline void validate(ResulT result, const char *message) {
|
||||
if (result == ZE_RESULT_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (TerminateOnFailure) {
|
||||
std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result
|
||||
<< std::endl;
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
|
||||
#define SUCCESS_OR_TERMINATE(CALL) validate<true>(CALL, #CALL)
|
||||
#define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate<true>(!(FLAG), #FLAG)
|
||||
#define SUCCESS_OR_WARNING(CALL) validate<false>(CALL, #CALL)
|
||||
#define SUCCESS_OR_WARNING_BOOL(FLAG) validate<false>(!(FLAG), #FLAG)
|
||||
extern bool verbose;
|
||||
bool verbose = false;
|
||||
|
||||
uint8_t uinitializedPattern = 1;
|
||||
uint8_t expectedPattern = 7;
|
||||
@@ -298,6 +277,7 @@ void run_server(int commSocket, bool &validRet) {
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
verbose = isVerbose(argc, argv);
|
||||
bool outputValidationSuccessful;
|
||||
|
||||
int sv[2];
|
||||
|
||||
@@ -5,15 +5,10 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
#include "zello_common.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
extern bool verbose;
|
||||
bool verbose = false;
|
||||
|
||||
inline std::vector<uint8_t> loadBinaryFile(const std::string &filePath) {
|
||||
std::ifstream stream(filePath, std::ios::in);
|
||||
@@ -31,22 +26,6 @@ inline std::vector<uint8_t> loadBinaryFile(const std::string &filePath) {
|
||||
return binary_file;
|
||||
}
|
||||
|
||||
template <bool TerminateOnFailure, typename ResulT>
|
||||
inline void validate(ResulT result, const char *message) {
|
||||
if (result == 0) { // assumption 0 is success
|
||||
std::cerr << "SUCCESS : " << message << std::endl;
|
||||
return;
|
||||
}
|
||||
std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result
|
||||
<< std::endl;
|
||||
|
||||
if (TerminateOnFailure) {
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
|
||||
#define SUCCESS_OR_TERMINATE(CALL) validate<true>(CALL, #CALL)
|
||||
|
||||
void createCmdQueueAndCmdList(ze_context_handle_t &context,
|
||||
ze_device_handle_t &device,
|
||||
ze_command_queue_handle_t &cmdQueue,
|
||||
@@ -270,7 +249,6 @@ bool testKernelTimestampHostQuery(ze_context_handle_t &context,
|
||||
}
|
||||
|
||||
bool testKernelTimestampApendQuery(ze_context_handle_t &context,
|
||||
ze_driver_handle_t &driver,
|
||||
ze_device_handle_t &device) {
|
||||
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
@@ -385,42 +363,26 @@ void printResult(bool result, std::string ¤tTest) {
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
verbose = isVerbose(argc, argv);
|
||||
ze_context_handle_t context = nullptr;
|
||||
auto device = zelloInitContextAndGetDevices(context);
|
||||
|
||||
if (verbose) {
|
||||
ze_device_properties_t deviceProperties = {};
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
|
||||
std::cout << deviceProperties.name << std::endl;
|
||||
std::cout << "Device : \n"
|
||||
<< " * name : " << deviceProperties.name << "\n"
|
||||
<< " * vendorId : " << std::hex << deviceProperties.vendorId << "\n";
|
||||
}
|
||||
|
||||
bool result;
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY));
|
||||
|
||||
uint32_t driverCount = 0;
|
||||
SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr));
|
||||
if (driverCount == 0)
|
||||
std::terminate();
|
||||
|
||||
ze_driver_handle_t driverHandle;
|
||||
SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle));
|
||||
|
||||
ze_context_desc_t contextDesc = {};
|
||||
ze_context_handle_t context;
|
||||
SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &contextDesc, &context));
|
||||
|
||||
uint32_t deviceCount = 0;
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr));
|
||||
if (deviceCount == 0)
|
||||
std::terminate();
|
||||
|
||||
ze_device_handle_t device;
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, &device));
|
||||
|
||||
ze_device_properties_t deviceProperties = {};
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
|
||||
std::cout << "Device : \n"
|
||||
<< " * name : " << deviceProperties.name << "\n"
|
||||
<< " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
|
||||
<< " * vendorId : " << deviceProperties.vendorId << "\n";
|
||||
|
||||
std::string currentTest;
|
||||
|
||||
currentTest = "Test Append Write of Global Timestamp";
|
||||
result = testKernelTimestampApendQuery(context, driverHandle, device);
|
||||
result = testKernelTimestampApendQuery(context, device);
|
||||
printResult(result, currentTest);
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
|
||||
|
||||
return result ? 0 : 1;
|
||||
|
||||
@@ -5,86 +5,21 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
#include "zello_common.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#define VALIDATECALL(myZeCall) \
|
||||
do { \
|
||||
if (myZeCall != ZE_RESULT_SUCCESS) { \
|
||||
std::cout << "Error at " \
|
||||
<< #myZeCall << ": " \
|
||||
<< __FUNCTION__ << ": " \
|
||||
<< __LINE__ << "\n"; \
|
||||
std::terminate(); \
|
||||
} \
|
||||
} while (0);
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// Initialize driver
|
||||
VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY));
|
||||
|
||||
// Retrieve driver
|
||||
uint32_t driverCount = 0;
|
||||
VALIDATECALL(zeDriverGet(&driverCount, nullptr));
|
||||
|
||||
ze_driver_handle_t driverHandle;
|
||||
VALIDATECALL(zeDriverGet(&driverCount, &driverHandle));
|
||||
|
||||
ze_context_desc_t contextDesc = {};
|
||||
ze_context_handle_t context;
|
||||
VALIDATECALL(zeContextCreate(driverHandle, &contextDesc, &context));
|
||||
|
||||
// Retrieve device
|
||||
uint32_t deviceCount = 0;
|
||||
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr));
|
||||
|
||||
ze_device_handle_t device;
|
||||
deviceCount = 1;
|
||||
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, &device));
|
||||
|
||||
// Print some properties
|
||||
ze_device_properties_t deviceProperties = {};
|
||||
VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties));
|
||||
|
||||
std::cout << "Device : \n"
|
||||
<< " * name : " << deviceProperties.name << "\n"
|
||||
<< " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
|
||||
<< " * vendorId : " << std::hex << deviceProperties.vendorId << "\n";
|
||||
|
||||
// Create command queue
|
||||
uint32_t numQueueGroups = 0;
|
||||
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
|
||||
if (numQueueGroups == 0) {
|
||||
std::cout << "No queue groups found!\n";
|
||||
std::terminate();
|
||||
}
|
||||
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
|
||||
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups,
|
||||
queueProperties.data()));
|
||||
extern bool verbose;
|
||||
bool verbose = false;
|
||||
|
||||
void executeGpuKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) {
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_queue_desc_t cmdQueueDesc = {};
|
||||
ze_command_list_handle_t cmdList;
|
||||
|
||||
for (uint32_t i = 0; i < numQueueGroups; i++) {
|
||||
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
|
||||
cmdQueueDesc.ordinal = i;
|
||||
}
|
||||
}
|
||||
cmdQueueDesc.ordinal = getCommandQueueOrdinal(device);
|
||||
cmdQueueDesc.index = 0;
|
||||
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
VALIDATECALL(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
|
||||
|
||||
// Create command list
|
||||
ze_command_list_handle_t cmdList;
|
||||
ze_command_list_desc_t cmdListDesc = {};
|
||||
cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal;
|
||||
VALIDATECALL(zeCommandListCreate(context, device, &cmdListDesc, &cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
|
||||
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
|
||||
|
||||
// Create two shared buffers
|
||||
constexpr size_t allocSize = 4096;
|
||||
@@ -96,10 +31,10 @@ int main(int argc, char *argv[]) {
|
||||
hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;
|
||||
|
||||
void *srcBuffer = nullptr;
|
||||
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
|
||||
|
||||
void *dstBuffer = nullptr;
|
||||
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
|
||||
|
||||
// Initialize memory
|
||||
constexpr uint8_t val = 55;
|
||||
@@ -136,46 +71,46 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
free(strLog);
|
||||
}
|
||||
VALIDATECALL(zeModuleBuildLogDestroy(buildlog));
|
||||
SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog));
|
||||
|
||||
ze_kernel_desc_t kernelDesc = {};
|
||||
kernelDesc.pKernelName = "CopyBufferToBufferBytes";
|
||||
VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel));
|
||||
SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel));
|
||||
|
||||
uint32_t groupSizeX = 32u;
|
||||
uint32_t groupSizeY = 1u;
|
||||
uint32_t groupSizeZ = 1u;
|
||||
VALIDATECALL(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
|
||||
VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
|
||||
|
||||
uint32_t offset = 0;
|
||||
VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
|
||||
VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
|
||||
VALIDATECALL(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset));
|
||||
VALIDATECALL(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset));
|
||||
VALIDATECALL(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset));
|
||||
|
||||
ze_group_count_t dispatchTraits;
|
||||
dispatchTraits.groupCountX = allocSize / groupSizeX;
|
||||
dispatchTraits.groupCountY = 1u;
|
||||
dispatchTraits.groupCountZ = 1u;
|
||||
|
||||
VALIDATECALL(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
|
||||
nullptr, 0, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
|
||||
nullptr, 0, nullptr));
|
||||
file.close();
|
||||
} else {
|
||||
// Perform a GPU copy
|
||||
VALIDATECALL(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, nullptr, 0, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, nullptr, 0, nullptr));
|
||||
}
|
||||
|
||||
// Close list and submit for execution
|
||||
VALIDATECALL(zeCommandListClose(cmdList));
|
||||
VALIDATECALL(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
|
||||
VALIDATECALL(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||
|
||||
// Validate
|
||||
bool outputValidationSuccessful = true;
|
||||
outputValidationSuccessful = true;
|
||||
if (memcmp(dstBuffer, srcBuffer, allocSize)) {
|
||||
outputValidationSuccessful = false;
|
||||
uint8_t *srcCharBuffer = static_cast<uint8_t *>(srcBuffer);
|
||||
@@ -190,11 +125,30 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
VALIDATECALL(zeMemFree(context, dstBuffer));
|
||||
VALIDATECALL(zeMemFree(context, srcBuffer));
|
||||
VALIDATECALL(zeCommandListDestroy(cmdList));
|
||||
VALIDATECALL(zeCommandQueueDestroy(cmdQueue));
|
||||
VALIDATECALL(zeContextDestroy(context));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
verbose = isVerbose(argc, argv);
|
||||
ze_context_handle_t context = nullptr;
|
||||
auto device = zelloInitContextAndGetDevices(context);
|
||||
bool outputValidationSuccessful;
|
||||
|
||||
if (verbose) {
|
||||
ze_device_properties_t deviceProperties = {};
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
|
||||
std::cout << deviceProperties.name << std::endl;
|
||||
std::cout << "Device : \n"
|
||||
<< " * name : " << deviceProperties.name << "\n"
|
||||
<< " * vendorId : " << std::hex << deviceProperties.vendorId << "\n";
|
||||
}
|
||||
|
||||
executeGpuKernelAndValidate(context, device, outputValidationSuccessful);
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
|
||||
|
||||
std::cout << "\nZello World Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";
|
||||
|
||||
|
||||
@@ -7,26 +7,12 @@
|
||||
|
||||
#include "shared/offline_compiler/source/ocloc_api.h"
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
#include "zello_common.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
|
||||
#define VALIDATECALL(myZeCall) \
|
||||
do { \
|
||||
if (myZeCall != ZE_RESULT_SUCCESS) { \
|
||||
std::cout << "Error at " \
|
||||
<< #myZeCall << ": " \
|
||||
<< __FUNCTION__ << ": " \
|
||||
<< __LINE__ << "\n"; \
|
||||
std::terminate(); \
|
||||
} \
|
||||
} while (0);
|
||||
extern bool verbose;
|
||||
bool verbose = false;
|
||||
|
||||
const char *module = R"===(
|
||||
__kernel void kernel_copy(__global int *dst, __global char *src){
|
||||
@@ -85,67 +71,16 @@ std::vector<uint8_t> compileToSpirV(const std::string &src, const std::string &o
|
||||
return ret;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
// Initialize driver
|
||||
VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY));
|
||||
|
||||
// Retrieve driver
|
||||
uint32_t driverCount = 0;
|
||||
VALIDATECALL(zeDriverGet(&driverCount, nullptr));
|
||||
|
||||
ze_driver_handle_t driverHandle;
|
||||
VALIDATECALL(zeDriverGet(&driverCount, &driverHandle));
|
||||
|
||||
ze_context_desc_t contextDesc = {};
|
||||
ze_context_handle_t context;
|
||||
VALIDATECALL(zeContextCreate(driverHandle, &contextDesc, &context));
|
||||
|
||||
// Retrieve device
|
||||
uint32_t deviceCount = 0;
|
||||
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr));
|
||||
|
||||
ze_device_handle_t device;
|
||||
deviceCount = 1;
|
||||
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, &device));
|
||||
|
||||
// Print some properties
|
||||
ze_device_properties_t deviceProperties = {};
|
||||
VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties));
|
||||
|
||||
std::cout << "Device : \n"
|
||||
<< " * name : " << deviceProperties.name << "\n"
|
||||
<< " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
|
||||
<< " * vendorId : " << std::hex << deviceProperties.vendorId << std::dec << "\n";
|
||||
|
||||
// Create command queue
|
||||
uint32_t numQueueGroups = 0;
|
||||
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
|
||||
if (numQueueGroups == 0) {
|
||||
std::cout << "No queue groups found!\n";
|
||||
std::terminate();
|
||||
}
|
||||
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
|
||||
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups,
|
||||
queueProperties.data()));
|
||||
|
||||
void executeKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) {
|
||||
ze_command_queue_handle_t cmdQueue;
|
||||
ze_command_queue_desc_t cmdQueueDesc = {};
|
||||
ze_command_list_handle_t cmdList;
|
||||
|
||||
for (uint32_t i = 0; i < numQueueGroups; i++) {
|
||||
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
|
||||
cmdQueueDesc.ordinal = i;
|
||||
}
|
||||
}
|
||||
cmdQueueDesc.ordinal = getCommandQueueOrdinal(device);
|
||||
cmdQueueDesc.index = 0;
|
||||
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
VALIDATECALL(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
|
||||
|
||||
// Create command list
|
||||
ze_command_list_handle_t cmdList;
|
||||
ze_command_list_desc_t cmdListDesc = {};
|
||||
cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal;
|
||||
VALIDATECALL(zeCommandListCreate(context, device, &cmdListDesc, &cmdList));
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
|
||||
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
|
||||
// Create two shared buffers
|
||||
constexpr size_t allocSize = 4096;
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
@@ -156,10 +91,10 @@ int main(int argc, char *argv[]) {
|
||||
hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;
|
||||
|
||||
void *srcBuffer = nullptr;
|
||||
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
|
||||
|
||||
void *dstBuffer = nullptr;
|
||||
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
|
||||
|
||||
// Initialize memory
|
||||
constexpr uint8_t val = 55;
|
||||
@@ -171,7 +106,7 @@ int main(int argc, char *argv[]) {
|
||||
if (buildLog.size() > 0) {
|
||||
std::cout << "Build log " << buildLog;
|
||||
}
|
||||
VALIDATECALL((0 == spirV.size()));
|
||||
SUCCESS_OR_TERMINATE((0 == spirV.size()));
|
||||
|
||||
ze_module_handle_t module = nullptr;
|
||||
ze_kernel_handle_t kernel = nullptr;
|
||||
@@ -193,13 +128,13 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
free(strLog);
|
||||
}
|
||||
VALIDATECALL(zeModuleBuildLogDestroy(buildlog));
|
||||
SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog));
|
||||
|
||||
ze_kernel_desc_t kernelDesc = {};
|
||||
kernelDesc.pKernelName = "kernel_copy";
|
||||
VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel));
|
||||
SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel));
|
||||
ze_kernel_properties_t kernProps;
|
||||
VALIDATECALL(zeKernelGetProperties(kernel, &kernProps));
|
||||
SUCCESS_OR_TERMINATE(zeKernelGetProperties(kernel, &kernProps));
|
||||
std::cout << "Kernel : \n"
|
||||
<< " * name : " << kernelDesc.pKernelName << "\n"
|
||||
<< " * uuid.mid : " << kernProps.uuid.mid << "\n"
|
||||
@@ -219,28 +154,28 @@ int main(int argc, char *argv[]) {
|
||||
uint32_t groupSizeX = 32u;
|
||||
uint32_t groupSizeY = 1u;
|
||||
uint32_t groupSizeZ = 1u;
|
||||
VALIDATECALL(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
|
||||
VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
|
||||
|
||||
VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
|
||||
VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
|
||||
|
||||
ze_group_count_t dispatchTraits;
|
||||
dispatchTraits.groupCountX = allocSize / groupSizeX;
|
||||
dispatchTraits.groupCountY = 1u;
|
||||
dispatchTraits.groupCountZ = 1u;
|
||||
|
||||
VALIDATECALL(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
|
||||
nullptr, 0, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
|
||||
nullptr, 0, nullptr));
|
||||
|
||||
// Close list and submit for execution
|
||||
VALIDATECALL(zeCommandListClose(cmdList));
|
||||
VALIDATECALL(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||
|
||||
VALIDATECALL(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||
|
||||
// Validate
|
||||
bool outputValidationSuccessful = true;
|
||||
outputValidationSuccessful = true;
|
||||
if (memcmp(dstBuffer, srcBuffer, allocSize)) {
|
||||
outputValidationSuccessful = false;
|
||||
uint8_t *srcCharBuffer = static_cast<uint8_t *>(srcBuffer);
|
||||
@@ -255,11 +190,30 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
VALIDATECALL(zeMemFree(context, dstBuffer));
|
||||
VALIDATECALL(zeMemFree(context, srcBuffer));
|
||||
VALIDATECALL(zeCommandListDestroy(cmdList));
|
||||
VALIDATECALL(zeCommandQueueDestroy(cmdQueue));
|
||||
VALIDATECALL(zeContextDestroy(context));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
verbose = isVerbose(argc, argv);
|
||||
ze_context_handle_t context = nullptr;
|
||||
auto device = zelloInitContextAndGetDevices(context);
|
||||
bool outputValidationSuccessful;
|
||||
|
||||
if (verbose) {
|
||||
ze_device_properties_t deviceProperties = {};
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
|
||||
std::cout << deviceProperties.name << std::endl;
|
||||
std::cout << "Device : \n"
|
||||
<< " * name : " << deviceProperties.name << "\n"
|
||||
<< " * vendorId : " << std::hex << deviceProperties.vendorId << "\n";
|
||||
}
|
||||
|
||||
executeKernelAndValidate(context, device, outputValidationSuccessful);
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
|
||||
|
||||
std::cout << "\nZello World Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";
|
||||
|
||||
|
||||
Reference in New Issue
Block a user