Add zello_copy black_box test

Signed-off-by: Jitendra Sharma <jitendra.sharma@intel.com>
This commit is contained in:
Jitendra Sharma
2020-11-25 11:05:03 +05:30
committed by Compute-Runtime-Automation
parent 6b2cae1909
commit 7883fa331a
8 changed files with 739 additions and 295 deletions

View File

@@ -12,8 +12,11 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
zello_world_jitc_ocloc
zello_ipc_copy_dma_buf
zello_ipc_copy_dma_buf_p2p
zello_copy
)
include_directories(common)
foreach(TEST_NAME ${TEST_TARGETS})
if(MSVC)
if(${TEST_NAME} STREQUAL "zello_ipc_copy_dma_buf")

View File

@@ -0,0 +1,137 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include <level_zero/ze_api.h>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <vector>
extern bool verbose;
template <bool TerminateOnFailure, typename ResulT>
inline void validate(ResulT result, const char *message) {
if (result == ZE_RESULT_SUCCESS) {
if (verbose) {
std::cerr << "SUCCESS : " << message << std::endl;
}
return;
}
if (verbose) {
std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result
<< std::endl;
}
if (TerminateOnFailure) {
std::terminate();
}
}
#define SUCCESS_OR_TERMINATE(CALL) validate<true>(CALL, #CALL)
#define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate<true>(!(FLAG), #FLAG)
#define SUCCESS_OR_WARNING(CALL) validate<false>(CALL, #CALL)
#define SUCCESS_OR_WARNING_BOOL(FLAG) validate<false>(!(FLAG), #FLAG)
inline bool isParamEnabled(int argc, char *argv[], const char *shortName, const char *longName) {
char **arg = &argv[1];
char **argE = &argv[argc];
for (; arg != argE; ++arg) {
if ((0 == strcmp(*arg, shortName)) || (0 == strcmp(*arg, longName))) {
return true;
}
}
return false;
}
inline bool isVerbose(int argc, char *argv[]) {
bool enabled = isParamEnabled(argc, argv, "-v", "--verbose");
if (enabled == false) {
return false;
}
std::cerr << "Verbose mode detected";
return true;
}
uint32_t getCommandQueueOrdinal(ze_device_handle_t &device) {
uint32_t numQueueGroups = 0;
SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
if (numQueueGroups == 0) {
std::cout << "No queue groups found!\n";
std::terminate();
}
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups,
queueProperties.data()));
uint32_t computeQueueGroupOrdinal = numQueueGroups;
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
computeQueueGroupOrdinal = i;
break;
}
}
return computeQueueGroupOrdinal;
}
ze_result_t createCommandQueue(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue) {
ze_command_queue_desc_t descriptor = {};
descriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
descriptor.pNext = nullptr;
descriptor.flags = 0;
descriptor.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT;
descriptor.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
descriptor.ordinal = getCommandQueueOrdinal(device);
descriptor.index = 0;
return zeCommandQueueCreate(context, device, &descriptor, &cmdQueue);
}
ze_result_t createCommandList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_list_handle_t &cmdList) {
ze_command_list_desc_t descriptor = {};
descriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
descriptor.pNext = nullptr;
descriptor.flags = 0;
descriptor.commandQueueGroupOrdinal = getCommandQueueOrdinal(device);
return zeCommandListCreate(context, device, &descriptor, &cmdList);
}
ze_device_handle_t zelloInitContextAndGetDevices(ze_context_handle_t &context) {
SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY));
uint32_t driverCount = 0;
SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr));
if (driverCount == 0) {
std::cout << "No driver handle found!\n";
std::terminate();
}
ze_driver_handle_t driverHandle;
SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle));
ze_context_desc_t context_desc = {};
context_desc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC;
SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &context_desc, &context));
uint32_t deviceCount = 0;
SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr));
if (deviceCount == 0) {
std::cout << "No device found!\n";
std::terminate();
}
std::vector<ze_device_handle_t> devices(deviceCount, nullptr);
SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, devices.data()));
return devices[0];
}

View File

@@ -0,0 +1,474 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "zello_common.h"
#include <iomanip>
extern bool verbose;
bool verbose = false;
void testAppendMemoryCopyFromHeapToDeviceToStack(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
const size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder
char *heapBuffer = new char[allocSize];
void *zeBuffer = nullptr;
char stackBuffer[allocSize];
ze_command_queue_handle_t cmdQueue;
ze_command_list_handle_t cmdList;
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
ze_device_mem_alloc_desc_t deviceDesc = {};
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
deviceDesc.ordinal = 0;
deviceDesc.flags = 0;
deviceDesc.pNext = nullptr;
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer));
for (size_t i = 0; i < allocSize; ++i) {
heapBuffer[i] = static_cast<char>(i + 1);
}
memset(stackBuffer, 0, allocSize);
// Copy from heap to device-allocated memory
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeBuffer, heapBuffer, allocSize,
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
// Copy from device-allocated memory to stack
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize,
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
// Validate stack and ze buffers have the original data from heapBuffer
validRet = (0 == memcmp(heapBuffer, stackBuffer, allocSize));
delete[] heapBuffer;
SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer));
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
}
void testAppendMemoryCopyFromHostToDeviceToStack(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
const size_t allocSize = 4096 + 7; // +7 to brake alignment and make it harder
char *hostBuffer;
void *zeBuffer = nullptr;
char stackBuffer[allocSize];
ze_command_queue_handle_t cmdQueue;
ze_command_list_handle_t cmdList;
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
ze_host_mem_alloc_desc_t hostDesc = {};
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
hostDesc.pNext = nullptr;
hostDesc.flags = 0;
SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, (void **)(&hostBuffer)));
ze_device_mem_alloc_desc_t deviceDesc = {};
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
deviceDesc.ordinal = 0;
deviceDesc.flags = 0;
deviceDesc.pNext = nullptr;
SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer));
for (size_t i = 0; i < allocSize; ++i) {
hostBuffer[i] = static_cast<char>(i + 1);
}
memset(stackBuffer, 0, allocSize);
// Copy from host-allocated to device-allocated memory
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeBuffer, hostBuffer, allocSize,
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
// Copy from device-allocated memory to stack
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize,
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
// Validate stack and ze buffers have the original data from hostBuffer
validRet = (0 == memcmp(hostBuffer, stackBuffer, allocSize));
SUCCESS_OR_TERMINATE(zeMemFree(context, hostBuffer));
SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer));
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
}
void testAppendMemoryCopy2DRegion(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
validRet = true;
ze_command_queue_handle_t cmdQueue;
ze_command_list_handle_t cmdList;
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
void *dstBuffer = nullptr;
uint32_t dstWidth = verbose ? 16 : 256; // width of the dst 2D buffer in bytes
uint32_t dstHeight = verbose ? 32 : 128; // height of the dst 2D buffer in bytes
uint32_t dstOriginX = verbose ? 8 : 32; // Offset in bytes
uint32_t dstOriginY = verbose ? 8 : 64; // Offset in rows
uint32_t dstSize = dstHeight * dstWidth; // Size of the dst buffer
void *srcBuffer = nullptr;
uint32_t srcWidth = verbose ? 24 : 128; // width of the src 2D buffer in bytes
uint32_t srcHeight = verbose ? 16 : 96; // height of the src 2D buffer in bytes
uint32_t srcOriginX = verbose ? 4 : 16; // Offset in bytes
uint32_t srcOriginY = verbose ? 4 : 32; // Offset in rows
uint32_t srcSize = srcHeight * srcWidth; // Size of the src buffer
uint32_t width = verbose ? 8 : 64; // width of the region to copy
uint32_t height = verbose ? 12 : 32; // height of the region to copy
const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, 0, width, height, 0};
const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, 0, width, height, 0};
ze_device_mem_alloc_desc_t deviceDesc = {};
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
deviceDesc.pNext = nullptr;
deviceDesc.ordinal = 0;
deviceDesc.flags = 0;
ze_host_mem_alloc_desc_t hostDesc = {};
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
hostDesc.pNext = nullptr;
hostDesc.flags = 0;
SUCCESS_OR_TERMINATE(
zeMemAllocShared(context, &deviceDesc, &hostDesc,
srcSize, 1, device, &srcBuffer));
SUCCESS_OR_TERMINATE(
zeMemAllocShared(context, &deviceDesc, &hostDesc,
dstSize, 1, device, &dstBuffer));
// Initialize buffers
// dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device
// and there's no need to perform explicit copies
uint8_t *srcBufferChar = reinterpret_cast<uint8_t *>(srcBuffer);
for (uint32_t i = 0; i < srcHeight; i++) {
for (uint32_t j = 0; j < srcWidth; j++) {
srcBufferChar[i * srcWidth + j] = static_cast<uint8_t>(i * srcWidth + j);
}
}
int value = 0;
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast<void *>(&value),
sizeof(value), dstSize, nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
// Perform the copy
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, 0,
const_cast<const void *>(srcBuffer), &srcRegion, srcWidth, 0,
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
uint8_t *dstBufferChar = reinterpret_cast<uint8_t *>(dstBuffer);
if (verbose) {
std::cout << "srcBufferChar\n";
for (uint32_t i = 0; i < srcHeight; i++) {
for (uint32_t j = 0; j < srcWidth; j++) {
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(srcBufferChar[i * srcWidth + j]) << " ";
}
std::cout << "\n";
}
std::cout << "dstBuffer\n";
for (uint32_t i = 0; i < dstHeight; i++) {
for (uint32_t j = 0; j < dstWidth; j++) {
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(dstBufferChar[i * dstWidth + j]) << " ";
}
std::cout << "\n";
}
}
uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth;
uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth;
for (uint32_t i = 0; i < height; i++) {
for (uint32_t j = 0; j < width; j++) {
uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth) + j];
uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth) + j];
if (dstVal != srcVal) {
validRet = false;
}
}
}
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
}
void testAppendMemoryFillWithSomePattern(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
const size_t allocSize = 4096 + 7;
char pattern0 = 5;
const size_t pattern1Size = 9;
char *pattern1 = new char[pattern1Size];
void *zeBuffer0 = nullptr;
void *zeBuffer1 = nullptr;
ze_command_queue_handle_t cmdQueue;
ze_command_list_handle_t cmdList;
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
// Initialize buffers
// zeBuffer0 and zeBuffer1 are shared allocations, so they have UVA between host and device
// and there's no need to perform explicit copies
ze_device_mem_alloc_desc_t deviceDesc = {};
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
deviceDesc.pNext = nullptr;
deviceDesc.ordinal = 0;
deviceDesc.flags = 0;
ze_host_mem_alloc_desc_t hostDesc = {};
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
hostDesc.pNext = nullptr;
hostDesc.flags = 0;
SUCCESS_OR_TERMINATE(
zeMemAllocShared(context, &deviceDesc, &hostDesc,
allocSize, 1, device, &zeBuffer0));
SUCCESS_OR_TERMINATE(
zeMemAllocShared(context, &deviceDesc, &hostDesc,
allocSize, 1, device, &zeBuffer1));
// Fibonacci
pattern1[0] = 1;
pattern1[1] = 2;
for (size_t i = 2; i < pattern1Size; i++) {
pattern1[i] = pattern1[i - 1] + pattern1[i - 2];
}
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer0, &pattern0, sizeof(pattern0), allocSize,
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer1, pattern1, pattern1Size, allocSize,
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
validRet = true;
uint8_t *zeBufferChar0 = reinterpret_cast<uint8_t *>(zeBuffer0);
for (size_t i = 0; i < allocSize; ++i) {
if (zeBufferChar0[i] != pattern0) {
validRet = false;
if (verbose) {
std::cout << "dstBufferChar0[" << i << " ] "
<< static_cast<unsigned int>(zeBufferChar0[i])
<< "!= pattern0 " << pattern0 << "\n";
}
break;
}
}
if (validRet == true) {
uint8_t *zeBufferChar1 = reinterpret_cast<uint8_t *>(zeBuffer1);
size_t j = 0;
for (size_t i = 0; i < allocSize; i++) {
if (zeBufferChar1[i] != pattern1[j]) {
validRet = false;
if (verbose) {
std::cout << "dstBufferChar1[" << i << " ] "
<< static_cast<unsigned int>(zeBufferChar1[i])
<< "!= pattern1[" << j << " ] "
<< static_cast<unsigned int>(pattern1[j]) << "\n";
}
break;
}
j++;
if (j >= pattern1Size) {
j = 0;
}
}
}
delete[] pattern1;
SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer0));
SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer1));
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
}
void testAppendMemoryCopy3DRegion(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) {
validRet = true;
ze_command_queue_handle_t cmdQueue;
ze_command_list_handle_t cmdList;
SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue));
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
void *dstBuffer = nullptr;
uint32_t dstWidth = verbose ? 8 : 64; // width of the dst 3D buffer in bytes
uint32_t dstHeight = verbose ? 8 : 64; // height of the dst 3D buffer in bytes
uint32_t dstDepth = verbose ? 2 : 4; // depth of the dst 3D buffer in bytes
uint32_t dstOriginX = 0; // Offset in bytes
uint32_t dstOriginY = 0; // Offset in rows
uint32_t dstOriginZ = 0; // Offset in rows
uint32_t dstSize = dstHeight * dstWidth * dstDepth; // Size of the dst buffer
void *srcBuffer = nullptr;
uint32_t srcWidth = verbose ? 8 : 64; // width of the src 3D buffer in bytes
uint32_t srcHeight = verbose ? 8 : 64; // height of the src 3D buffer in bytes
uint32_t srcDepth = verbose ? 2 : 4; // depth of the src 3D buffer in bytes
uint32_t srcOriginX = 0; // Offset in bytes
uint32_t srcOriginY = 0; // Offset in rows
uint32_t srcOriginZ = 0; // Offset in rows
uint32_t srcSize = srcHeight * srcWidth * srcDepth; // Size of the src buffer
uint32_t width = verbose ? 8 : 64; // width of the region to copy
uint32_t height = verbose ? 8 : 64; // height of the region to copy
uint32_t depth = verbose ? 2 : 4; // height of the region to copy
const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, dstOriginZ, width, height, depth};
const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, dstOriginZ, width, height, depth};
ze_device_mem_alloc_desc_t deviceDesc = {};
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
deviceDesc.pNext = nullptr;
deviceDesc.ordinal = 0;
deviceDesc.flags = 0;
ze_host_mem_alloc_desc_t hostDesc = {};
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
hostDesc.pNext = nullptr;
hostDesc.flags = 0;
SUCCESS_OR_TERMINATE(
zeMemAllocShared(context, &deviceDesc, &hostDesc,
srcSize, 1, device, &srcBuffer));
SUCCESS_OR_TERMINATE(
zeMemAllocShared(context, &deviceDesc, &hostDesc,
dstSize, 1, device, &dstBuffer));
// Initialize buffers
// dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device
// and there's no need to perform explicit copies
uint8_t *srcBufferChar = reinterpret_cast<uint8_t *>(srcBuffer);
for (uint32_t i = 0; i < srcDepth; i++) {
for (uint32_t j = 0; j < srcHeight; j++) {
for (uint32_t k = 0; k < srcWidth; k++) {
size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k;
srcBufferChar[index] = static_cast<uint8_t>(index);
}
}
}
int value = 0;
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast<void *>(&value),
sizeof(value), dstSize, nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr));
// Perform the copy
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, (dstWidth * dstHeight),
const_cast<const void *>(srcBuffer), &srcRegion, srcWidth, (srcWidth * srcHeight),
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint32_t>::max()));
uint8_t *dstBufferChar = reinterpret_cast<uint8_t *>(dstBuffer);
if (verbose) {
std::cout << "srcBufferChar\n";
for (uint32_t i = 0; i < srcDepth; i++) {
for (uint32_t j = 0; j < srcHeight; j++) {
for (uint32_t k = 0; k < srcWidth; k++) {
size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k;
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(srcBufferChar[index]) << " ";
}
std::cout << "\n";
}
std::cout << "\n";
}
std::cout << "dstBuffer\n";
for (uint32_t i = 0; i < dstDepth; i++) {
for (uint32_t j = 0; j < dstHeight; j++) {
for (uint32_t k = 0; k < dstWidth; k++) {
size_t index = (i * dstWidth * dstHeight) + (j * dstWidth) + k;
std::cout << std::setw(3) << std::dec << static_cast<unsigned int>(dstBufferChar[index]) << " ";
}
std::cout << "\n";
}
std::cout << "\n";
}
}
uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth + dstOriginZ * dstDepth * dstWidth;
uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth + srcOriginZ * srcDepth * srcWidth;
for (uint32_t i = 0; i < depth; i++) {
for (uint32_t j = 0; j < height; j++) {
for (uint32_t k = 0; k < width; k++) {
uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth * dstHeight) + (j * dstWidth) + k];
uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth * srcHeight) + (j * srcWidth) + k];
if (dstVal != srcVal) {
validRet = false;
}
}
}
}
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
}
int main(int argc, char *argv[]) {
verbose = isVerbose(argc, argv);
ze_context_handle_t context = nullptr;
auto device = zelloInitContextAndGetDevices(context);
bool outputValidationSuccessful = false;
if (verbose) {
ze_device_properties_t deviceProperties = {};
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
std::cout << deviceProperties.name << std::endl;
}
testAppendMemoryCopyFromHeapToDeviceToStack(context, device, outputValidationSuccessful);
if (outputValidationSuccessful)
testAppendMemoryCopyFromHostToDeviceToStack(context, device, outputValidationSuccessful);
if (outputValidationSuccessful)
testAppendMemoryCopy2DRegion(context, device, outputValidationSuccessful);
if (outputValidationSuccessful)
testAppendMemoryFillWithSomePattern(context, device, outputValidationSuccessful);
if (outputValidationSuccessful)
testAppendMemoryCopy3DRegion(context, device, outputValidationSuccessful);
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
std::cout << "\nZello Copy Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";
return outputValidationSuccessful;
}

View File

@@ -5,36 +5,15 @@
*
*/
#include <level_zero/ze_api.h>
#include "zello_common.h"
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <vector>
template <bool TerminateOnFailure, typename ResulT>
inline void validate(ResulT result, const char *message) {
if (result == ZE_RESULT_SUCCESS) {
return;
}
if (TerminateOnFailure) {
std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result
<< std::endl;
std::terminate();
}
}
#define SUCCESS_OR_TERMINATE(CALL) validate<true>(CALL, #CALL)
#define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate<true>(!(FLAG), #FLAG)
#define SUCCESS_OR_WARNING(CALL) validate<false>(CALL, #CALL)
#define SUCCESS_OR_WARNING_BOOL(FLAG) validate<false>(!(FLAG), #FLAG)
extern bool verbose;
bool verbose = false;
size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder
@@ -270,6 +249,7 @@ void run_server(int commSocket, bool &validRet) {
}
int main(int argc, char *argv[]) {
verbose = isVerbose(argc, argv);
bool outputValidationSuccessful;
int sv[2];

View File

@@ -5,36 +5,15 @@
*
*/
#include <level_zero/ze_api.h>
#include "zello_common.h"
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <string.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <vector>
template <bool TerminateOnFailure, typename ResulT>
inline void validate(ResulT result, const char *message) {
if (result == ZE_RESULT_SUCCESS) {
return;
}
if (TerminateOnFailure) {
std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result
<< std::endl;
std::terminate();
}
}
#define SUCCESS_OR_TERMINATE(CALL) validate<true>(CALL, #CALL)
#define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate<true>(!(FLAG), #FLAG)
#define SUCCESS_OR_WARNING(CALL) validate<false>(CALL, #CALL)
#define SUCCESS_OR_WARNING_BOOL(FLAG) validate<false>(!(FLAG), #FLAG)
extern bool verbose;
bool verbose = false;
uint8_t uinitializedPattern = 1;
uint8_t expectedPattern = 7;
@@ -298,6 +277,7 @@ void run_server(int commSocket, bool &validRet) {
}
int main(int argc, char *argv[]) {
verbose = isVerbose(argc, argv);
bool outputValidationSuccessful;
int sv[2];

View File

@@ -5,15 +5,10 @@
*
*/
#include <level_zero/ze_api.h>
#include "zello_common.h"
#include <chrono>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <vector>
extern bool verbose;
bool verbose = false;
inline std::vector<uint8_t> loadBinaryFile(const std::string &filePath) {
std::ifstream stream(filePath, std::ios::in);
@@ -31,22 +26,6 @@ inline std::vector<uint8_t> loadBinaryFile(const std::string &filePath) {
return binary_file;
}
template <bool TerminateOnFailure, typename ResulT>
inline void validate(ResulT result, const char *message) {
if (result == 0) { // assumption 0 is success
std::cerr << "SUCCESS : " << message << std::endl;
return;
}
std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result
<< std::endl;
if (TerminateOnFailure) {
std::terminate();
}
}
#define SUCCESS_OR_TERMINATE(CALL) validate<true>(CALL, #CALL)
void createCmdQueueAndCmdList(ze_context_handle_t &context,
ze_device_handle_t &device,
ze_command_queue_handle_t &cmdQueue,
@@ -270,7 +249,6 @@ bool testKernelTimestampHostQuery(ze_context_handle_t &context,
}
bool testKernelTimestampApendQuery(ze_context_handle_t &context,
ze_driver_handle_t &driver,
ze_device_handle_t &device) {
ze_command_queue_handle_t cmdQueue;
@@ -385,42 +363,26 @@ void printResult(bool result, std::string &currentTest) {
}
int main(int argc, char *argv[]) {
verbose = isVerbose(argc, argv);
ze_context_handle_t context = nullptr;
auto device = zelloInitContextAndGetDevices(context);
if (verbose) {
ze_device_properties_t deviceProperties = {};
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
std::cout << deviceProperties.name << std::endl;
std::cout << "Device : \n"
<< " * name : " << deviceProperties.name << "\n"
<< " * vendorId : " << std::hex << deviceProperties.vendorId << "\n";
}
bool result;
SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY));
uint32_t driverCount = 0;
SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr));
if (driverCount == 0)
std::terminate();
ze_driver_handle_t driverHandle;
SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle));
ze_context_desc_t contextDesc = {};
ze_context_handle_t context;
SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &contextDesc, &context));
uint32_t deviceCount = 0;
SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr));
if (deviceCount == 0)
std::terminate();
ze_device_handle_t device;
SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, &device));
ze_device_properties_t deviceProperties = {};
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
std::cout << "Device : \n"
<< " * name : " << deviceProperties.name << "\n"
<< " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
<< " * vendorId : " << deviceProperties.vendorId << "\n";
std::string currentTest;
currentTest = "Test Append Write of Global Timestamp";
result = testKernelTimestampApendQuery(context, driverHandle, device);
result = testKernelTimestampApendQuery(context, device);
printResult(result, currentTest);
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
return result ? 0 : 1;

View File

@@ -5,86 +5,21 @@
*
*/
#include <level_zero/ze_api.h>
#include "zello_common.h"
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <vector>
#define VALIDATECALL(myZeCall) \
do { \
if (myZeCall != ZE_RESULT_SUCCESS) { \
std::cout << "Error at " \
<< #myZeCall << ": " \
<< __FUNCTION__ << ": " \
<< __LINE__ << "\n"; \
std::terminate(); \
} \
} while (0);
int main(int argc, char *argv[]) {
// Initialize driver
VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY));
// Retrieve driver
uint32_t driverCount = 0;
VALIDATECALL(zeDriverGet(&driverCount, nullptr));
ze_driver_handle_t driverHandle;
VALIDATECALL(zeDriverGet(&driverCount, &driverHandle));
ze_context_desc_t contextDesc = {};
ze_context_handle_t context;
VALIDATECALL(zeContextCreate(driverHandle, &contextDesc, &context));
// Retrieve device
uint32_t deviceCount = 0;
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr));
ze_device_handle_t device;
deviceCount = 1;
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, &device));
// Print some properties
ze_device_properties_t deviceProperties = {};
VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties));
std::cout << "Device : \n"
<< " * name : " << deviceProperties.name << "\n"
<< " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
<< " * vendorId : " << std::hex << deviceProperties.vendorId << "\n";
// Create command queue
uint32_t numQueueGroups = 0;
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
if (numQueueGroups == 0) {
std::cout << "No queue groups found!\n";
std::terminate();
}
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups,
queueProperties.data()));
extern bool verbose;
bool verbose = false;
void executeGpuKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) {
ze_command_queue_handle_t cmdQueue;
ze_command_queue_desc_t cmdQueueDesc = {};
ze_command_list_handle_t cmdList;
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
cmdQueueDesc.ordinal = i;
}
}
cmdQueueDesc.ordinal = getCommandQueueOrdinal(device);
cmdQueueDesc.index = 0;
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
VALIDATECALL(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
// Create command list
ze_command_list_handle_t cmdList;
ze_command_list_desc_t cmdListDesc = {};
cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal;
VALIDATECALL(zeCommandListCreate(context, device, &cmdListDesc, &cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
// Create two shared buffers
constexpr size_t allocSize = 4096;
@@ -96,10 +31,10 @@ int main(int argc, char *argv[]) {
hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;
void *srcBuffer = nullptr;
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
void *dstBuffer = nullptr;
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
// Initialize memory
constexpr uint8_t val = 55;
@@ -136,46 +71,46 @@ int main(int argc, char *argv[]) {
free(strLog);
}
VALIDATECALL(zeModuleBuildLogDestroy(buildlog));
SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog));
ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = "CopyBufferToBufferBytes";
VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel));
SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel));
uint32_t groupSizeX = 32u;
uint32_t groupSizeY = 1u;
uint32_t groupSizeZ = 1u;
VALIDATECALL(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
uint32_t offset = 0;
VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
VALIDATECALL(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset));
VALIDATECALL(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset));
VALIDATECALL(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset));
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset));
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset));
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset));
ze_group_count_t dispatchTraits;
dispatchTraits.groupCountX = allocSize / groupSizeX;
dispatchTraits.groupCountY = 1u;
dispatchTraits.groupCountZ = 1u;
VALIDATECALL(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
nullptr, 0, nullptr));
file.close();
} else {
// Perform a GPU copy
VALIDATECALL(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, nullptr, 0, nullptr));
}
// Close list and submit for execution
VALIDATECALL(zeCommandListClose(cmdList));
VALIDATECALL(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
VALIDATECALL(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
// Validate
bool outputValidationSuccessful = true;
outputValidationSuccessful = true;
if (memcmp(dstBuffer, srcBuffer, allocSize)) {
outputValidationSuccessful = false;
uint8_t *srcCharBuffer = static_cast<uint8_t *>(srcBuffer);
@@ -190,11 +125,30 @@ int main(int argc, char *argv[]) {
}
// Cleanup
VALIDATECALL(zeMemFree(context, dstBuffer));
VALIDATECALL(zeMemFree(context, srcBuffer));
VALIDATECALL(zeCommandListDestroy(cmdList));
VALIDATECALL(zeCommandQueueDestroy(cmdQueue));
VALIDATECALL(zeContextDestroy(context));
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
}
int main(int argc, char *argv[]) {
verbose = isVerbose(argc, argv);
ze_context_handle_t context = nullptr;
auto device = zelloInitContextAndGetDevices(context);
bool outputValidationSuccessful;
if (verbose) {
ze_device_properties_t deviceProperties = {};
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
std::cout << deviceProperties.name << std::endl;
std::cout << "Device : \n"
<< " * name : " << deviceProperties.name << "\n"
<< " * vendorId : " << std::hex << deviceProperties.vendorId << "\n";
}
executeGpuKernelAndValidate(context, device, outputValidationSuccessful);
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
std::cout << "\nZello World Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";

View File

@@ -7,26 +7,12 @@
#include "shared/offline_compiler/source/ocloc_api.h"
#include <level_zero/ze_api.h>
#include "zello_common.h"
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include <sstream>
#define VALIDATECALL(myZeCall) \
do { \
if (myZeCall != ZE_RESULT_SUCCESS) { \
std::cout << "Error at " \
<< #myZeCall << ": " \
<< __FUNCTION__ << ": " \
<< __LINE__ << "\n"; \
std::terminate(); \
} \
} while (0);
extern bool verbose;
bool verbose = false;
const char *module = R"===(
__kernel void kernel_copy(__global int *dst, __global char *src){
@@ -85,67 +71,16 @@ std::vector<uint8_t> compileToSpirV(const std::string &src, const std::string &o
return ret;
}
int main(int argc, char *argv[]) {
// Initialize driver
VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY));
// Retrieve driver
uint32_t driverCount = 0;
VALIDATECALL(zeDriverGet(&driverCount, nullptr));
ze_driver_handle_t driverHandle;
VALIDATECALL(zeDriverGet(&driverCount, &driverHandle));
ze_context_desc_t contextDesc = {};
ze_context_handle_t context;
VALIDATECALL(zeContextCreate(driverHandle, &contextDesc, &context));
// Retrieve device
uint32_t deviceCount = 0;
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr));
ze_device_handle_t device;
deviceCount = 1;
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, &device));
// Print some properties
ze_device_properties_t deviceProperties = {};
VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties));
std::cout << "Device : \n"
<< " * name : " << deviceProperties.name << "\n"
<< " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
<< " * vendorId : " << std::hex << deviceProperties.vendorId << std::dec << "\n";
// Create command queue
uint32_t numQueueGroups = 0;
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
if (numQueueGroups == 0) {
std::cout << "No queue groups found!\n";
std::terminate();
}
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups,
queueProperties.data()));
void executeKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) {
ze_command_queue_handle_t cmdQueue;
ze_command_queue_desc_t cmdQueueDesc = {};
ze_command_list_handle_t cmdList;
for (uint32_t i = 0; i < numQueueGroups; i++) {
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
cmdQueueDesc.ordinal = i;
}
}
cmdQueueDesc.ordinal = getCommandQueueOrdinal(device);
cmdQueueDesc.index = 0;
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
VALIDATECALL(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
// Create command list
ze_command_list_handle_t cmdList;
ze_command_list_desc_t cmdListDesc = {};
cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal;
VALIDATECALL(zeCommandListCreate(context, device, &cmdListDesc, &cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList));
// Create two shared buffers
constexpr size_t allocSize = 4096;
ze_device_mem_alloc_desc_t deviceDesc = {};
@@ -156,10 +91,10 @@ int main(int argc, char *argv[]) {
hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;
void *srcBuffer = nullptr;
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
void *dstBuffer = nullptr;
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
// Initialize memory
constexpr uint8_t val = 55;
@@ -171,7 +106,7 @@ int main(int argc, char *argv[]) {
if (buildLog.size() > 0) {
std::cout << "Build log " << buildLog;
}
VALIDATECALL((0 == spirV.size()));
SUCCESS_OR_TERMINATE((0 == spirV.size()));
ze_module_handle_t module = nullptr;
ze_kernel_handle_t kernel = nullptr;
@@ -193,13 +128,13 @@ int main(int argc, char *argv[]) {
free(strLog);
}
VALIDATECALL(zeModuleBuildLogDestroy(buildlog));
SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog));
ze_kernel_desc_t kernelDesc = {};
kernelDesc.pKernelName = "kernel_copy";
VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel));
SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel));
ze_kernel_properties_t kernProps;
VALIDATECALL(zeKernelGetProperties(kernel, &kernProps));
SUCCESS_OR_TERMINATE(zeKernelGetProperties(kernel, &kernProps));
std::cout << "Kernel : \n"
<< " * name : " << kernelDesc.pKernelName << "\n"
<< " * uuid.mid : " << kernProps.uuid.mid << "\n"
@@ -219,28 +154,28 @@ int main(int argc, char *argv[]) {
uint32_t groupSizeX = 32u;
uint32_t groupSizeY = 1u;
uint32_t groupSizeZ = 1u;
VALIDATECALL(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
ze_group_count_t dispatchTraits;
dispatchTraits.groupCountX = allocSize / groupSizeX;
dispatchTraits.groupCountY = 1u;
dispatchTraits.groupCountZ = 1u;
VALIDATECALL(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
nullptr, 0, nullptr));
// Close list and submit for execution
VALIDATECALL(zeCommandListClose(cmdList));
VALIDATECALL(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
VALIDATECALL(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
// Validate
bool outputValidationSuccessful = true;
outputValidationSuccessful = true;
if (memcmp(dstBuffer, srcBuffer, allocSize)) {
outputValidationSuccessful = false;
uint8_t *srcCharBuffer = static_cast<uint8_t *>(srcBuffer);
@@ -255,11 +190,30 @@ int main(int argc, char *argv[]) {
}
// Cleanup
VALIDATECALL(zeMemFree(context, dstBuffer));
VALIDATECALL(zeMemFree(context, srcBuffer));
VALIDATECALL(zeCommandListDestroy(cmdList));
VALIDATECALL(zeCommandQueueDestroy(cmdQueue));
VALIDATECALL(zeContextDestroy(context));
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList));
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue));
}
int main(int argc, char *argv[]) {
verbose = isVerbose(argc, argv);
ze_context_handle_t context = nullptr;
auto device = zelloInitContextAndGetDevices(context);
bool outputValidationSuccessful;
if (verbose) {
ze_device_properties_t deviceProperties = {};
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties));
std::cout << deviceProperties.name << std::endl;
std::cout << "Device : \n"
<< " * name : " << deviceProperties.name << "\n"
<< " * vendorId : " << std::hex << deviceProperties.vendorId << "\n";
}
executeKernelAndValidate(context, device, outputValidationSuccessful);
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
std::cout << "\nZello World Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";