From 7883fa331a95f5677a8be7268c6e01bd53352c63 Mon Sep 17 00:00:00 2001 From: Jitendra Sharma Date: Wed, 25 Nov 2020 11:05:03 +0530 Subject: [PATCH] Add zello_copy black_box test Signed-off-by: Jitendra Sharma --- .../core/test/black_box_tests/CMakeLists.txt | 3 + .../black_box_tests/common/zello_common.h | 137 +++++ .../core/test/black_box_tests/zello_copy.cpp | 474 ++++++++++++++++++ .../zello_ipc_copy_dma_buf.cpp | 28 +- .../zello_ipc_copy_dma_buf_p2p.cpp | 28 +- .../test/black_box_tests/zello_timestamp.cpp | 74 +-- .../test/black_box_tests/zello_world_gpu.cpp | 146 ++---- .../zello_world_jitc_ocloc.cpp | 144 ++---- 8 files changed, 739 insertions(+), 295 deletions(-) create mode 100644 level_zero/core/test/black_box_tests/common/zello_common.h create mode 100644 level_zero/core/test/black_box_tests/zello_copy.cpp diff --git a/level_zero/core/test/black_box_tests/CMakeLists.txt b/level_zero/core/test/black_box_tests/CMakeLists.txt index 63ff20737e..0c9bb4addf 100644 --- a/level_zero/core/test/black_box_tests/CMakeLists.txt +++ b/level_zero/core/test/black_box_tests/CMakeLists.txt @@ -12,8 +12,11 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug") zello_world_jitc_ocloc zello_ipc_copy_dma_buf zello_ipc_copy_dma_buf_p2p + zello_copy ) + include_directories(common) + foreach(TEST_NAME ${TEST_TARGETS}) if(MSVC) if(${TEST_NAME} STREQUAL "zello_ipc_copy_dma_buf") diff --git a/level_zero/core/test/black_box_tests/common/zello_common.h b/level_zero/core/test/black_box_tests/common/zello_common.h new file mode 100644 index 0000000000..ac42c4c3d0 --- /dev/null +++ b/level_zero/core/test/black_box_tests/common/zello_common.h @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +extern bool verbose; + +template +inline void validate(ResulT result, const char *message) { + if (result == ZE_RESULT_SUCCESS) { + if (verbose) { + std::cerr << "SUCCESS : " << message << std::endl; + } + return; + } + + if (verbose) { + std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result + << std::endl; + } + + if (TerminateOnFailure) { + std::terminate(); + } +} + +#define SUCCESS_OR_TERMINATE(CALL) validate(CALL, #CALL) +#define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate(!(FLAG), #FLAG) +#define SUCCESS_OR_WARNING(CALL) validate(CALL, #CALL) +#define SUCCESS_OR_WARNING_BOOL(FLAG) validate(!(FLAG), #FLAG) + +inline bool isParamEnabled(int argc, char *argv[], const char *shortName, const char *longName) { + char **arg = &argv[1]; + char **argE = &argv[argc]; + + for (; arg != argE; ++arg) { + if ((0 == strcmp(*arg, shortName)) || (0 == strcmp(*arg, longName))) { + return true; + } + } + + return false; +} + +inline bool isVerbose(int argc, char *argv[]) { + bool enabled = isParamEnabled(argc, argv, "-v", "--verbose"); + if (enabled == false) { + return false; + } + + std::cerr << "Verbose mode detected"; + + return true; +} + +uint32_t getCommandQueueOrdinal(ze_device_handle_t &device) { + uint32_t numQueueGroups = 0; + SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); + if (numQueueGroups == 0) { + std::cout << "No queue groups found!\n"; + std::terminate(); + } + std::vector queueProperties(numQueueGroups); + SUCCESS_OR_TERMINATE(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, + queueProperties.data())); + uint32_t computeQueueGroupOrdinal = numQueueGroups; + for (uint32_t i = 0; i < numQueueGroups; i++) { + if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { + computeQueueGroupOrdinal = i; + break; + } + } + return computeQueueGroupOrdinal; +} + +ze_result_t createCommandQueue(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue) { + ze_command_queue_desc_t descriptor = {}; + descriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC; + + descriptor.pNext = nullptr; + descriptor.flags = 0; + descriptor.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT; + descriptor.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + + descriptor.ordinal = getCommandQueueOrdinal(device); + descriptor.index = 0; + return zeCommandQueueCreate(context, device, &descriptor, &cmdQueue); +} + +ze_result_t createCommandList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_list_handle_t &cmdList) { + ze_command_list_desc_t descriptor = {}; + descriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC; + + descriptor.pNext = nullptr; + descriptor.flags = 0; + descriptor.commandQueueGroupOrdinal = getCommandQueueOrdinal(device); + + return zeCommandListCreate(context, device, &descriptor, &cmdList); +} + +ze_device_handle_t zelloInitContextAndGetDevices(ze_context_handle_t &context) { + SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY)); + + uint32_t driverCount = 0; + SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr)); + if (driverCount == 0) { + std::cout << "No driver handle found!\n"; + std::terminate(); + } + ze_driver_handle_t driverHandle; + SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle)); + ze_context_desc_t context_desc = {}; + context_desc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC; + SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &context_desc, &context)); + + uint32_t deviceCount = 0; + SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr)); + if (deviceCount == 0) { + std::cout << "No device found!\n"; + std::terminate(); + } + std::vector devices(deviceCount, nullptr); + SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, devices.data())); + return devices[0]; +} \ No newline at end of file diff --git a/level_zero/core/test/black_box_tests/zello_copy.cpp b/level_zero/core/test/black_box_tests/zello_copy.cpp new file mode 100644 index 0000000000..9d8c6eb6eb --- /dev/null +++ b/level_zero/core/test/black_box_tests/zello_copy.cpp @@ -0,0 +1,474 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "zello_common.h" + +#include + +extern bool verbose; +bool verbose = false; + +void testAppendMemoryCopyFromHeapToDeviceToStack(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { + const size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder + char *heapBuffer = new char[allocSize]; + void *zeBuffer = nullptr; + char stackBuffer[allocSize]; + + ze_command_queue_handle_t cmdQueue; + ze_command_list_handle_t cmdList; + + SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue)); + SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); + + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; + deviceDesc.ordinal = 0; + deviceDesc.flags = 0; + deviceDesc.pNext = nullptr; + + SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); + + for (size_t i = 0; i < allocSize; ++i) { + heapBuffer[i] = static_cast(i + 1); + } + memset(stackBuffer, 0, allocSize); + + // Copy from heap to device-allocated memory + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeBuffer, heapBuffer, allocSize, + nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); + + // Copy from device-allocated memory to stack + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize, + nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); + + // Validate stack and ze buffers have the original data from heapBuffer + validRet = (0 == memcmp(heapBuffer, stackBuffer, allocSize)); + + delete[] heapBuffer; + SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); +} + +void testAppendMemoryCopyFromHostToDeviceToStack(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { + const size_t allocSize = 4096 + 7; // +7 to brake alignment and make it harder + char *hostBuffer; + void *zeBuffer = nullptr; + char stackBuffer[allocSize]; + + ze_command_queue_handle_t cmdQueue; + ze_command_list_handle_t cmdList; + + SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue)); + SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); + + ze_host_mem_alloc_desc_t hostDesc = {}; + hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; + hostDesc.pNext = nullptr; + hostDesc.flags = 0; + SUCCESS_OR_TERMINATE(zeMemAllocHost(context, &hostDesc, allocSize, 1, (void **)(&hostBuffer))); + + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; + deviceDesc.ordinal = 0; + deviceDesc.flags = 0; + deviceDesc.pNext = nullptr; + + SUCCESS_OR_TERMINATE(zeMemAllocDevice(context, &deviceDesc, allocSize, allocSize, device, &zeBuffer)); + + for (size_t i = 0; i < allocSize; ++i) { + hostBuffer[i] = static_cast(i + 1); + } + memset(stackBuffer, 0, allocSize); + + // Copy from host-allocated to device-allocated memory + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, zeBuffer, hostBuffer, allocSize, + nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); + + // Copy from device-allocated memory to stack + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, stackBuffer, zeBuffer, allocSize, + nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); + + // Validate stack and ze buffers have the original data from hostBuffer + validRet = (0 == memcmp(hostBuffer, stackBuffer, allocSize)); + + SUCCESS_OR_TERMINATE(zeMemFree(context, hostBuffer)); + SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); +} + +void testAppendMemoryCopy2DRegion(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { + validRet = true; + + ze_command_queue_handle_t cmdQueue; + ze_command_list_handle_t cmdList; + + SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue)); + SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); + + void *dstBuffer = nullptr; + uint32_t dstWidth = verbose ? 16 : 256; // width of the dst 2D buffer in bytes + uint32_t dstHeight = verbose ? 32 : 128; // height of the dst 2D buffer in bytes + uint32_t dstOriginX = verbose ? 8 : 32; // Offset in bytes + uint32_t dstOriginY = verbose ? 8 : 64; // Offset in rows + uint32_t dstSize = dstHeight * dstWidth; // Size of the dst buffer + + void *srcBuffer = nullptr; + uint32_t srcWidth = verbose ? 24 : 128; // width of the src 2D buffer in bytes + uint32_t srcHeight = verbose ? 16 : 96; // height of the src 2D buffer in bytes + uint32_t srcOriginX = verbose ? 4 : 16; // Offset in bytes + uint32_t srcOriginY = verbose ? 4 : 32; // Offset in rows + uint32_t srcSize = srcHeight * srcWidth; // Size of the src buffer + + uint32_t width = verbose ? 8 : 64; // width of the region to copy + uint32_t height = verbose ? 12 : 32; // height of the region to copy + const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, 0, width, height, 0}; + const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, 0, width, height, 0}; + + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; + deviceDesc.pNext = nullptr; + deviceDesc.ordinal = 0; + deviceDesc.flags = 0; + + ze_host_mem_alloc_desc_t hostDesc = {}; + hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; + hostDesc.pNext = nullptr; + hostDesc.flags = 0; + + SUCCESS_OR_TERMINATE( + zeMemAllocShared(context, &deviceDesc, &hostDesc, + srcSize, 1, device, &srcBuffer)); + + SUCCESS_OR_TERMINATE( + zeMemAllocShared(context, &deviceDesc, &hostDesc, + dstSize, 1, device, &dstBuffer)); + + // Initialize buffers + // dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device + // and there's no need to perform explicit copies + uint8_t *srcBufferChar = reinterpret_cast(srcBuffer); + for (uint32_t i = 0; i < srcHeight; i++) { + for (uint32_t j = 0; j < srcWidth; j++) { + srcBufferChar[i * srcWidth + j] = static_cast(i * srcWidth + j); + } + } + + int value = 0; + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast(&value), + sizeof(value), dstSize, nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); + + // Perform the copy + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, 0, + const_cast(srcBuffer), &srcRegion, srcWidth, 0, + nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); + + uint8_t *dstBufferChar = reinterpret_cast(dstBuffer); + if (verbose) { + std::cout << "srcBufferChar\n"; + for (uint32_t i = 0; i < srcHeight; i++) { + for (uint32_t j = 0; j < srcWidth; j++) { + std::cout << std::setw(3) << std::dec << static_cast(srcBufferChar[i * srcWidth + j]) << " "; + } + std::cout << "\n"; + } + + std::cout << "dstBuffer\n"; + for (uint32_t i = 0; i < dstHeight; i++) { + for (uint32_t j = 0; j < dstWidth; j++) { + std::cout << std::setw(3) << std::dec << static_cast(dstBufferChar[i * dstWidth + j]) << " "; + } + std::cout << "\n"; + } + } + + uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth; + uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth; + for (uint32_t i = 0; i < height; i++) { + for (uint32_t j = 0; j < width; j++) { + uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth) + j]; + uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth) + j]; + if (dstVal != srcVal) { + validRet = false; + } + } + } + + SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); + SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); +} + +void testAppendMemoryFillWithSomePattern(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { + const size_t allocSize = 4096 + 7; + + char pattern0 = 5; + const size_t pattern1Size = 9; + char *pattern1 = new char[pattern1Size]; + void *zeBuffer0 = nullptr; + void *zeBuffer1 = nullptr; + + ze_command_queue_handle_t cmdQueue; + ze_command_list_handle_t cmdList; + + SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue)); + SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); + + // Initialize buffers + // zeBuffer0 and zeBuffer1 are shared allocations, so they have UVA between host and device + // and there's no need to perform explicit copies + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; + deviceDesc.pNext = nullptr; + deviceDesc.ordinal = 0; + deviceDesc.flags = 0; + + ze_host_mem_alloc_desc_t hostDesc = {}; + hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; + hostDesc.pNext = nullptr; + hostDesc.flags = 0; + + SUCCESS_OR_TERMINATE( + zeMemAllocShared(context, &deviceDesc, &hostDesc, + allocSize, 1, device, &zeBuffer0)); + + SUCCESS_OR_TERMINATE( + zeMemAllocShared(context, &deviceDesc, &hostDesc, + allocSize, 1, device, &zeBuffer1)); + + // Fibonacci + pattern1[0] = 1; + pattern1[1] = 2; + for (size_t i = 2; i < pattern1Size; i++) { + pattern1[i] = pattern1[i - 1] + pattern1[i - 2]; + } + + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer0, &pattern0, sizeof(pattern0), allocSize, + nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, zeBuffer1, pattern1, pattern1Size, allocSize, + nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); + + validRet = true; + uint8_t *zeBufferChar0 = reinterpret_cast(zeBuffer0); + for (size_t i = 0; i < allocSize; ++i) { + if (zeBufferChar0[i] != pattern0) { + validRet = false; + if (verbose) { + std::cout << "dstBufferChar0[" << i << " ] " + << static_cast(zeBufferChar0[i]) + << "!= pattern0 " << pattern0 << "\n"; + } + break; + } + } + + if (validRet == true) { + uint8_t *zeBufferChar1 = reinterpret_cast(zeBuffer1); + size_t j = 0; + for (size_t i = 0; i < allocSize; i++) { + if (zeBufferChar1[i] != pattern1[j]) { + validRet = false; + if (verbose) { + std::cout << "dstBufferChar1[" << i << " ] " + << static_cast(zeBufferChar1[i]) + << "!= pattern1[" << j << " ] " + << static_cast(pattern1[j]) << "\n"; + } + break; + } + j++; + if (j >= pattern1Size) { + j = 0; + } + } + } + + delete[] pattern1; + SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer0)); + SUCCESS_OR_TERMINATE(zeMemFree(context, zeBuffer1)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); +} + +void testAppendMemoryCopy3DRegion(ze_context_handle_t context, ze_device_handle_t &device, bool &validRet) { + validRet = true; + + ze_command_queue_handle_t cmdQueue; + ze_command_list_handle_t cmdList; + + SUCCESS_OR_TERMINATE(createCommandQueue(context, device, cmdQueue)); + SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); + + void *dstBuffer = nullptr; + uint32_t dstWidth = verbose ? 8 : 64; // width of the dst 3D buffer in bytes + uint32_t dstHeight = verbose ? 8 : 64; // height of the dst 3D buffer in bytes + uint32_t dstDepth = verbose ? 2 : 4; // depth of the dst 3D buffer in bytes + uint32_t dstOriginX = 0; // Offset in bytes + uint32_t dstOriginY = 0; // Offset in rows + uint32_t dstOriginZ = 0; // Offset in rows + uint32_t dstSize = dstHeight * dstWidth * dstDepth; // Size of the dst buffer + + void *srcBuffer = nullptr; + uint32_t srcWidth = verbose ? 8 : 64; // width of the src 3D buffer in bytes + uint32_t srcHeight = verbose ? 8 : 64; // height of the src 3D buffer in bytes + uint32_t srcDepth = verbose ? 2 : 4; // depth of the src 3D buffer in bytes + uint32_t srcOriginX = 0; // Offset in bytes + uint32_t srcOriginY = 0; // Offset in rows + uint32_t srcOriginZ = 0; // Offset in rows + uint32_t srcSize = srcHeight * srcWidth * srcDepth; // Size of the src buffer + + uint32_t width = verbose ? 8 : 64; // width of the region to copy + uint32_t height = verbose ? 8 : 64; // height of the region to copy + uint32_t depth = verbose ? 2 : 4; // height of the region to copy + const ze_copy_region_t dstRegion = {dstOriginX, dstOriginY, dstOriginZ, width, height, depth}; + const ze_copy_region_t srcRegion = {srcOriginX, srcOriginY, dstOriginZ, width, height, depth}; + + ze_device_mem_alloc_desc_t deviceDesc = {}; + deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC; + deviceDesc.pNext = nullptr; + deviceDesc.ordinal = 0; + deviceDesc.flags = 0; + + ze_host_mem_alloc_desc_t hostDesc = {}; + hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC; + hostDesc.pNext = nullptr; + hostDesc.flags = 0; + + SUCCESS_OR_TERMINATE( + zeMemAllocShared(context, &deviceDesc, &hostDesc, + srcSize, 1, device, &srcBuffer)); + + SUCCESS_OR_TERMINATE( + zeMemAllocShared(context, &deviceDesc, &hostDesc, + dstSize, 1, device, &dstBuffer)); + + // Initialize buffers + // dstBuffer and srcBuffer are shared allocations, so they have UVA between host and device + // and there's no need to perform explicit copies + uint8_t *srcBufferChar = reinterpret_cast(srcBuffer); + for (uint32_t i = 0; i < srcDepth; i++) { + for (uint32_t j = 0; j < srcHeight; j++) { + for (uint32_t k = 0; k < srcWidth; k++) { + size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k; + srcBufferChar[index] = static_cast(index); + } + } + } + + int value = 0; + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryFill(cmdList, dstBuffer, reinterpret_cast(&value), + sizeof(value), dstSize, nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList, nullptr, 0, nullptr)); + + // Perform the copy + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopyRegion(cmdList, dstBuffer, &dstRegion, dstWidth, (dstWidth * dstHeight), + const_cast(srcBuffer), &srcRegion, srcWidth, (srcWidth * srcHeight), + nullptr, 0, nullptr)); + + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); + + uint8_t *dstBufferChar = reinterpret_cast(dstBuffer); + if (verbose) { + std::cout << "srcBufferChar\n"; + for (uint32_t i = 0; i < srcDepth; i++) { + for (uint32_t j = 0; j < srcHeight; j++) { + for (uint32_t k = 0; k < srcWidth; k++) { + size_t index = (i * srcWidth * srcHeight) + (j * srcWidth) + k; + std::cout << std::setw(3) << std::dec << static_cast(srcBufferChar[index]) << " "; + } + std::cout << "\n"; + } + std::cout << "\n"; + } + + std::cout << "dstBuffer\n"; + for (uint32_t i = 0; i < dstDepth; i++) { + for (uint32_t j = 0; j < dstHeight; j++) { + for (uint32_t k = 0; k < dstWidth; k++) { + size_t index = (i * dstWidth * dstHeight) + (j * dstWidth) + k; + std::cout << std::setw(3) << std::dec << static_cast(dstBufferChar[index]) << " "; + } + std::cout << "\n"; + } + std::cout << "\n"; + } + } + + uint32_t dstOffset = dstOriginX + dstOriginY * dstWidth + dstOriginZ * dstDepth * dstWidth; + uint32_t srcOffset = srcOriginX + srcOriginY * srcWidth + srcOriginZ * srcDepth * srcWidth; + for (uint32_t i = 0; i < depth; i++) { + for (uint32_t j = 0; j < height; j++) { + for (uint32_t k = 0; k < width; k++) { + uint8_t dstVal = dstBufferChar[dstOffset + (i * dstWidth * dstHeight) + (j * dstWidth) + k]; + uint8_t srcVal = srcBufferChar[srcOffset + (i * srcWidth * srcHeight) + (j * srcWidth) + k]; + if (dstVal != srcVal) { + validRet = false; + } + } + } + } + + SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); + SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); +} + +int main(int argc, char *argv[]) { + verbose = isVerbose(argc, argv); + + ze_context_handle_t context = nullptr; + auto device = zelloInitContextAndGetDevices(context); + bool outputValidationSuccessful = false; + + if (verbose) { + ze_device_properties_t deviceProperties = {}; + SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); + std::cout << deviceProperties.name << std::endl; + } + testAppendMemoryCopyFromHeapToDeviceToStack(context, device, outputValidationSuccessful); + if (outputValidationSuccessful) + testAppendMemoryCopyFromHostToDeviceToStack(context, device, outputValidationSuccessful); + if (outputValidationSuccessful) + testAppendMemoryCopy2DRegion(context, device, outputValidationSuccessful); + if (outputValidationSuccessful) + testAppendMemoryFillWithSomePattern(context, device, outputValidationSuccessful); + if (outputValidationSuccessful) + testAppendMemoryCopy3DRegion(context, device, outputValidationSuccessful); + + SUCCESS_OR_TERMINATE(zeContextDestroy(context)); + std::cout << "\nZello Copy Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; + return outputValidationSuccessful; +} diff --git a/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf.cpp b/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf.cpp index 0ca0c4cd07..92dd745042 100644 --- a/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf.cpp +++ b/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf.cpp @@ -5,36 +5,15 @@ * */ -#include +#include "zello_common.h" -#include -#include -#include -#include -#include #include #include #include #include -#include -template -inline void validate(ResulT result, const char *message) { - if (result == ZE_RESULT_SUCCESS) { - return; - } - - if (TerminateOnFailure) { - std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result - << std::endl; - std::terminate(); - } -} - -#define SUCCESS_OR_TERMINATE(CALL) validate(CALL, #CALL) -#define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate(!(FLAG), #FLAG) -#define SUCCESS_OR_WARNING(CALL) validate(CALL, #CALL) -#define SUCCESS_OR_WARNING_BOOL(FLAG) validate(!(FLAG), #FLAG) +extern bool verbose; +bool verbose = false; size_t allocSize = 4096 + 7; // +7 to break alignment and make it harder @@ -270,6 +249,7 @@ void run_server(int commSocket, bool &validRet) { } int main(int argc, char *argv[]) { + verbose = isVerbose(argc, argv); bool outputValidationSuccessful; int sv[2]; diff --git a/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf_p2p.cpp b/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf_p2p.cpp index 01fc8e3b04..357377b9ee 100644 --- a/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf_p2p.cpp +++ b/level_zero/core/test/black_box_tests/zello_ipc_copy_dma_buf_p2p.cpp @@ -5,36 +5,15 @@ * */ -#include +#include "zello_common.h" -#include -#include -#include -#include -#include #include #include #include #include -#include -template -inline void validate(ResulT result, const char *message) { - if (result == ZE_RESULT_SUCCESS) { - return; - } - - if (TerminateOnFailure) { - std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result - << std::endl; - std::terminate(); - } -} - -#define SUCCESS_OR_TERMINATE(CALL) validate(CALL, #CALL) -#define SUCCESS_OR_TERMINATE_BOOL(FLAG) validate(!(FLAG), #FLAG) -#define SUCCESS_OR_WARNING(CALL) validate(CALL, #CALL) -#define SUCCESS_OR_WARNING_BOOL(FLAG) validate(!(FLAG), #FLAG) +extern bool verbose; +bool verbose = false; uint8_t uinitializedPattern = 1; uint8_t expectedPattern = 7; @@ -298,6 +277,7 @@ void run_server(int commSocket, bool &validRet) { } int main(int argc, char *argv[]) { + verbose = isVerbose(argc, argv); bool outputValidationSuccessful; int sv[2]; diff --git a/level_zero/core/test/black_box_tests/zello_timestamp.cpp b/level_zero/core/test/black_box_tests/zello_timestamp.cpp index c1c64ad6e4..9bf94362d5 100644 --- a/level_zero/core/test/black_box_tests/zello_timestamp.cpp +++ b/level_zero/core/test/black_box_tests/zello_timestamp.cpp @@ -5,15 +5,10 @@ * */ -#include +#include "zello_common.h" -#include -#include -#include -#include -#include -#include -#include +extern bool verbose; +bool verbose = false; inline std::vector loadBinaryFile(const std::string &filePath) { std::ifstream stream(filePath, std::ios::in); @@ -31,22 +26,6 @@ inline std::vector loadBinaryFile(const std::string &filePath) { return binary_file; } -template -inline void validate(ResulT result, const char *message) { - if (result == 0) { // assumption 0 is success - std::cerr << "SUCCESS : " << message << std::endl; - return; - } - std::cerr << (TerminateOnFailure ? "ERROR : " : "WARNING : ") << message << " : " << result - << std::endl; - - if (TerminateOnFailure) { - std::terminate(); - } -} - -#define SUCCESS_OR_TERMINATE(CALL) validate(CALL, #CALL) - void createCmdQueueAndCmdList(ze_context_handle_t &context, ze_device_handle_t &device, ze_command_queue_handle_t &cmdQueue, @@ -270,7 +249,6 @@ bool testKernelTimestampHostQuery(ze_context_handle_t &context, } bool testKernelTimestampApendQuery(ze_context_handle_t &context, - ze_driver_handle_t &driver, ze_device_handle_t &device) { ze_command_queue_handle_t cmdQueue; @@ -385,42 +363,26 @@ void printResult(bool result, std::string ¤tTest) { } int main(int argc, char *argv[]) { + verbose = isVerbose(argc, argv); + ze_context_handle_t context = nullptr; + auto device = zelloInitContextAndGetDevices(context); + + if (verbose) { + ze_device_properties_t deviceProperties = {}; + SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); + std::cout << deviceProperties.name << std::endl; + std::cout << "Device : \n" + << " * name : " << deviceProperties.name << "\n" + << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; + } + bool result; - - SUCCESS_OR_TERMINATE(zeInit(ZE_INIT_FLAG_GPU_ONLY)); - - uint32_t driverCount = 0; - SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, nullptr)); - if (driverCount == 0) - std::terminate(); - - ze_driver_handle_t driverHandle; - SUCCESS_OR_TERMINATE(zeDriverGet(&driverCount, &driverHandle)); - - ze_context_desc_t contextDesc = {}; - ze_context_handle_t context; - SUCCESS_OR_TERMINATE(zeContextCreate(driverHandle, &contextDesc, &context)); - - uint32_t deviceCount = 0; - SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, nullptr)); - if (deviceCount == 0) - std::terminate(); - - ze_device_handle_t device; - SUCCESS_OR_TERMINATE(zeDeviceGet(driverHandle, &deviceCount, &device)); - - ze_device_properties_t deviceProperties = {}; - SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); - std::cout << "Device : \n" - << " * name : " << deviceProperties.name << "\n" - << " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n" - << " * vendorId : " << deviceProperties.vendorId << "\n"; - std::string currentTest; currentTest = "Test Append Write of Global Timestamp"; - result = testKernelTimestampApendQuery(context, driverHandle, device); + result = testKernelTimestampApendQuery(context, device); printResult(result, currentTest); + SUCCESS_OR_TERMINATE(zeContextDestroy(context)); return result ? 0 : 1; diff --git a/level_zero/core/test/black_box_tests/zello_world_gpu.cpp b/level_zero/core/test/black_box_tests/zello_world_gpu.cpp index 0bf75a60a7..f03b89e84a 100644 --- a/level_zero/core/test/black_box_tests/zello_world_gpu.cpp +++ b/level_zero/core/test/black_box_tests/zello_world_gpu.cpp @@ -5,86 +5,21 @@ * */ -#include +#include "zello_common.h" -#include -#include -#include -#include -#include -#include - -#define VALIDATECALL(myZeCall) \ - do { \ - if (myZeCall != ZE_RESULT_SUCCESS) { \ - std::cout << "Error at " \ - << #myZeCall << ": " \ - << __FUNCTION__ << ": " \ - << __LINE__ << "\n"; \ - std::terminate(); \ - } \ - } while (0); - -int main(int argc, char *argv[]) { - // Initialize driver - VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY)); - - // Retrieve driver - uint32_t driverCount = 0; - VALIDATECALL(zeDriverGet(&driverCount, nullptr)); - - ze_driver_handle_t driverHandle; - VALIDATECALL(zeDriverGet(&driverCount, &driverHandle)); - - ze_context_desc_t contextDesc = {}; - ze_context_handle_t context; - VALIDATECALL(zeContextCreate(driverHandle, &contextDesc, &context)); - - // Retrieve device - uint32_t deviceCount = 0; - VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr)); - - ze_device_handle_t device; - deviceCount = 1; - VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, &device)); - - // Print some properties - ze_device_properties_t deviceProperties = {}; - VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties)); - - std::cout << "Device : \n" - << " * name : " << deviceProperties.name << "\n" - << " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n" - << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; - - // Create command queue - uint32_t numQueueGroups = 0; - VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); - if (numQueueGroups == 0) { - std::cout << "No queue groups found!\n"; - std::terminate(); - } - std::vector queueProperties(numQueueGroups); - VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, - queueProperties.data())); +extern bool verbose; +bool verbose = false; +void executeGpuKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) { ze_command_queue_handle_t cmdQueue; ze_command_queue_desc_t cmdQueueDesc = {}; + ze_command_list_handle_t cmdList; - for (uint32_t i = 0; i < numQueueGroups; i++) { - if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { - cmdQueueDesc.ordinal = i; - } - } + cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; - VALIDATECALL(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); - - // Create command list - ze_command_list_handle_t cmdList; - ze_command_list_desc_t cmdListDesc = {}; - cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal; - VALIDATECALL(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); + SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); // Create two shared buffers constexpr size_t allocSize = 4096; @@ -96,10 +31,10 @@ int main(int argc, char *argv[]) { hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; - VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); + SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; - VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); + SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); // Initialize memory constexpr uint8_t val = 55; @@ -136,46 +71,46 @@ int main(int argc, char *argv[]) { free(strLog); } - VALIDATECALL(zeModuleBuildLogDestroy(buildlog)); + SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "CopyBufferToBufferBytes"; - VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel)); + SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; - VALIDATECALL(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); - VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); + SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); + SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); uint32_t offset = 0; - VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer)); - VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer)); - VALIDATECALL(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset)); - VALIDATECALL(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset)); - VALIDATECALL(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 2, sizeof(uint32_t), &offset)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 3, sizeof(uint32_t), &offset)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 4, sizeof(uint32_t), &offset)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = allocSize / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; - VALIDATECALL(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, - nullptr, 0, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, + nullptr, 0, nullptr)); file.close(); } else { // Perform a GPU copy - VALIDATECALL(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, nullptr, 0, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(cmdList, dstBuffer, srcBuffer, allocSize, nullptr, 0, nullptr)); } // Close list and submit for execution - VALIDATECALL(zeCommandListClose(cmdList)); - VALIDATECALL(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); - VALIDATECALL(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate - bool outputValidationSuccessful = true; + outputValidationSuccessful = true; if (memcmp(dstBuffer, srcBuffer, allocSize)) { outputValidationSuccessful = false; uint8_t *srcCharBuffer = static_cast(srcBuffer); @@ -190,11 +125,30 @@ int main(int argc, char *argv[]) { } // Cleanup - VALIDATECALL(zeMemFree(context, dstBuffer)); - VALIDATECALL(zeMemFree(context, srcBuffer)); - VALIDATECALL(zeCommandListDestroy(cmdList)); - VALIDATECALL(zeCommandQueueDestroy(cmdQueue)); - VALIDATECALL(zeContextDestroy(context)); + SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); + SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); +} + +int main(int argc, char *argv[]) { + verbose = isVerbose(argc, argv); + ze_context_handle_t context = nullptr; + auto device = zelloInitContextAndGetDevices(context); + bool outputValidationSuccessful; + + if (verbose) { + ze_device_properties_t deviceProperties = {}; + SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); + std::cout << deviceProperties.name << std::endl; + std::cout << "Device : \n" + << " * name : " << deviceProperties.name << "\n" + << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; + } + + executeGpuKernelAndValidate(context, device, outputValidationSuccessful); + + SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello World Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n"; diff --git a/level_zero/core/test/black_box_tests/zello_world_jitc_ocloc.cpp b/level_zero/core/test/black_box_tests/zello_world_jitc_ocloc.cpp index 58dc4b2e0c..5a529d1ab0 100644 --- a/level_zero/core/test/black_box_tests/zello_world_jitc_ocloc.cpp +++ b/level_zero/core/test/black_box_tests/zello_world_jitc_ocloc.cpp @@ -7,26 +7,12 @@ #include "shared/offline_compiler/source/ocloc_api.h" -#include +#include "zello_common.h" -#include -#include -#include -#include -#include -#include -#include +#include -#define VALIDATECALL(myZeCall) \ - do { \ - if (myZeCall != ZE_RESULT_SUCCESS) { \ - std::cout << "Error at " \ - << #myZeCall << ": " \ - << __FUNCTION__ << ": " \ - << __LINE__ << "\n"; \ - std::terminate(); \ - } \ - } while (0); +extern bool verbose; +bool verbose = false; const char *module = R"===( __kernel void kernel_copy(__global int *dst, __global char *src){ @@ -85,67 +71,16 @@ std::vector compileToSpirV(const std::string &src, const std::string &o return ret; } -int main(int argc, char *argv[]) { - // Initialize driver - VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY)); - - // Retrieve driver - uint32_t driverCount = 0; - VALIDATECALL(zeDriverGet(&driverCount, nullptr)); - - ze_driver_handle_t driverHandle; - VALIDATECALL(zeDriverGet(&driverCount, &driverHandle)); - - ze_context_desc_t contextDesc = {}; - ze_context_handle_t context; - VALIDATECALL(zeContextCreate(driverHandle, &contextDesc, &context)); - - // Retrieve device - uint32_t deviceCount = 0; - VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr)); - - ze_device_handle_t device; - deviceCount = 1; - VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, &device)); - - // Print some properties - ze_device_properties_t deviceProperties = {}; - VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties)); - - std::cout << "Device : \n" - << " * name : " << deviceProperties.name << "\n" - << " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n" - << " * vendorId : " << std::hex << deviceProperties.vendorId << std::dec << "\n"; - - // Create command queue - uint32_t numQueueGroups = 0; - VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr)); - if (numQueueGroups == 0) { - std::cout << "No queue groups found!\n"; - std::terminate(); - } - std::vector queueProperties(numQueueGroups); - VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, - queueProperties.data())); - +void executeKernelAndValidate(ze_context_handle_t context, ze_device_handle_t &device, bool &outputValidationSuccessful) { ze_command_queue_handle_t cmdQueue; ze_command_queue_desc_t cmdQueueDesc = {}; + ze_command_list_handle_t cmdList; - for (uint32_t i = 0; i < numQueueGroups; i++) { - if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) { - cmdQueueDesc.ordinal = i; - } - } + cmdQueueDesc.ordinal = getCommandQueueOrdinal(device); cmdQueueDesc.index = 0; cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; - VALIDATECALL(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); - - // Create command list - ze_command_list_handle_t cmdList; - ze_command_list_desc_t cmdListDesc = {}; - cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal; - VALIDATECALL(zeCommandListCreate(context, device, &cmdListDesc, &cmdList)); - + SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue)); + SUCCESS_OR_TERMINATE(createCommandList(context, device, cmdList)); // Create two shared buffers constexpr size_t allocSize = 4096; ze_device_mem_alloc_desc_t deviceDesc = {}; @@ -156,10 +91,10 @@ int main(int argc, char *argv[]) { hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED; void *srcBuffer = nullptr; - VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); + SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer)); void *dstBuffer = nullptr; - VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); + SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer)); // Initialize memory constexpr uint8_t val = 55; @@ -171,7 +106,7 @@ int main(int argc, char *argv[]) { if (buildLog.size() > 0) { std::cout << "Build log " << buildLog; } - VALIDATECALL((0 == spirV.size())); + SUCCESS_OR_TERMINATE((0 == spirV.size())); ze_module_handle_t module = nullptr; ze_kernel_handle_t kernel = nullptr; @@ -193,13 +128,13 @@ int main(int argc, char *argv[]) { free(strLog); } - VALIDATECALL(zeModuleBuildLogDestroy(buildlog)); + SUCCESS_OR_TERMINATE(zeModuleBuildLogDestroy(buildlog)); ze_kernel_desc_t kernelDesc = {}; kernelDesc.pKernelName = "kernel_copy"; - VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel)); + SUCCESS_OR_TERMINATE(zeKernelCreate(module, &kernelDesc, &kernel)); ze_kernel_properties_t kernProps; - VALIDATECALL(zeKernelGetProperties(kernel, &kernProps)); + SUCCESS_OR_TERMINATE(zeKernelGetProperties(kernel, &kernProps)); std::cout << "Kernel : \n" << " * name : " << kernelDesc.pKernelName << "\n" << " * uuid.mid : " << kernProps.uuid.mid << "\n" @@ -219,28 +154,28 @@ int main(int argc, char *argv[]) { uint32_t groupSizeX = 32u; uint32_t groupSizeY = 1u; uint32_t groupSizeZ = 1u; - VALIDATECALL(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); - VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); + SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ)); + SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ)); - VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer)); - VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer)); + SUCCESS_OR_TERMINATE(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer)); ze_group_count_t dispatchTraits; dispatchTraits.groupCountX = allocSize / groupSizeX; dispatchTraits.groupCountY = 1u; dispatchTraits.groupCountZ = 1u; - VALIDATECALL(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, - nullptr, 0, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits, + nullptr, 0, nullptr)); // Close list and submit for execution - VALIDATECALL(zeCommandListClose(cmdList)); - VALIDATECALL(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); + SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr)); - VALIDATECALL(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); + SUCCESS_OR_TERMINATE(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits::max())); // Validate - bool outputValidationSuccessful = true; + outputValidationSuccessful = true; if (memcmp(dstBuffer, srcBuffer, allocSize)) { outputValidationSuccessful = false; uint8_t *srcCharBuffer = static_cast(srcBuffer); @@ -255,11 +190,30 @@ int main(int argc, char *argv[]) { } // Cleanup - VALIDATECALL(zeMemFree(context, dstBuffer)); - VALIDATECALL(zeMemFree(context, srcBuffer)); - VALIDATECALL(zeCommandListDestroy(cmdList)); - VALIDATECALL(zeCommandQueueDestroy(cmdQueue)); - VALIDATECALL(zeContextDestroy(context)); + SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer)); + SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer)); + SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList)); + SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue)); +} + +int main(int argc, char *argv[]) { + verbose = isVerbose(argc, argv); + ze_context_handle_t context = nullptr; + auto device = zelloInitContextAndGetDevices(context); + bool outputValidationSuccessful; + + if (verbose) { + ze_device_properties_t deviceProperties = {}; + SUCCESS_OR_TERMINATE(zeDeviceGetProperties(device, &deviceProperties)); + std::cout << deviceProperties.name << std::endl; + std::cout << "Device : \n" + << " * name : " << deviceProperties.name << "\n" + << " * vendorId : " << std::hex << deviceProperties.vendorId << "\n"; + } + + executeKernelAndValidate(context, device, outputValidationSuccessful); + + SUCCESS_OR_TERMINATE(zeContextDestroy(context)); std::cout << "\nZello World Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";