Add multidev black box test

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-06-21 15:48:34 +00:00
committed by Compute-Runtime-Automation
parent 31dbc04f23
commit a20c0b790a
3 changed files with 293 additions and 6 deletions

View File

@@ -21,6 +21,7 @@ set(TEST_TARGETS
zello_immediate
zello_ipc_copy_dma_buf
zello_ipc_copy_dma_buf_p2p
zello_multidev
zello_printf
zello_scratch
zello_timestamp
@@ -66,14 +67,16 @@ foreach(TEST_NAME ${TEST_TARGETS})
)
endforeach()
# Link ocloc_lib into each listed test target
# (ocloc_lib is presumably the offline compiler library these tests use to build kernels - confirm).
# NOTE(review): several targets (e.g. zello_fence, zello_printf) are listed twice below;
# this looks like old and new lines of a diff shown together - confirm against the real file.
target_link_libraries(zello_commandlist_immediate PUBLIC ocloc_lib)
target_link_libraries(zello_dynamic_link PUBLIC ocloc_lib)
target_link_libraries(zello_dyn_local_arg PUBLIC ocloc_lib)
target_link_libraries(zello_fence PUBLIC ocloc_lib)
target_link_libraries(zello_multidev PUBLIC ocloc_lib)
target_link_libraries(zello_image_view PUBLIC ocloc_lib)
target_link_libraries(zello_printf PUBLIC ocloc_lib)
target_link_libraries(zello_world_jitc_ocloc PUBLIC ocloc_lib)
target_link_libraries(zello_scratch PUBLIC ocloc_lib)
target_link_libraries(zello_fence PUBLIC ocloc_lib)
target_link_libraries(zello_printf PUBLIC ocloc_lib)
target_link_libraries(zello_image_view PUBLIC ocloc_lib)
target_link_libraries(zello_dynamic_link PUBLIC ocloc_lib)
target_link_libraries(zello_commandlist_immediate PUBLIC ocloc_lib)
target_link_libraries(zello_dyn_local_arg PUBLIC ocloc_lib)
# zello_world_global_work_offset is only linked against ocloc_lib on UNIX builds.
if(UNIX)
target_link_libraries(zello_world_global_work_offset PUBLIC ocloc_lib)
endif()

View File

@@ -281,6 +281,49 @@ static inline void teardown(ze_context_handle_t context, ze_command_queue_handle
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
}
// Writes the fields of `props` to std::cout, one labelled line per field.
// Produces no output unless the global `verbose` flag is set.
inline void printDeviceProperties(const ze_device_properties_t &props) {
    if (!verbose) {
        return;
    }

    // Every device type other than GPU is labelled "FPGA".
    const char *typeLabel = (props.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA";

    // Decode the individual capability bits from the flags mask.
    const bool subdeviceFlag = (props.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE) != 0;
    const bool eccFlag = (props.flags & ZE_DEVICE_PROPERTY_FLAG_ECC) != 0;
    const bool onDemandPagingFlag = (props.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING) != 0;

    std::cout << "Device : " << "\n";
    std::cout << " * name : " << props.name << "\n";
    std::cout << " * type : " << typeLabel << "\n";
    std::cout << " * vendorId : " << props.vendorId << "\n";
    std::cout << " * deviceId : " << props.deviceId << "\n";
    std::cout << " * subdeviceId : " << props.subdeviceId << "\n";
    std::cout << " * coreClockRate : " << props.coreClockRate << "\n";
    std::cout << " * maxMemAllocSize : " << props.maxMemAllocSize << "\n";
    std::cout << " * maxHardwareContexts : " << props.maxHardwareContexts << "\n";

    // boolalpha is a sticky stream flag: it stays set on std::cout after this call.
    std::cout << std::boolalpha;
    std::cout << " * isSubdevice : " << subdeviceFlag << "\n";
    std::cout << " * eccMemorySupported : " << eccFlag << "\n";
    std::cout << " * onDemandPageFaultsSupported : " << onDemandPagingFlag << "\n";

    std::cout << " * maxCommandQueuePriority : " << props.maxCommandQueuePriority << "\n";
    std::cout << " * numThreadsPerEU : " << props.numThreadsPerEU << "\n";
    std::cout << " * numEUsPerSubslice : " << props.numEUsPerSubslice << "\n";
    std::cout << " * numSubslicesPerSlice : " << props.numSubslicesPerSlice << "\n";
    std::cout << " * numSlices : " << props.numSlices << "\n";
    std::cout << " * physicalEUSimdWidth : " << props.physicalEUSimdWidth << "\n";
    std::cout << " * timerResolution : " << props.timerResolution << "\n";
}
// Writes one cache-properties entry to std::cout: the entry index, whether the
// user-control flag is set, and the cache size. Silent unless `verbose` is set.
inline void printCacheProperties(uint32_t index, const ze_device_cache_properties_t &props) {
    if (!verbose) {
        return;
    }

    const bool userControl = (props.flags & ZE_DEVICE_CACHE_PROPERTY_FLAG_USER_CONTROL) != 0;

    std::cout << "Cache properties: \n";
    std::cout << index << "\n";
    // boolalpha is a sticky stream flag: it stays set on std::cout after this call.
    std::cout << " * User Cache Control : " << std::boolalpha << userControl << "\n";
    std::cout << " * cache size : " << props.cacheSize << "\n";
}
// Writes the peer-to-peer capabilities reported for the pair
// (device0Index -> device1Index) to std::cout, together with the separately
// supplied canAccessPeer result. Silent unless `verbose` is set.
inline void printP2PProperties(const ze_device_p2p_properties_t &props, bool canAccessPeer, uint32_t device0Index, uint32_t device1Index) {
    if (!verbose) {
        return;
    }

    const bool accessSupported = (props.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS) != 0;
    const bool atomicsSupported = (props.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS) != 0;

    // boolalpha is a sticky stream flag: it stays set on std::cout after this call.
    std::cout << " * P2P Properties device " << device0Index << " to peer " << device1Index << "\n"
              << "\t* accessSupported: " << std::boolalpha << accessSupported << "\n"
              << "\t* atomicsSupported: " << atomicsSupported << "\n"
              << "\t* canAccessPeer: " << canAccessPeer << "\n";
}
inline const std::vector<const char *> &getResourcesSearchLocations() {
static std::vector<const char *> locations {
"test_files/spv_modules/",

View File

@@ -0,0 +1,241 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "zello_common.h"
#include "zello_compile.h"
#include <fstream>
#include <iostream>
#include <memory>
// Verbosity switch for this test: assigned from isVerbose(argc, argv) at the start of main
// and checked before the optional diagnostic output is printed.
bool verbose = false;
int main(int argc, char *argv[]) {
verbose = isVerbose(argc, argv);
// Set-up
constexpr size_t allocSize = 4096;
constexpr size_t bytesPerThread = sizeof(char);
constexpr size_t numThreads = allocSize / bytesPerThread;
std::vector<ze_module_handle_t> module;
std::vector<ze_device_handle_t> devices;
std::vector<std::string> deviceNames;
std::vector<ze_kernel_handle_t> kernel;
std::vector<ze_command_queue_handle_t> cmdQueue;
std::vector<ze_command_list_handle_t> cmdList;
void *srcBuffer = nullptr;
void *dstBuffer = nullptr;
bool outputValidationSuccessful = false;
ze_context_handle_t context = nullptr;
ze_driver_handle_t driverHandle = nullptr;
devices = zelloInitContextAndGetDevices(context, driverHandle);
uint32_t deviceCount = (uint32_t)devices.size();
// Get subdevices for each device and add to total count of devices
for (uint32_t i = 0; i < deviceCount; i++) {
uint32_t count = 0;
SUCCESS_OR_TERMINATE(zeDeviceGetSubDevices(devices[i], &count, nullptr));
deviceCount += count;
devices.resize(deviceCount);
SUCCESS_OR_TERMINATE(zeDeviceGetSubDevices(devices[i], &count,
devices.data() + (deviceCount - count)));
}
deviceNames.resize(devices.size());
for (uint32_t i = 0; i < deviceCount; i++) {
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(devices[i], &deviceProperties));
printDeviceProperties(deviceProperties);
deviceNames[i].assign(deviceProperties.name, strlen(deviceProperties.name));
uint32_t cachePropertiesCount = 0;
SUCCESS_OR_TERMINATE(zeDeviceGetCacheProperties(devices[i], &cachePropertiesCount, nullptr));
std::vector<ze_device_cache_properties_t> cacheProperties;
cacheProperties.resize(cachePropertiesCount);
SUCCESS_OR_TERMINATE(zeDeviceGetCacheProperties(devices[i], &cachePropertiesCount, cacheProperties.data()));
for (uint32_t cacheIndex = 0; cacheIndex < cachePropertiesCount; cacheIndex++) {
printCacheProperties(cacheIndex, cacheProperties[cacheIndex]);
}
ze_device_p2p_properties_t deviceP2PProperties = {ZE_STRUCTURE_TYPE_DEVICE_P2P_PROPERTIES};
for (uint32_t j = 0; j < deviceCount; j++) {
if (j == i)
continue;
SUCCESS_OR_TERMINATE(zeDeviceGetP2PProperties(devices[i], devices[j], &deviceP2PProperties));
ze_bool_t canAccessPeer = false;
SUCCESS_OR_TERMINATE(zeDeviceCanAccessPeer(devices[i], devices[j], &canAccessPeer));
printP2PProperties(deviceP2PProperties, canAccessPeer, i, j);
if (canAccessPeer == false) {
std::cout << "Device " << i << " cannot access " << j << "\n";
std::terminate();
}
}
}
module.resize(deviceCount);
cmdQueue.resize(deviceCount);
cmdList.resize(deviceCount);
kernel.resize(deviceCount);
std::string buildLog;
auto moduleBinary = compileToSpirV(const_cast<const char *>(memcpyBytesTestKernelSrc), "", buildLog);
if (buildLog.size() > 0) {
std::cout << "Build log " << buildLog;
}
SUCCESS_OR_TERMINATE((0 == moduleBinary.size()));
// init everything
for (uint32_t i = 0; i < deviceCount; i++) {
std::cout << "Creating objects for device " << i << " " << deviceNames[i] << "\n";
ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
cmdQueueDesc.pNext = nullptr;
cmdQueueDesc.flags = 0;
cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
cmdQueueDesc.ordinal = getCommandQueueOrdinal(devices[i]);
cmdQueueDesc.index = 0;
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, devices[i], &cmdQueueDesc, &cmdQueue[i]));
ze_command_list_desc_t cmdListDesc = {};
cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
cmdListDesc.pNext = nullptr;
cmdListDesc.flags = 0;
SUCCESS_OR_TERMINATE(zeCommandListCreate(context, devices[i], &cmdListDesc, &cmdList[i]));
ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(moduleBinary.data());
moduleDesc.inputSize = moduleBinary.size();
SUCCESS_OR_TERMINATE(zeModuleCreate(context, devices[i], &moduleDesc, &module[i], nullptr));
ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
kernelDesc.pKernelName = "memcpy_bytes";
SUCCESS_OR_TERMINATE(zeKernelCreate(module[i], &kernelDesc, &kernel[i]));
}
// ITERATE OVER DEVICES and Launch the function
for (uint32_t i = 0; i < deviceCount; i++) {
std::cout << "Launching kernels for device " << i << " " << deviceNames[i] << "\n";
uint32_t groupSizeX = 32u;
uint32_t groupSizeY = 1u;
uint32_t groupSizeZ = 1u;
SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel[i], numThreads, 1U, 1U,
&groupSizeX, &groupSizeY, &groupSizeZ));
SUCCESS_OR_TERMINATE_BOOL(numThreads % groupSizeX == 0);
if (verbose) {
std::cout << "Group size : (" << groupSizeX << ", " << groupSizeY << ", " << groupSizeZ
<< ")" << std::endl;
}
SUCCESS_OR_TERMINATE(
zeKernelSetGroupSize(kernel[i], groupSizeX, groupSizeY, groupSizeZ));
// Alloc buffers
srcBuffer = nullptr;
dstBuffer = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
deviceDesc.ordinal = i;
deviceDesc.flags = 0;
deviceDesc.pNext = nullptr;
ze_host_mem_alloc_desc_t hostDesc = {};
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
hostDesc.pNext = nullptr;
hostDesc.flags = 0;
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc,
allocSize, 1, devices[i], &srcBuffer));
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc,
allocSize, 1, devices[i], &dstBuffer));
// Init data and copy to device
uint8_t initDataSrc[allocSize];
memset(initDataSrc, 7, sizeof(initDataSrc));
uint8_t initDataDst[allocSize];
memset(initDataDst, 3, sizeof(initDataDst));
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
cmdList[i], srcBuffer, initDataSrc, sizeof(initDataSrc), nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
cmdList[i], dstBuffer, initDataDst, sizeof(initDataDst), nullptr, 0, nullptr));
// copying of data must finish before running the user function
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList[i], nullptr, 0, nullptr));
// Set function args and get ready to dispatch
SUCCESS_OR_TERMINATE(
zeKernelSetArgumentValue(kernel[i], 0, sizeof(dstBuffer), &dstBuffer));
SUCCESS_OR_TERMINATE(
zeKernelSetArgumentValue(kernel[i], 1, sizeof(srcBuffer), &srcBuffer));
ze_group_count_t dispatchTraits;
dispatchTraits.groupCountX = numThreads / groupSizeX;
dispatchTraits.groupCountY = 1u;
dispatchTraits.groupCountZ = 1u;
if (verbose) {
std::cerr << "Number of groups : (" << dispatchTraits.groupCountX << ", "
<< dispatchTraits.groupCountY << ", " << dispatchTraits.groupCountZ << ")"
<< std::endl;
}
SUCCESS_OR_TERMINATE_BOOL(dispatchTraits.groupCountX * groupSizeX == allocSize);
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(
cmdList[i], kernel[i], &dispatchTraits, nullptr, 0, nullptr));
// Barrier to complete function
uint8_t readBackData[allocSize];
memset(readBackData, 2, sizeof(readBackData));
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList[i], nullptr, 0, nullptr));
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
cmdList[i], readBackData, dstBuffer, sizeof(readBackData), nullptr, 0, nullptr));
// Dispatch and wait
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList[i]));
SUCCESS_OR_TERMINATE(
zeCommandQueueExecuteCommandLists(cmdQueue[i], 1, &cmdList[i], nullptr));
auto synchronizationResult = zeCommandQueueSynchronize(cmdQueue[i], std::numeric_limits<uint64_t>::max());
SUCCESS_OR_WARNING(synchronizationResult);
// Validate
outputValidationSuccessful = true;
for (size_t i = 0; i < allocSize; ++i) {
outputValidationSuccessful &=
((unsigned char)(initDataSrc[i]) == (unsigned char)readBackData[i]);
}
// Release Mem
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
// Break immediately if output validation is false
if (!outputValidationSuccessful) {
break;
}
}
for (uint32_t i = 0; i < deviceCount; i++) {
std::cout << "Freeing objects for device " << i << " " << deviceNames[i] << "\n";
SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel[i]));
SUCCESS_OR_TERMINATE(zeModuleDestroy(module[i]));
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList[i]));
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue[i]));
}
bool aubMode = isAubMode(argc, argv);
if (aubMode == false) {
std::cout << "\nZello Multidev Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED")
<< std::endl;
}
int resultOnFailure = aubMode ? 0 : 1;
return outputValidationSuccessful ? 0 : resultOnFailure;
}