mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
Add multidev black box test
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
31dbc04f23
commit
a20c0b790a
@@ -21,6 +21,7 @@ set(TEST_TARGETS
|
||||
zello_immediate
|
||||
zello_ipc_copy_dma_buf
|
||||
zello_ipc_copy_dma_buf_p2p
|
||||
zello_multidev
|
||||
zello_printf
|
||||
zello_scratch
|
||||
zello_timestamp
|
||||
@@ -66,14 +67,16 @@ foreach(TEST_NAME ${TEST_TARGETS})
|
||||
)
|
||||
endforeach()
|
||||
|
||||
target_link_libraries(zello_commandlist_immediate PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_dynamic_link PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_dyn_local_arg PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_fence PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_multidev PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_image_view PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_printf PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_world_jitc_ocloc PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_scratch PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_fence PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_printf PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_image_view PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_dynamic_link PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_commandlist_immediate PUBLIC ocloc_lib)
|
||||
target_link_libraries(zello_dyn_local_arg PUBLIC ocloc_lib)
|
||||
|
||||
if(UNIX)
|
||||
target_link_libraries(zello_world_global_work_offset PUBLIC ocloc_lib)
|
||||
endif()
|
||||
|
||||
@@ -281,6 +281,49 @@ static inline void teardown(ze_context_handle_t context, ze_command_queue_handle
|
||||
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
|
||||
}
|
||||
|
||||
inline void printDeviceProperties(const ze_device_properties_t &props) {
|
||||
if (verbose) {
|
||||
std::cout << "Device : "
|
||||
<< "\n"
|
||||
<< " * name : " << props.name << "\n"
|
||||
<< " * type : " << ((props.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
|
||||
<< " * vendorId : " << props.vendorId << "\n"
|
||||
<< " * deviceId : " << props.deviceId << "\n"
|
||||
<< " * subdeviceId : " << props.subdeviceId << "\n"
|
||||
<< " * coreClockRate : " << props.coreClockRate << "\n"
|
||||
<< " * maxMemAllocSize : " << props.maxMemAllocSize << "\n"
|
||||
<< " * maxHardwareContexts : " << props.maxHardwareContexts << "\n"
|
||||
<< " * isSubdevice : " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE)) << "\n"
|
||||
<< " * eccMemorySupported : " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_PROPERTY_FLAG_ECC)) << "\n"
|
||||
<< " * onDemandPageFaultsSupported : " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING)) << "\n"
|
||||
<< " * maxCommandQueuePriority : " << props.maxCommandQueuePriority << "\n"
|
||||
<< " * numThreadsPerEU : " << props.numThreadsPerEU << "\n"
|
||||
<< " * numEUsPerSubslice : " << props.numEUsPerSubslice << "\n"
|
||||
<< " * numSubslicesPerSlice : " << props.numSubslicesPerSlice << "\n"
|
||||
<< " * numSlices : " << props.numSlices << "\n"
|
||||
<< " * physicalEUSimdWidth : " << props.physicalEUSimdWidth << "\n"
|
||||
<< " * timerResolution : " << props.timerResolution << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
inline void printCacheProperties(uint32_t index, const ze_device_cache_properties_t &props) {
|
||||
if (verbose) {
|
||||
std::cout << "Cache properties: \n"
|
||||
<< index << "\n"
|
||||
<< " * User Cache Control : " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_CACHE_PROPERTY_FLAG_USER_CONTROL)) << "\n"
|
||||
<< " * cache size : " << props.cacheSize << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
inline void printP2PProperties(const ze_device_p2p_properties_t &props, bool canAccessPeer, uint32_t device0Index, uint32_t device1Index) {
|
||||
if (verbose) {
|
||||
std::cout << " * P2P Properties device " << device0Index << " to peer " << device1Index << "\n";
|
||||
std::cout << "\t* accessSupported: " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ACCESS)) << "\n";
|
||||
std::cout << "\t* atomicsSupported: " << std::boolalpha << static_cast<bool>(!!(props.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS)) << "\n";
|
||||
std::cout << "\t* canAccessPeer: " << std::boolalpha << static_cast<bool>(canAccessPeer) << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
inline const std::vector<const char *> &getResourcesSearchLocations() {
|
||||
static std::vector<const char *> locations {
|
||||
"test_files/spv_modules/",
|
||||
|
||||
241
level_zero/core/test/black_box_tests/zello_multidev.cpp
Normal file
241
level_zero/core/test/black_box_tests/zello_multidev.cpp
Normal file
@@ -0,0 +1,241 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "zello_common.h"
|
||||
#include "zello_compile.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
||||
bool verbose = false;
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
verbose = isVerbose(argc, argv);
|
||||
|
||||
// Set-up
|
||||
constexpr size_t allocSize = 4096;
|
||||
constexpr size_t bytesPerThread = sizeof(char);
|
||||
constexpr size_t numThreads = allocSize / bytesPerThread;
|
||||
std::vector<ze_module_handle_t> module;
|
||||
std::vector<ze_device_handle_t> devices;
|
||||
std::vector<std::string> deviceNames;
|
||||
std::vector<ze_kernel_handle_t> kernel;
|
||||
std::vector<ze_command_queue_handle_t> cmdQueue;
|
||||
std::vector<ze_command_list_handle_t> cmdList;
|
||||
void *srcBuffer = nullptr;
|
||||
void *dstBuffer = nullptr;
|
||||
bool outputValidationSuccessful = false;
|
||||
|
||||
ze_context_handle_t context = nullptr;
|
||||
ze_driver_handle_t driverHandle = nullptr;
|
||||
devices = zelloInitContextAndGetDevices(context, driverHandle);
|
||||
uint32_t deviceCount = (uint32_t)devices.size();
|
||||
|
||||
// Get subdevices for each device and add to total count of devices
|
||||
for (uint32_t i = 0; i < deviceCount; i++) {
|
||||
uint32_t count = 0;
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetSubDevices(devices[i], &count, nullptr));
|
||||
|
||||
deviceCount += count;
|
||||
devices.resize(deviceCount);
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetSubDevices(devices[i], &count,
|
||||
devices.data() + (deviceCount - count)));
|
||||
}
|
||||
|
||||
deviceNames.resize(devices.size());
|
||||
|
||||
for (uint32_t i = 0; i < deviceCount; i++) {
|
||||
ze_device_properties_t deviceProperties = {ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES};
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetProperties(devices[i], &deviceProperties));
|
||||
printDeviceProperties(deviceProperties);
|
||||
|
||||
deviceNames[i].assign(deviceProperties.name, strlen(deviceProperties.name));
|
||||
|
||||
uint32_t cachePropertiesCount = 0;
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetCacheProperties(devices[i], &cachePropertiesCount, nullptr));
|
||||
|
||||
std::vector<ze_device_cache_properties_t> cacheProperties;
|
||||
cacheProperties.resize(cachePropertiesCount);
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetCacheProperties(devices[i], &cachePropertiesCount, cacheProperties.data()));
|
||||
|
||||
for (uint32_t cacheIndex = 0; cacheIndex < cachePropertiesCount; cacheIndex++) {
|
||||
printCacheProperties(cacheIndex, cacheProperties[cacheIndex]);
|
||||
}
|
||||
|
||||
ze_device_p2p_properties_t deviceP2PProperties = {ZE_STRUCTURE_TYPE_DEVICE_P2P_PROPERTIES};
|
||||
for (uint32_t j = 0; j < deviceCount; j++) {
|
||||
if (j == i)
|
||||
continue;
|
||||
SUCCESS_OR_TERMINATE(zeDeviceGetP2PProperties(devices[i], devices[j], &deviceP2PProperties));
|
||||
ze_bool_t canAccessPeer = false;
|
||||
SUCCESS_OR_TERMINATE(zeDeviceCanAccessPeer(devices[i], devices[j], &canAccessPeer));
|
||||
printP2PProperties(deviceP2PProperties, canAccessPeer, i, j);
|
||||
if (canAccessPeer == false) {
|
||||
std::cout << "Device " << i << " cannot access " << j << "\n";
|
||||
std::terminate();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
module.resize(deviceCount);
|
||||
cmdQueue.resize(deviceCount);
|
||||
cmdList.resize(deviceCount);
|
||||
kernel.resize(deviceCount);
|
||||
|
||||
std::string buildLog;
|
||||
auto moduleBinary = compileToSpirV(const_cast<const char *>(memcpyBytesTestKernelSrc), "", buildLog);
|
||||
if (buildLog.size() > 0) {
|
||||
std::cout << "Build log " << buildLog;
|
||||
}
|
||||
SUCCESS_OR_TERMINATE((0 == moduleBinary.size()));
|
||||
|
||||
// init everything
|
||||
for (uint32_t i = 0; i < deviceCount; i++) {
|
||||
std::cout << "Creating objects for device " << i << " " << deviceNames[i] << "\n";
|
||||
ze_command_queue_desc_t cmdQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
|
||||
cmdQueueDesc.pNext = nullptr;
|
||||
cmdQueueDesc.flags = 0;
|
||||
cmdQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
cmdQueueDesc.ordinal = getCommandQueueOrdinal(devices[i]);
|
||||
cmdQueueDesc.index = 0;
|
||||
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueCreate(context, devices[i], &cmdQueueDesc, &cmdQueue[i]));
|
||||
|
||||
ze_command_list_desc_t cmdListDesc = {};
|
||||
cmdListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
|
||||
cmdListDesc.pNext = nullptr;
|
||||
cmdListDesc.flags = 0;
|
||||
SUCCESS_OR_TERMINATE(zeCommandListCreate(context, devices[i], &cmdListDesc, &cmdList[i]));
|
||||
|
||||
ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
|
||||
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
|
||||
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(moduleBinary.data());
|
||||
moduleDesc.inputSize = moduleBinary.size();
|
||||
SUCCESS_OR_TERMINATE(zeModuleCreate(context, devices[i], &moduleDesc, &module[i], nullptr));
|
||||
|
||||
ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
|
||||
kernelDesc.pKernelName = "memcpy_bytes";
|
||||
SUCCESS_OR_TERMINATE(zeKernelCreate(module[i], &kernelDesc, &kernel[i]));
|
||||
}
|
||||
|
||||
// ITERATE OVER DEVICES and Launch the function
|
||||
for (uint32_t i = 0; i < deviceCount; i++) {
|
||||
std::cout << "Launching kernels for device " << i << " " << deviceNames[i] << "\n";
|
||||
uint32_t groupSizeX = 32u;
|
||||
uint32_t groupSizeY = 1u;
|
||||
uint32_t groupSizeZ = 1u;
|
||||
SUCCESS_OR_TERMINATE(zeKernelSuggestGroupSize(kernel[i], numThreads, 1U, 1U,
|
||||
&groupSizeX, &groupSizeY, &groupSizeZ));
|
||||
SUCCESS_OR_TERMINATE_BOOL(numThreads % groupSizeX == 0);
|
||||
if (verbose) {
|
||||
std::cout << "Group size : (" << groupSizeX << ", " << groupSizeY << ", " << groupSizeZ
|
||||
<< ")" << std::endl;
|
||||
}
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeKernelSetGroupSize(kernel[i], groupSizeX, groupSizeY, groupSizeZ));
|
||||
|
||||
// Alloc buffers
|
||||
srcBuffer = nullptr;
|
||||
dstBuffer = nullptr;
|
||||
|
||||
ze_device_mem_alloc_desc_t deviceDesc = {};
|
||||
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
|
||||
deviceDesc.ordinal = i;
|
||||
deviceDesc.flags = 0;
|
||||
deviceDesc.pNext = nullptr;
|
||||
|
||||
ze_host_mem_alloc_desc_t hostDesc = {};
|
||||
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
|
||||
hostDesc.pNext = nullptr;
|
||||
hostDesc.flags = 0;
|
||||
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc,
|
||||
allocSize, 1, devices[i], &srcBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemAllocShared(context, &deviceDesc, &hostDesc,
|
||||
allocSize, 1, devices[i], &dstBuffer));
|
||||
|
||||
// Init data and copy to device
|
||||
uint8_t initDataSrc[allocSize];
|
||||
memset(initDataSrc, 7, sizeof(initDataSrc));
|
||||
uint8_t initDataDst[allocSize];
|
||||
memset(initDataDst, 3, sizeof(initDataDst));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
|
||||
cmdList[i], srcBuffer, initDataSrc, sizeof(initDataSrc), nullptr, 0, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
|
||||
cmdList[i], dstBuffer, initDataDst, sizeof(initDataDst), nullptr, 0, nullptr));
|
||||
|
||||
// copying of data must finish before running the user function
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList[i], nullptr, 0, nullptr));
|
||||
|
||||
// Set function args and get ready to dispatch
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeKernelSetArgumentValue(kernel[i], 0, sizeof(dstBuffer), &dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeKernelSetArgumentValue(kernel[i], 1, sizeof(srcBuffer), &srcBuffer));
|
||||
|
||||
ze_group_count_t dispatchTraits;
|
||||
dispatchTraits.groupCountX = numThreads / groupSizeX;
|
||||
dispatchTraits.groupCountY = 1u;
|
||||
dispatchTraits.groupCountZ = 1u;
|
||||
if (verbose) {
|
||||
std::cerr << "Number of groups : (" << dispatchTraits.groupCountX << ", "
|
||||
<< dispatchTraits.groupCountY << ", " << dispatchTraits.groupCountZ << ")"
|
||||
<< std::endl;
|
||||
}
|
||||
SUCCESS_OR_TERMINATE_BOOL(dispatchTraits.groupCountX * groupSizeX == allocSize);
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendLaunchKernel(
|
||||
cmdList[i], kernel[i], &dispatchTraits, nullptr, 0, nullptr));
|
||||
|
||||
// Barrier to complete function
|
||||
uint8_t readBackData[allocSize];
|
||||
memset(readBackData, 2, sizeof(readBackData));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendBarrier(cmdList[i], nullptr, 0, nullptr));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListAppendMemoryCopy(
|
||||
cmdList[i], readBackData, dstBuffer, sizeof(readBackData), nullptr, 0, nullptr));
|
||||
|
||||
// Dispatch and wait
|
||||
SUCCESS_OR_TERMINATE(zeCommandListClose(cmdList[i]));
|
||||
SUCCESS_OR_TERMINATE(
|
||||
zeCommandQueueExecuteCommandLists(cmdQueue[i], 1, &cmdList[i], nullptr));
|
||||
auto synchronizationResult = zeCommandQueueSynchronize(cmdQueue[i], std::numeric_limits<uint64_t>::max());
|
||||
SUCCESS_OR_WARNING(synchronizationResult);
|
||||
|
||||
// Validate
|
||||
outputValidationSuccessful = true;
|
||||
for (size_t i = 0; i < allocSize; ++i) {
|
||||
outputValidationSuccessful &=
|
||||
((unsigned char)(initDataSrc[i]) == (unsigned char)readBackData[i]);
|
||||
}
|
||||
|
||||
// Release Mem
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, dstBuffer));
|
||||
SUCCESS_OR_TERMINATE(zeMemFree(context, srcBuffer));
|
||||
|
||||
// Break immediately if output validation is false
|
||||
if (!outputValidationSuccessful) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < deviceCount; i++) {
|
||||
std::cout << "Freeing objects for device " << i << " " << deviceNames[i] << "\n";
|
||||
SUCCESS_OR_TERMINATE(zeKernelDestroy(kernel[i]));
|
||||
SUCCESS_OR_TERMINATE(zeModuleDestroy(module[i]));
|
||||
SUCCESS_OR_TERMINATE(zeCommandListDestroy(cmdList[i]));
|
||||
SUCCESS_OR_TERMINATE(zeCommandQueueDestroy(cmdQueue[i]));
|
||||
}
|
||||
|
||||
bool aubMode = isAubMode(argc, argv);
|
||||
if (aubMode == false) {
|
||||
std::cout << "\nZello Multidev Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED")
|
||||
<< std::endl;
|
||||
}
|
||||
int resultOnFailure = aubMode ? 0 : 1;
|
||||
return outputValidationSuccessful ? 0 : resultOnFailure;
|
||||
}
|
||||
Reference in New Issue
Block a user