Fixing zeKernelGetProperties
Additionally, adding zell_world_jitc_ocloc BB test Change-Id: If3b8c7e4bd4a789ec1b4f4de55ab21e91c1e7afa
This commit is contained in:
parent
75d1ebb811
commit
d181a2e5a3
|
@ -574,44 +574,23 @@ ze_result_t KernelImp::getKernelName(size_t *pSize, char *pName) {
|
||||||
}
|
}
|
||||||
|
|
||||||
ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties) {
|
ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties) {
|
||||||
|
const auto &kernelDescriptor = this->kernelImmData->getDescriptor();
|
||||||
pKernelProperties->requiredGroupSizeX = this->groupSize[0];
|
pKernelProperties->numKernelArgs = static_cast<uint32_t>(kernelDescriptor.payloadMappings.explicitArgs.size());
|
||||||
pKernelProperties->requiredGroupSizeY = this->groupSize[1];
|
pKernelProperties->requiredGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0];
|
||||||
pKernelProperties->requiredGroupSizeZ = this->groupSize[2];
|
pKernelProperties->requiredGroupSizeY = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[1];
|
||||||
|
pKernelProperties->requiredGroupSizeZ = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[2];
|
||||||
pKernelProperties->numKernelArgs =
|
pKernelProperties->requiredNumSubGroups = kernelDescriptor.kernelMetadata.compiledSubGroupsNumber;
|
||||||
static_cast<uint32_t>(this->kernelImmData->getDescriptor().payloadMappings.explicitArgs.size());
|
pKernelProperties->requiredSubgroupSize = kernelDescriptor.kernelMetadata.requiredSubGroupSize;
|
||||||
|
pKernelProperties->maxSubgroupSize = kernelDescriptor.kernelAttributes.simdSize;
|
||||||
ModuleImp *moduleImp = reinterpret_cast<ModuleImp *>(this->module);
|
pKernelProperties->localMemSize = kernelDescriptor.kernelAttributes.slmInlineSize;
|
||||||
NEO::KernelInfo *ki = nullptr;
|
pKernelProperties->privateMemSize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize;
|
||||||
for (uint32_t i = 0; i < moduleImp->getTranslationUnit()->programInfo.kernelInfos.size(); i++) {
|
pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
|
||||||
ki = moduleImp->getTranslationUnit()->programInfo.kernelInfos[i];
|
|
||||||
if (ki->kernelDescriptor.kernelMetadata.kernelName.compare(0, ki->kernelDescriptor.kernelMetadata.kernelName.size(), this->kernelImmData->getDescriptor().kernelMetadata.kernelName) == 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nullptr == ki) {
|
|
||||||
return ZE_RESULT_ERROR_UNINITIALIZED;
|
|
||||||
}
|
|
||||||
|
|
||||||
pKernelProperties->requiredNumSubGroups = static_cast<uint32_t>(ki->patchInfo.executionEnvironment->CompiledSubGroupsNumber);
|
|
||||||
pKernelProperties->requiredSubgroupSize = static_cast<uint32_t>(ki->requiredSubGroupSize);
|
|
||||||
pKernelProperties->maxSubgroupSize = ki->getMaxSimdSize();
|
|
||||||
|
|
||||||
uint32_t maxKernelWorkGroupSize = static_cast<uint32_t>(this->module->getDevice()->getNEODevice()->getDeviceInfo().maxWorkGroupSize);
|
|
||||||
uint32_t maxRequiredWorkGroupSize = static_cast<uint32_t>(ki->getMaxRequiredWorkGroupSize(maxKernelWorkGroupSize));
|
|
||||||
uint32_t largestCompiledSIMDSize = static_cast<uint32_t>(ki->patchInfo.executionEnvironment->LargestCompiledSIMDSize);
|
|
||||||
pKernelProperties->maxNumSubgroups = static_cast<uint32_t>(Math::divideAndRoundUp(maxRequiredWorkGroupSize, largestCompiledSIMDSize));
|
|
||||||
|
|
||||||
pKernelProperties->localMemSize = static_cast<uint32_t>(moduleImp->getDevice()->getNEODevice()->getDeviceInfo().localMemSize);
|
|
||||||
pKernelProperties->privateMemSize = ki->patchInfo.pAllocateStatelessPrivateSurface ? ki->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize
|
|
||||||
: 0;
|
|
||||||
pKernelProperties->spillMemSize = ki->patchInfo.mediavfestate ? ki->patchInfo.mediavfestate->PerThreadScratchSpace
|
|
||||||
: 0;
|
|
||||||
memset(pKernelProperties->uuid.kid, 0, ZE_MAX_KERNEL_UUID_SIZE);
|
memset(pKernelProperties->uuid.kid, 0, ZE_MAX_KERNEL_UUID_SIZE);
|
||||||
memset(pKernelProperties->uuid.mid, 0, ZE_MAX_MODULE_UUID_SIZE);
|
memset(pKernelProperties->uuid.mid, 0, ZE_MAX_MODULE_UUID_SIZE);
|
||||||
|
|
||||||
|
uint32_t maxKernelWorkGroupSize = static_cast<uint32_t>(this->module->getDevice()->getNEODevice()->getDeviceInfo().maxWorkGroupSize);
|
||||||
|
pKernelProperties->maxNumSubgroups = maxKernelWorkGroupSize / kernelDescriptor.kernelAttributes.simdSize;
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,8 +7,9 @@
|
||||||
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||||
set(L0_BLACK_BOX_TEST_PROJECT_FOLDER "ze_intel_gpu/black_box_tests")
|
set(L0_BLACK_BOX_TEST_PROJECT_FOLDER "ze_intel_gpu/black_box_tests")
|
||||||
set(TEST_TARGETS
|
set(TEST_TARGETS
|
||||||
zello_world_gpu
|
|
||||||
zello_timestamp
|
zello_timestamp
|
||||||
|
zello_world_gpu
|
||||||
|
zello_world_jitc_ocloc
|
||||||
)
|
)
|
||||||
|
|
||||||
foreach(TEST_NAME ${TEST_TARGETS})
|
foreach(TEST_NAME ${TEST_TARGETS})
|
||||||
|
@ -18,7 +19,7 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||||
PROPERTIES
|
PROPERTIES
|
||||||
VS_DEBUGGER_COMMAND "$(TargetPath)"
|
VS_DEBUGGER_COMMAND "$(TargetPath)"
|
||||||
VS_DEBUGGER_COMMAND_ARGUMENTS ""
|
VS_DEBUGGER_COMMAND_ARGUMENTS ""
|
||||||
VS_DEBUGGER_WORKING_DIRECTORY "$(OutputPath)"
|
VS_DEBUGGER_WORKING_DIRECTORY "${TargetDir}"
|
||||||
)
|
)
|
||||||
|
|
||||||
add_dependencies(${TEST_NAME} ${TARGET_NAME_L0})
|
add_dependencies(${TEST_NAME} ${TARGET_NAME_L0})
|
||||||
|
@ -26,4 +27,6 @@ if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
|
||||||
|
|
||||||
set_target_properties(${TEST_NAME} PROPERTIES FOLDER ${L0_BLACK_BOX_TEST_PROJECT_FOLDER})
|
set_target_properties(${TEST_NAME} PROPERTIES FOLDER ${L0_BLACK_BOX_TEST_PROJECT_FOLDER})
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
|
target_link_libraries(zello_world_jitc_ocloc PUBLIC ocloc_lib)
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -0,0 +1,267 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2020 Intel Corporation
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: MIT
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "shared/offline_compiler/source/ocloc_api.h"
|
||||||
|
|
||||||
|
#include <level_zero/ze_api.h>
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <limits>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#define VALIDATECALL(myZeCall) \
|
||||||
|
do { \
|
||||||
|
if (myZeCall != ZE_RESULT_SUCCESS) { \
|
||||||
|
std::cout << "Error at " \
|
||||||
|
<< #myZeCall << ": " \
|
||||||
|
<< __FUNCTION__ << ": " \
|
||||||
|
<< __LINE__ << "\n"; \
|
||||||
|
std::terminate(); \
|
||||||
|
} \
|
||||||
|
} while (0);
|
||||||
|
|
||||||
|
const char *module = R"===(
|
||||||
|
__kernel void kernel_copy(__global int *dst, __global char *src){
|
||||||
|
uint gid = get_global_id(0);
|
||||||
|
dst[gid] = src[gid];
|
||||||
|
}
|
||||||
|
)===";
|
||||||
|
|
||||||
|
std::vector<uint8_t> compileToSpirV(const std::string &src, const std::string &options, std::string &outCompilerLog) {
|
||||||
|
std::vector<uint8_t> ret;
|
||||||
|
|
||||||
|
const char *mainFileName = "main.cl";
|
||||||
|
const char *argv[] = {"ocloc", "-q", "-device", "skl", "-file", mainFileName};
|
||||||
|
const unsigned char *sources[] = {reinterpret_cast<const unsigned char *>(src.c_str())};
|
||||||
|
size_t sourcesLengths[] = {src.size() + 1};
|
||||||
|
const char *sourcesNames[] = {mainFileName};
|
||||||
|
unsigned int numOutputs = 0U;
|
||||||
|
unsigned char **outputs = nullptr;
|
||||||
|
size_t *ouputLengths = nullptr;
|
||||||
|
char **outputNames = nullptr;
|
||||||
|
|
||||||
|
int result = oclocInvoke(sizeof(argv) / sizeof(argv[0]), argv,
|
||||||
|
1, sources, sourcesLengths, sourcesNames,
|
||||||
|
0, nullptr, nullptr, nullptr,
|
||||||
|
&numOutputs, &outputs, &ouputLengths, &outputNames);
|
||||||
|
|
||||||
|
unsigned char *spirV = nullptr;
|
||||||
|
size_t spirVlen = 0;
|
||||||
|
const char *log = nullptr;
|
||||||
|
size_t logLen = 0;
|
||||||
|
for (unsigned int i = 0; i < numOutputs; ++i) {
|
||||||
|
std::string spvExtension = ".spv";
|
||||||
|
std::string logFileName = "stdout.log";
|
||||||
|
auto nameLen = strlen(outputNames[i]);
|
||||||
|
if ((nameLen > spvExtension.size()) && (strstr(&outputNames[i][nameLen - spvExtension.size()], spvExtension.c_str()) != nullptr)) {
|
||||||
|
spirV = outputs[i];
|
||||||
|
spirVlen = ouputLengths[i];
|
||||||
|
} else if ((nameLen >= logFileName.size()) && (strstr(outputNames[i], logFileName.c_str()) != nullptr)) {
|
||||||
|
log = reinterpret_cast<const char *>(outputs[i]);
|
||||||
|
logLen = ouputLengths[i];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((result != 0) && (logLen == 0)) {
|
||||||
|
outCompilerLog = "Unknown error, ocloc returnerd : " + std::to_string(result) + "\n";
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (logLen != 0) {
|
||||||
|
outCompilerLog = std::string(log, logLen).c_str();
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.assign(spirV, spirV + spirVlen);
|
||||||
|
oclocFreeOutput(&numOutputs, &outputs, &ouputLengths, &outputNames);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
// Initialize driver
|
||||||
|
VALIDATECALL(zeInit(ZE_INIT_FLAG_GPU_ONLY));
|
||||||
|
|
||||||
|
// Retrieve driver
|
||||||
|
uint32_t driverCount = 0;
|
||||||
|
VALIDATECALL(zeDriverGet(&driverCount, nullptr));
|
||||||
|
|
||||||
|
ze_driver_handle_t driverHandle;
|
||||||
|
VALIDATECALL(zeDriverGet(&driverCount, &driverHandle));
|
||||||
|
|
||||||
|
ze_context_desc_t contextDesc = {};
|
||||||
|
ze_context_handle_t context;
|
||||||
|
VALIDATECALL(zeContextCreate(driverHandle, &contextDesc, &context));
|
||||||
|
|
||||||
|
// Retrieve device
|
||||||
|
uint32_t deviceCount = 0;
|
||||||
|
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, nullptr));
|
||||||
|
|
||||||
|
ze_device_handle_t device;
|
||||||
|
deviceCount = 1;
|
||||||
|
VALIDATECALL(zeDeviceGet(driverHandle, &deviceCount, &device));
|
||||||
|
|
||||||
|
// Print some properties
|
||||||
|
ze_device_properties_t deviceProperties = {};
|
||||||
|
VALIDATECALL(zeDeviceGetProperties(device, &deviceProperties));
|
||||||
|
|
||||||
|
std::cout << "Device : \n"
|
||||||
|
<< " * name : " << deviceProperties.name << "\n"
|
||||||
|
<< " * type : " << ((deviceProperties.type == ZE_DEVICE_TYPE_GPU) ? "GPU" : "FPGA") << "\n"
|
||||||
|
<< " * vendorId : " << std::hex << deviceProperties.vendorId << std::dec << "\n";
|
||||||
|
|
||||||
|
// Create command queue
|
||||||
|
uint32_t numQueueGroups = 0;
|
||||||
|
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups, nullptr));
|
||||||
|
if (numQueueGroups == 0) {
|
||||||
|
std::cout << "No queue groups found!\n";
|
||||||
|
std::terminate();
|
||||||
|
}
|
||||||
|
std::vector<ze_command_queue_group_properties_t> queueProperties(numQueueGroups);
|
||||||
|
VALIDATECALL(zeDeviceGetCommandQueueGroupProperties(device, &numQueueGroups,
|
||||||
|
queueProperties.data()));
|
||||||
|
|
||||||
|
ze_command_queue_handle_t cmdQueue;
|
||||||
|
ze_command_queue_desc_t cmdQueueDesc = {};
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < numQueueGroups; i++) {
|
||||||
|
if (queueProperties[i].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE) {
|
||||||
|
cmdQueueDesc.ordinal = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cmdQueueDesc.index = 0;
|
||||||
|
cmdQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||||
|
VALIDATECALL(zeCommandQueueCreate(context, device, &cmdQueueDesc, &cmdQueue));
|
||||||
|
|
||||||
|
// Create command list
|
||||||
|
ze_command_list_handle_t cmdList;
|
||||||
|
ze_command_list_desc_t cmdListDesc = {};
|
||||||
|
cmdListDesc.commandQueueGroupOrdinal = cmdQueueDesc.ordinal;
|
||||||
|
VALIDATECALL(zeCommandListCreate(context, device, &cmdListDesc, &cmdList));
|
||||||
|
|
||||||
|
// Create two shared buffers
|
||||||
|
constexpr size_t allocSize = 4096;
|
||||||
|
ze_device_mem_alloc_desc_t deviceDesc;
|
||||||
|
deviceDesc.flags = ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED;
|
||||||
|
deviceDesc.ordinal = 0;
|
||||||
|
|
||||||
|
ze_host_mem_alloc_desc_t hostDesc;
|
||||||
|
hostDesc.flags = ZE_HOST_MEM_ALLOC_FLAG_BIAS_UNCACHED;
|
||||||
|
|
||||||
|
void *srcBuffer = nullptr;
|
||||||
|
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &srcBuffer));
|
||||||
|
|
||||||
|
void *dstBuffer = nullptr;
|
||||||
|
VALIDATECALL(zeMemAllocShared(context, &deviceDesc, &hostDesc, allocSize, 1, device, &dstBuffer));
|
||||||
|
|
||||||
|
// Initialize memory
|
||||||
|
constexpr uint8_t val = 55;
|
||||||
|
memset(srcBuffer, val, allocSize);
|
||||||
|
memset(dstBuffer, 0, allocSize);
|
||||||
|
|
||||||
|
std::string buildLog;
|
||||||
|
auto spirV = compileToSpirV(module, "", buildLog);
|
||||||
|
if (buildLog.size() > 0) {
|
||||||
|
std::cout << "Build log " << buildLog;
|
||||||
|
}
|
||||||
|
VALIDATECALL((0 == spirV.size()));
|
||||||
|
|
||||||
|
ze_module_handle_t module = nullptr;
|
||||||
|
ze_kernel_handle_t kernel = nullptr;
|
||||||
|
|
||||||
|
ze_module_desc_t moduleDesc = {};
|
||||||
|
ze_module_build_log_handle_t buildlog;
|
||||||
|
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
|
||||||
|
moduleDesc.pInputModule = spirV.data();
|
||||||
|
moduleDesc.inputSize = spirV.size();
|
||||||
|
moduleDesc.pBuildFlags = "";
|
||||||
|
|
||||||
|
if (zeModuleCreate(context, device, &moduleDesc, &module, &buildlog) != ZE_RESULT_SUCCESS) {
|
||||||
|
size_t szLog = 0;
|
||||||
|
zeModuleBuildLogGetString(buildlog, &szLog, nullptr);
|
||||||
|
|
||||||
|
char *strLog = (char *)malloc(szLog);
|
||||||
|
zeModuleBuildLogGetString(buildlog, &szLog, strLog);
|
||||||
|
std::cout << "Build log:" << strLog << std::endl;
|
||||||
|
|
||||||
|
free(strLog);
|
||||||
|
}
|
||||||
|
VALIDATECALL(zeModuleBuildLogDestroy(buildlog));
|
||||||
|
|
||||||
|
ze_kernel_desc_t kernelDesc = {};
|
||||||
|
kernelDesc.pKernelName = "kernel_copy";
|
||||||
|
VALIDATECALL(zeKernelCreate(module, &kernelDesc, &kernel));
|
||||||
|
ze_kernel_properties_t kernProps;
|
||||||
|
VALIDATECALL(zeKernelGetProperties(kernel, &kernProps));
|
||||||
|
std::cout << "Kernel : \n"
|
||||||
|
<< " * name : " << kernelDesc.pKernelName << "\n"
|
||||||
|
<< " * uuid.mid : " << kernProps.uuid.mid << "\n"
|
||||||
|
<< " * uuid.kid : " << kernProps.uuid.kid << "\n"
|
||||||
|
<< " * maxSubgroupSize : " << kernProps.maxSubgroupSize << "\n"
|
||||||
|
<< " * localMemSize : " << kernProps.localMemSize << "\n"
|
||||||
|
<< " * spillMemSize : " << kernProps.spillMemSize << "\n"
|
||||||
|
<< " * privateMemSize : " << kernProps.privateMemSize << "\n"
|
||||||
|
<< " * maxNumSubgroups : " << kernProps.maxNumSubgroups << "\n"
|
||||||
|
<< " * numKernelArgs : " << kernProps.numKernelArgs << "\n"
|
||||||
|
<< " * requiredSubgroupSize : " << kernProps.requiredSubgroupSize << "\n"
|
||||||
|
<< " * requiredNumSubGroups : " << kernProps.requiredNumSubGroups << "\n"
|
||||||
|
<< " * requiredGroupSizeX : " << kernProps.requiredGroupSizeX << "\n"
|
||||||
|
<< " * requiredGroupSizeY : " << kernProps.requiredGroupSizeY << "\n"
|
||||||
|
<< " * requiredGroupSizeZ : " << kernProps.requiredGroupSizeZ << "\n";
|
||||||
|
|
||||||
|
uint32_t groupSizeX = 32u;
|
||||||
|
uint32_t groupSizeY = 1u;
|
||||||
|
uint32_t groupSizeZ = 1u;
|
||||||
|
VALIDATECALL(zeKernelSuggestGroupSize(kernel, allocSize, 1U, 1U, &groupSizeX, &groupSizeY, &groupSizeZ));
|
||||||
|
VALIDATECALL(zeKernelSetGroupSize(kernel, groupSizeX, groupSizeY, groupSizeZ));
|
||||||
|
|
||||||
|
VALIDATECALL(zeKernelSetArgumentValue(kernel, 1, sizeof(dstBuffer), &dstBuffer));
|
||||||
|
VALIDATECALL(zeKernelSetArgumentValue(kernel, 0, sizeof(srcBuffer), &srcBuffer));
|
||||||
|
|
||||||
|
ze_group_count_t dispatchTraits;
|
||||||
|
dispatchTraits.groupCountX = allocSize / groupSizeX;
|
||||||
|
dispatchTraits.groupCountY = 1u;
|
||||||
|
dispatchTraits.groupCountZ = 1u;
|
||||||
|
|
||||||
|
VALIDATECALL(zeCommandListAppendLaunchKernel(cmdList, kernel, &dispatchTraits,
|
||||||
|
nullptr, 0, nullptr));
|
||||||
|
|
||||||
|
// Close list and submit for execution
|
||||||
|
VALIDATECALL(zeCommandListClose(cmdList));
|
||||||
|
VALIDATECALL(zeCommandQueueExecuteCommandLists(cmdQueue, 1, &cmdList, nullptr));
|
||||||
|
|
||||||
|
VALIDATECALL(zeCommandQueueSynchronize(cmdQueue, std::numeric_limits<uint64_t>::max()));
|
||||||
|
|
||||||
|
// Validate
|
||||||
|
bool outputValidationSuccessful = true;
|
||||||
|
if (memcmp(dstBuffer, srcBuffer, allocSize)) {
|
||||||
|
outputValidationSuccessful = false;
|
||||||
|
uint8_t *srcCharBuffer = static_cast<uint8_t *>(srcBuffer);
|
||||||
|
uint8_t *dstCharBuffer = static_cast<uint8_t *>(dstBuffer);
|
||||||
|
for (size_t i = 0; i < allocSize; i++) {
|
||||||
|
if (srcCharBuffer[i] != dstCharBuffer[i]) {
|
||||||
|
std::cout << "srcBuffer[" << i << "] = " << static_cast<unsigned int>(srcCharBuffer[i]) << " not equal to "
|
||||||
|
<< "dstBuffer[" << i << "] = " << static_cast<unsigned int>(dstCharBuffer[i]) << "\n";
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cleanup
|
||||||
|
VALIDATECALL(zeMemFree(context, dstBuffer));
|
||||||
|
VALIDATECALL(zeMemFree(context, srcBuffer));
|
||||||
|
VALIDATECALL(zeCommandListDestroy(cmdList));
|
||||||
|
VALIDATECALL(zeCommandQueueDestroy(cmdQueue));
|
||||||
|
VALIDATECALL(zeContextDestroy(context));
|
||||||
|
|
||||||
|
std::cout << "\nZello World Results validation " << (outputValidationSuccessful ? "PASSED" : "FAILED") << "\n";
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -245,21 +245,6 @@ HWTEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) {
|
||||||
delete[] kernelNameRetrieved;
|
delete[] kernelNameRetrieved;
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST_F(KernelPropertiesTests, givenInvalidKernelThenUnitializedIsReturned) {
|
|
||||||
ze_kernel_properties_t kernelProperties = {};
|
|
||||||
|
|
||||||
std::vector<KernelInfo *> prevKernelInfos;
|
|
||||||
L0::ModuleImp *moduleImp = reinterpret_cast<L0::ModuleImp *>(module.get());
|
|
||||||
moduleImp->getTranslationUnit()->programInfo.kernelInfos.swap(prevKernelInfos);
|
|
||||||
EXPECT_EQ(0u, moduleImp->getTranslationUnit()->programInfo.kernelInfos.size());
|
|
||||||
|
|
||||||
ze_result_t res = kernel->getProperties(&kernelProperties);
|
|
||||||
EXPECT_EQ(ZE_RESULT_ERROR_UNINITIALIZED, res);
|
|
||||||
|
|
||||||
prevKernelInfos.swap(moduleImp->getTranslationUnit()->programInfo.kernelInfos);
|
|
||||||
EXPECT_NE(0u, moduleImp->getTranslationUnit()->programInfo.kernelInfos.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfaceSize) {
|
HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfaceSize) {
|
||||||
uint32_t computeUnitsUsedForSratch = 0x300;
|
uint32_t computeUnitsUsedForSratch = 0x300;
|
||||||
|
|
||||||
|
@ -297,42 +282,22 @@ HWTEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {
|
||||||
ze_result_t res = kernel->getProperties(&kernelProperties);
|
ze_result_t res = kernel->getProperties(&kernelProperties);
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||||
|
|
||||||
EXPECT_EQ(numKernelArguments, kernelProperties.numKernelArgs);
|
EXPECT_EQ(6U, kernelProperties.numKernelArgs);
|
||||||
|
|
||||||
L0::ModuleImp *moduleImp = reinterpret_cast<L0::ModuleImp *>(module.get());
|
EXPECT_EQ(0U, kernelProperties.requiredNumSubGroups);
|
||||||
NEO::KernelInfo *ki = nullptr;
|
EXPECT_EQ(0U, kernelProperties.requiredSubgroupSize);
|
||||||
for (uint32_t i = 0; i < moduleImp->getTranslationUnit()->programInfo.kernelInfos.size(); i++) {
|
|
||||||
ki = moduleImp->getTranslationUnit()->programInfo.kernelInfos[i];
|
|
||||||
if (ki->kernelDescriptor.kernelMetadata.kernelName.compare(0, ki->kernelDescriptor.kernelMetadata.kernelName.size(), kernel->getImmutableData()->getDescriptor().kernelMetadata.kernelName) == 0) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t requiredNumSubGroups = static_cast<uint32_t>(ki->patchInfo.executionEnvironment->CompiledSubGroupsNumber);
|
uint32_t maxSubgroupSize = this->kernel->getKernelDescriptor().kernelAttributes.simdSize;
|
||||||
EXPECT_EQ(requiredNumSubGroups, kernelProperties.requiredNumSubGroups);
|
ASSERT_NE(0U, maxSubgroupSize);
|
||||||
|
|
||||||
uint32_t requiredSubgroupSize = static_cast<uint32_t>(ki->requiredSubGroupSize);
|
|
||||||
EXPECT_EQ(requiredSubgroupSize, kernelProperties.requiredSubgroupSize);
|
|
||||||
|
|
||||||
uint32_t maxSubgroupSize = ki->getMaxSimdSize();
|
|
||||||
EXPECT_EQ(maxSubgroupSize, kernelProperties.maxSubgroupSize);
|
EXPECT_EQ(maxSubgroupSize, kernelProperties.maxSubgroupSize);
|
||||||
|
|
||||||
uint32_t maxKernelWorkGroupSize = static_cast<uint32_t>(this->module->getDevice()->getNEODevice()->getDeviceInfo().maxWorkGroupSize);
|
uint32_t maxKernelWorkGroupSize = static_cast<uint32_t>(this->module->getDevice()->getNEODevice()->getDeviceInfo().maxWorkGroupSize);
|
||||||
uint32_t maxRequiredWorkGroupSize = static_cast<uint32_t>(ki->getMaxRequiredWorkGroupSize(maxKernelWorkGroupSize));
|
uint32_t maxNumSubgroups = maxKernelWorkGroupSize / maxSubgroupSize;
|
||||||
uint32_t largestCompiledSIMDSize = static_cast<uint32_t>(ki->patchInfo.executionEnvironment->LargestCompiledSIMDSize);
|
|
||||||
uint32_t maxNumSubgroups = static_cast<uint32_t>(Math::divideAndRoundUp(maxRequiredWorkGroupSize, largestCompiledSIMDSize));
|
|
||||||
EXPECT_EQ(maxNumSubgroups, kernelProperties.maxNumSubgroups);
|
EXPECT_EQ(maxNumSubgroups, kernelProperties.maxNumSubgroups);
|
||||||
|
|
||||||
uint32_t localMemSize = static_cast<uint32_t>(moduleImp->getDevice()->getNEODevice()->getDeviceInfo().localMemSize);
|
EXPECT_EQ(sizeof(float) * 16U, kernelProperties.localMemSize);
|
||||||
EXPECT_EQ(localMemSize, kernelProperties.localMemSize);
|
EXPECT_EQ(0U, kernelProperties.privateMemSize);
|
||||||
|
EXPECT_EQ(0U, kernelProperties.spillMemSize);
|
||||||
uint32_t privateMemSize = ki->patchInfo.pAllocateStatelessPrivateSurface ? ki->patchInfo.pAllocateStatelessPrivateSurface->PerThreadPrivateMemorySize
|
|
||||||
: 0;
|
|
||||||
EXPECT_EQ(privateMemSize, kernelProperties.privateMemSize);
|
|
||||||
|
|
||||||
uint32_t spillMemSize = ki->patchInfo.mediavfestate ? ki->patchInfo.mediavfestate->PerThreadScratchSpace
|
|
||||||
: 0;
|
|
||||||
EXPECT_EQ(spillMemSize, kernelProperties.spillMemSize);
|
|
||||||
|
|
||||||
uint8_t zeroKid[ZE_MAX_KERNEL_UUID_SIZE];
|
uint8_t zeroKid[ZE_MAX_KERNEL_UUID_SIZE];
|
||||||
uint8_t zeroMid[ZE_MAX_MODULE_UUID_SIZE];
|
uint8_t zeroMid[ZE_MAX_MODULE_UUID_SIZE];
|
||||||
|
|
Loading…
Reference in New Issue