feature implicit args: patch the RT dispatch globals array address into the implicit args buffer

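Handle the has_rtcalls flag for kernels and external functions in zebin, and propagate it from called external functions to their callers and to the kernels that use them.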

Related-To: NEO-7818
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2023-03-20 12:46:06 +00:00
committed by Compute-Runtime-Automation
parent 7e5e27f0b9
commit dd39b822d3
11 changed files with 104 additions and 48 deletions

View File

@@ -954,27 +954,28 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
if (this->usesRayTracing()) {
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals;
if (arg.pointerSize == 0) {
// application is allocating its own RTDispatchGlobals manually
neoDevice->initializeRayTracing(0);
} else {
neoDevice->initializeRayTracing(bvhLevels);
auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels);
if (rtDispatchGlobalsInfo == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
neoDevice->initializeRayTracing(bvhLevels);
for (auto rtStack : rtDispatchGlobalsInfo->rtStacks) {
this->residencyContainer.push_back(rtStack);
}
auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels);
if (rtDispatchGlobalsInfo == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
auto address = rtDispatchGlobalsInfo->rtDispatchGlobalsArray->getGpuAddressToPatch();
for (auto rtStack : rtDispatchGlobalsInfo->rtStacks) {
this->residencyContainer.push_back(rtStack);
}
auto address = rtDispatchGlobalsInfo->rtDispatchGlobalsArray->getGpuAddressToPatch();
if (NEO::isValidOffset(arg.stateless)) {
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
arg,
static_cast<uintptr_t>(address));
this->residencyContainer.push_back(rtDispatchGlobalsInfo->rtDispatchGlobalsArray);
}
if (this->pImplicitArgs) {
pImplicitArgs->rtGlobalBufferPtr = address;
}
this->residencyContainer.push_back(rtDispatchGlobalsInfo->rtDispatchGlobalsArray);
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
}
this->midThreadPreemptionDisallowedForRayTracingKernels = productHelper.isMidThreadPreemptionDisallowedForRayTracingKernels();

View File

@@ -1253,7 +1253,7 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules,
nameToKernelDescriptor[kd.kernelMetadata.kernelName] = &kd;
}
}
auto error = NEO::resolveBarrierCount(externalFunctionInfos, kernelDependencies, extFuncDependencies, nameToKernelDescriptor);
auto error = NEO::resolveExternalDependencies(externalFunctionInfos, kernelDependencies, extFuncDependencies, nameToKernelDescriptor);
if (error != NEO::RESOLVE_SUCCESS) {
return ZE_RESULT_ERROR_MODULE_LINK_FAILURE;
}

View File

@@ -21,6 +21,7 @@
#include "shared/test/common/device_binary_format/patchtokens_tests.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/gtest_helpers.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
@@ -924,11 +925,18 @@ TEST_F(KernelImmutableDataTests, givenModuleWithPrivateMemoryBiggerThanGlobalMem
EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
}
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized) {
static_cast<OsAgnosticMemoryManager *>(device->getNEODevice()->getMemoryManager())->turnOnFakingBigAllocations();
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitializedAndPatchedInImplicitArgsBuffer) {
auto &hwInfo = *neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
hwInfo.gtSystemInfo.IsDynamicallyPopulated = false;
hwInfo.gtSystemInfo.SliceCount = 1;
hwInfo.gtSystemInfo.MaxSlicesSupported = 1;
hwInfo.gtSystemInfo.SubSliceCount = 1;
hwInfo.gtSystemInfo.MaxSubSlicesSupported = 1;
hwInfo.gtSystemInfo.DualSubSliceCount = 1;
hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = 1;
KernelDescriptor mockDescriptor = {};
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
mockDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
mockDescriptor.kernelMetadata.kernelName = "rt_test";
for (auto i = 0u; i < 3u; i++) {
mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
@@ -963,6 +971,9 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
EXPECT_NE(nullptr, rtDispatchGlobals);
auto implicitArgs = kernel->getImplicitArgs();
ASSERT_NE(nullptr, implicitArgs);
EXPECT_EQ_VAL(implicitArgs->rtGlobalBufferPtr, rtDispatchGlobals->rtDispatchGlobalsArray->getGpuAddressToPatch());
}
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZeroThenRayTracingIsInitialized) {
@@ -1002,9 +1013,8 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZ
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());
// Application is expected to allocate its own RTDispatchGlobals manually in this case.
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
EXPECT_EQ(nullptr, rtDispatchGlobals);
EXPECT_NE(nullptr, rtDispatchGlobals);
}
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -12,8 +12,8 @@
#include <algorithm>
namespace NEO {
uint32_t resolveBarrierCount(const ExternalFunctionInfosT &externalFunctionInfos, const KernelDependenciesT &kernelDependencies,
const FunctionDependenciesT &funcDependencies, const KernelDescriptorMapT &nameToKernelDescriptor) {
uint32_t resolveExternalDependencies(const ExternalFunctionInfosT &externalFunctionInfos, const KernelDependenciesT &kernelDependencies,
const FunctionDependenciesT &funcDependencies, const KernelDescriptorMapT &nameToKernelDescriptor) {
FuncNameToIdMapT funcNameToId;
for (size_t i = 0U; i < externalFunctionInfos.size(); i++) {
auto &extFuncInfo = externalFunctionInfos[i];
@@ -64,6 +64,7 @@ uint32_t resolveExtFuncDependencies(const ExternalFunctionInfosT &externalFuncti
for (auto callerId : calledBy[calleeId]) {
auto caller = externalFunctionInfos[callerId];
caller->barrierCount = std::max(caller->barrierCount, callee->barrierCount);
caller->hasRTCalls |= callee->hasRTCalls;
}
}
return RESOLVE_SUCCESS;
@@ -76,9 +77,10 @@ uint32_t resolveKernelDependencies(const ExternalFunctionInfosT &externalFunctio
} else if (nameToKernelDescriptor.count(kernelDep->kernelName) == 0) {
return ERROR_KERNEL_DESCRIPTOR_MISSING;
}
const auto functionBarrierCount = externalFunctionInfos.at(funcNameToId.at(kernelDep->usedFuncName))->barrierCount;
auto &kernelBarrierCount = nameToKernelDescriptor.at(kernelDep->kernelName)->kernelAttributes.barrierCount;
kernelBarrierCount = std::max(kernelBarrierCount, functionBarrierCount);
auto &kernelAttributes = nameToKernelDescriptor.at(kernelDep->kernelName)->kernelAttributes;
const auto &externalFunctionInfo = *externalFunctionInfos.at(funcNameToId.at(kernelDep->usedFuncName));
kernelAttributes.barrierCount = std::max(externalFunctionInfo.barrierCount, kernelAttributes.barrierCount);
kernelAttributes.flags.hasRTCalls |= externalFunctionInfo.hasRTCalls;
}
return RESOLVE_SUCCESS;
}

View File

@@ -27,6 +27,7 @@ struct ExternalFunctionInfo {
uint8_t barrierCount = 0U;
uint16_t numGrfRequired = 0U;
uint8_t simdSize = 0U;
bool hasRTCalls = false;
};
struct ExternalFunctionUsageKernel {
@@ -59,8 +60,8 @@ class DependencyResolver {
const std::vector<std::vector<size_t>> &graph;
};
uint32_t resolveBarrierCount(const ExternalFunctionInfosT &externalFunctionInfos, const KernelDependenciesT &kernelDependencies,
const FunctionDependenciesT &funcDependencies, const KernelDescriptorMapT &nameToKernelDescriptor);
uint32_t resolveExternalDependencies(const ExternalFunctionInfosT &externalFunctionInfos, const KernelDependenciesT &kernelDependencies,
const FunctionDependenciesT &funcDependencies, const KernelDescriptorMapT &nameToKernelDescriptor);
uint32_t getExtFuncDependencies(const FuncNameToIdMapT &funcNameToId, const FunctionDependenciesT &funcDependencies, size_t numExternalFuncs,
DependenciesT &outDependencies, CalledByT &outCalledBy);

View File

@@ -633,7 +633,7 @@ bool Linker::resolveExternalFunctions(const KernelDescriptorsT &kernelDescriptor
nameToKernelDescriptor[kd->kernelMetadata.kernelName] = kd;
}
auto error = NEO::resolveBarrierCount(externalFunctionsPtrs, kernelDependenciesPtrs, functionDependenciesPtrs, nameToKernelDescriptor);
auto error = NEO::resolveExternalDependencies(externalFunctionsPtrs, kernelDependenciesPtrs, functionDependenciesPtrs, nameToKernelDescriptor);
return (error == RESOLVE_SUCCESS) ? true : false;
}

View File

@@ -45,6 +45,7 @@ inline constexpr ConstStringRef hasGlobalAtomics("has_global_atomics");
inline constexpr ConstStringRef hasMultiScratchSpaces("has_multi_scratch_spaces");
inline constexpr ConstStringRef hasNoStatelessWrite("has_no_stateless_write");
inline constexpr ConstStringRef hasStackCalls("has_stack_calls");
inline constexpr ConstStringRef hasRTCalls("has_rtcalls");
inline constexpr ConstStringRef hwPreemptionMode("hw_preemption_mode");
inline constexpr ConstStringRef inlineDataPayloadSize("inline_data_payload_size");
inline constexpr ConstStringRef offsetToSkipPerThreadDataLoad("offset_to_skip_per_thread_data_load");
@@ -321,6 +322,7 @@ using HasNonKernelArgLoadT = int32_t;
using HasNonKernelArgStoreT = int32_t;
using HasNoStatelessWriteT = bool;
using HasStackCallsT = bool;
using HasRTCallsT = bool;
using HwPreemptionModeT = int32_t;
using InlineDataPayloadSizeT = int32_t;
using OffsetToSkipPerThreadDataLoadT = int32_t;
@@ -350,6 +352,7 @@ inline constexpr HasNonKernelArgLoadT hasNonKernelArgLoad = false;
inline constexpr HasNonKernelArgStoreT hasNonKernelArgStore = false;
inline constexpr HasNoStatelessWriteT hasNoStatelessWrite = false;
inline constexpr HasStackCallsT hasStackCalls = false;
inline constexpr HasRTCallsT hasRTCalls = false;
inline constexpr HwPreemptionModeT hwPreemptionMode = -1;
inline constexpr InlineDataPayloadSizeT inlineDataPayloadSize = 0;
inline constexpr OffsetToSkipPerThreadDataLoadT offsetToSkipPerThreadDataLoad = 0;
@@ -381,6 +384,7 @@ struct ExecutionEnvBaseT {
HasMultiScratchSpacesT hasMultiScratchSpaces = Defaults::hasMultiScratchSpaces;
HasNoStatelessWriteT hasNoStatelessWrite = Defaults::hasNoStatelessWrite;
HasStackCallsT hasStackCalls = Defaults::hasStackCalls;
HasRTCallsT hasRTCalls = Defaults::hasRTCalls;
HwPreemptionModeT hwPreemptionMode = Defaults::hwPreemptionMode;
InlineDataPayloadSizeT inlineDataPayloadSize = Defaults::inlineDataPayloadSize;
OffsetToSkipPerThreadDataLoadT offsetToSkipPerThreadDataLoad = Defaults::offsetToSkipPerThreadDataLoad;

View File

@@ -270,11 +270,12 @@ DecodeError populateExternalFunctionsMetadata(NEO::ProgramInfo &dst, NEO::Yaml::
}
if (isValid) {
NEO::ExternalFunctionInfo extFunInfo;
NEO::ExternalFunctionInfo extFunInfo{};
extFunInfo.functionName = functionName.str();
extFunInfo.barrierCount = static_cast<uint8_t>(execEnv.barrierCount);
extFunInfo.numGrfRequired = static_cast<uint16_t>(execEnv.grfCount);
extFunInfo.simdSize = static_cast<uint8_t>(execEnv.simdSize);
extFunInfo.hasRTCalls = execEnv.hasRTCalls;
dst.externalFunctions.push_back(extFunInfo);
return DecodeError::Success;
} else {
@@ -610,6 +611,8 @@ DecodeError readZeInfoExecutionEnvironment(const Yaml::YamlParser &parser, const
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hasNoStatelessWrite, context, outErrReason);
} else if (Tags::Kernel::ExecutionEnv::hasStackCalls == key) {
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hasStackCalls, context, outErrReason);
} else if (Tags::Kernel::ExecutionEnv::hasRTCalls == key) {
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hasRTCalls, context, outErrReason);
} else if (Tags::Kernel::ExecutionEnv::hwPreemptionMode == key) {
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hwPreemptionMode, context, outErrReason);
} else if (Tags::Kernel::ExecutionEnv::inlineDataPayloadSize == key) {
@@ -664,6 +667,7 @@ void populateKernelExecutionEnvironment(KernelDescriptor &dst, const KernelExecu
dst.kernelAttributes.flags.requiresDisabledEUFusion = execEnv.requireDisableEUFusion;
dst.kernelAttributes.flags.useGlobalAtomics = execEnv.hasGlobalAtomics;
dst.kernelAttributes.flags.useStackCalls = execEnv.hasStackCalls;
dst.kernelAttributes.flags.hasRTCalls = execEnv.hasRTCalls;
dst.kernelAttributes.flags.usesFencesForReadWriteImages = execEnv.hasFenceForImageAccess;
dst.kernelAttributes.flags.usesSystolicPipelineSelectMode = execEnv.hasDpas;
dst.kernelAttributes.flags.usesStatelessWrites = (false == execEnv.hasNoStatelessWrite);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -36,9 +36,13 @@ struct ImplicitArgs {
uint32_t groupCountX;
uint32_t groupCountY;
uint32_t groupCountZ;
uint32_t reserved;
uint32_t padding0;
uint64_t rtGlobalBufferPtr;
uint8_t reserved[24];
};
static_assert((sizeof(ImplicitArgs) & 31) == 0, "Implicit args size need to be aligned to 32");
static_assert(sizeof(ImplicitArgs) == 128);
static_assert(std::is_pod<ImplicitArgs>::value);
inline constexpr const char *implicitArgsRelocationSymbolName = "__INTEL_PATCH_CROSS_THREAD_OFFSET_OFF_R0";

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -79,9 +79,9 @@ struct ExternalFunctionsTests : public ::testing::Test {
void SetUp() override {}
void TearDown() override {}
void addExternalFunction(const std::string &functionName, uint8_t barrierCount) {
void addExternalFunction(const std::string &functionName, uint8_t barrierCount, bool hasRTCalls) {
funcNameToId[functionName] = extFuncInfoStorage.size();
extFuncInfoStorage.push_back(ExternalFunctionInfo{functionName, barrierCount, 128U, 8U});
extFuncInfoStorage.push_back(ExternalFunctionInfo{functionName, barrierCount, 128U, 8U, hasRTCalls});
}
void addKernel(const std::string &kernelName) {
kernelDescriptorStorage.push_back(std::make_unique<KernelDescriptor>());
@@ -137,14 +137,14 @@ TEST_F(ExternalFunctionsTests, GivenMissingExtFuncInLookupMapWhenResolvingExtFun
clear();
addFuncDependency("fun1", "fun0");
addExternalFunction("fun1", 0);
addExternalFunction("fun1", 0, false);
set();
error = resolveExtFuncDependencies(extFuncInfo, funcNameToId, functionDependencies);
EXPECT_EQ(ERROR_EXTERNAL_FUNCTION_INFO_MISSING, error);
clear();
addFuncDependency("fun1", "fun0");
addExternalFunction("fun0", 0);
addExternalFunction("fun0", 0, false);
set();
error = resolveExtFuncDependencies(extFuncInfo, funcNameToId, functionDependencies);
EXPECT_EQ(ERROR_EXTERNAL_FUNCTION_INFO_MISSING, error);
@@ -159,7 +159,7 @@ TEST_F(ExternalFunctionsTests, GivenMissingExtFuncInLookupMapWhenResolvingKernel
}
TEST_F(ExternalFunctionsTests, GivenMissingKernelInLookupMapWhenResolvingKernelDependenciesThenReturnError) {
addExternalFunction("fun0", 0);
addExternalFunction("fun0", 0, false);
addKernelDependency("fun0", "kernel");
set();
auto error = resolveKernelDependencies(extFuncInfo, funcNameToId, kernelDependencies, nameToKernelDescriptor);
@@ -168,14 +168,14 @@ TEST_F(ExternalFunctionsTests, GivenMissingKernelInLookupMapWhenResolvingKernelD
TEST_F(ExternalFunctionsTests, GivenNoDependenciesWhenResolvingBarrierCountThenReturnSuccess) {
set();
auto error = resolveBarrierCount(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
auto error = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
EXPECT_EQ(RESOLVE_SUCCESS, error);
}
TEST_F(ExternalFunctionsTests, GivenMissingExtFuncInExtFuncDependenciesWhenResolvingBarrierCountThenReturnError) {
addFuncDependency("fun0", "fun1");
set();
auto error = resolveBarrierCount(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
auto error = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
EXPECT_EQ(ERROR_EXTERNAL_FUNCTION_INFO_MISSING, error);
}
@@ -183,13 +183,13 @@ TEST_F(ExternalFunctionsTests, GivenMissingExtFuncInKernelDependenciesWhenResolv
addKernelDependency("fun0", "kernel");
addKernel("kernel");
set();
auto error = resolveBarrierCount(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
auto error = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
EXPECT_EQ(ERROR_EXTERNAL_FUNCTION_INFO_MISSING, error);
}
TEST_F(ExternalFunctionsTests, GivenLoopWhenResolvingExtFuncDependenciesThenReturnSuccess) {
addExternalFunction("fun0", 4);
addExternalFunction("fun1", 2);
addExternalFunction("fun0", 4, false);
addExternalFunction("fun1", 2, false);
addFuncDependency("fun0", "fun1");
addFuncDependency("fun1", "fun0");
set();
@@ -199,16 +199,41 @@ TEST_F(ExternalFunctionsTests, GivenLoopWhenResolvingExtFuncDependenciesThenRetu
EXPECT_EQ(4U, extFuncInfo[funcNameToId["fun1"]]->barrierCount);
}
TEST_F(ExternalFunctionsTests, GivenValidFunctionAndKernelDependenciesWhenResolvingBarrierCountThenSetAppropriateBarrierCountAndReturnSuccess) {
TEST_F(ExternalFunctionsTests, GivenValidFunctionAndKernelDependenciesWhenResolvingDependenciesThenSetAppropriateBarrierCountAndReturnSuccess) {
addKernel("kernel");
addExternalFunction("fun0", 1U);
addExternalFunction("fun1", 2U);
addExternalFunction("fun0", 1U, false);
addExternalFunction("fun1", 2U, false);
addFuncDependency("fun1", "fun0");
addKernelDependency("fun0", "kernel");
set();
auto error = resolveBarrierCount(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
auto error = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
EXPECT_EQ(RESOLVE_SUCCESS, error);
EXPECT_EQ(2U, extFuncInfo[funcNameToId["fun0"]]->barrierCount);
EXPECT_EQ(2U, extFuncInfo[funcNameToId["fun1"]]->barrierCount);
EXPECT_EQ(2U, nameToKernelDescriptor["kernel"]->kernelAttributes.barrierCount);
}
// Verifies that resolveExternalDependencies() propagates hasRTCalls:
//  - fun1 is registered with hasRTCalls == true and is linked to fun0 through a
//    function dependency, so fun0 is expected to end up with the flag set too;
//  - kernel0 depends on fun0 and is expected to inherit the flag;
//  - fun2 has no RT calls, so kernel1 (which depends only on fun2) stays false;
//  - kernel2 has the flag set up front and must keep it even though fun2 is false.
TEST_F(ExternalFunctionsTests, GivenValidFunctionAndKernelDependenciesWhenResolvingDependenciesThenSetAppropriateHasRTCallsAndReturnSuccess) {
    // Kernels taking part in the scenario.
    addKernel("kernel0");
    addKernel("kernel1");
    addKernel("kernel2");

    // External functions: only fun1 reports ray tracing calls initially.
    addExternalFunction("fun0", 0u, false);
    addExternalFunction("fun1", 0u, true);
    addExternalFunction("fun2", 0u, false);

    // Dependency graph between functions and kernels.
    addFuncDependency("fun1", "fun0");
    addKernelDependency("fun0", "kernel0");
    addKernelDependency("fun2", "kernel1");
    addKernelDependency("fun2", "kernel2");
    set();

    // kernel2 already uses RT calls on its own, independently of its dependencies.
    auto &kernel2Flags = nameToKernelDescriptor["kernel2"]->kernelAttributes.flags;
    kernel2Flags.hasRTCalls = true;

    const auto resolveStatus = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
    EXPECT_EQ(RESOLVE_SUCCESS, resolveStatus);

    // Small accessors to keep the expectations below readable.
    const auto functionHasRTCalls = [&](const std::string &functionName) -> bool {
        return extFuncInfo[funcNameToId[functionName]]->hasRTCalls;
    };
    const auto kernelHasRTCalls = [&](const std::string &kernelName) -> bool {
        return nameToKernelDescriptor[kernelName]->kernelAttributes.flags.hasRTCalls;
    };

    EXPECT_TRUE(functionHasRTCalls("fun0"));
    EXPECT_TRUE(functionHasRTCalls("fun1"));
    EXPECT_FALSE(functionHasRTCalls("fun2"));

    EXPECT_TRUE(kernelHasRTCalls("kernel0"));
    EXPECT_FALSE(kernelHasRTCalls("kernel1"));
    EXPECT_TRUE(kernelHasRTCalls("kernel2"));
}

View File

@@ -1523,6 +1523,7 @@ kernels:
has_multi_scratch_spaces : true
has_no_stateless_write : true
has_stack_calls : true
has_rtcalls : true
require_disable_eufusion : true
has_sample : true
hw_preemption_mode : 2
@@ -1571,6 +1572,7 @@ kernels:
EXPECT_TRUE(execEnv.hasMultiScratchSpaces);
EXPECT_TRUE(execEnv.hasNoStatelessWrite);
EXPECT_TRUE(execEnv.hasStackCalls);
EXPECT_TRUE(execEnv.hasRTCalls);
EXPECT_TRUE(execEnv.hasSample);
EXPECT_EQ(2, execEnv.hwPreemptionMode);
EXPECT_EQ(32, execEnv.inlineDataPayloadSize);
@@ -3073,6 +3075,7 @@ functions:
grf_count: 128
simd_size: 8
barrier_count: 1
has_rtcalls: true
)===";
uint8_t kernelIsa[8]{0U};
@@ -3097,6 +3100,7 @@ functions:
EXPECT_EQ(128U, funInfo.numGrfRequired);
EXPECT_EQ(8U, funInfo.simdSize);
EXPECT_EQ(1U, funInfo.barrierCount);
EXPECT_EQ(true, funInfo.hasRTCalls);
}
TEST(DecodeSingleDeviceBinaryZebin, GivenValidZeInfoAndInvalidExternalFunctionsMetadataThenFail) {
@@ -3230,6 +3234,7 @@ TEST_F(decodeZeInfoKernelEntryTest, GivenMinimalExecutionEnvThenPopulateKernelDe
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress, Defaults::subgroupIndependentForwardProgress);
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, Defaults::hasGlobalAtomics);
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.useStackCalls, Defaults::hasStackCalls);
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.hasRTCalls, Defaults::hasRTCalls);
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages, Defaults::hasFenceForImageAccess);
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode, Defaults::hasDpas);
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.hasSample, Defaults::hasSample);