mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
feature implicit args: patch rt dispatch global array in implicit args buffer
handle has_rtcalls in kernels and functions in zebin

Related-To: NEO-7818
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
7e5e27f0b9
commit
dd39b822d3
@@ -954,27 +954,28 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
||||
if (this->usesRayTracing()) {
|
||||
uint32_t bvhLevels = NEO::RayTracingHelper::maxBvhLevels;
|
||||
auto arg = this->getImmutableData()->getDescriptor().payloadMappings.implicitArgs.rtDispatchGlobals;
|
||||
if (arg.pointerSize == 0) {
|
||||
// application is allocating its own RTDispatchGlobals manually
|
||||
neoDevice->initializeRayTracing(0);
|
||||
} else {
|
||||
neoDevice->initializeRayTracing(bvhLevels);
|
||||
auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels);
|
||||
if (rtDispatchGlobalsInfo == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
neoDevice->initializeRayTracing(bvhLevels);
|
||||
|
||||
for (auto rtStack : rtDispatchGlobalsInfo->rtStacks) {
|
||||
this->residencyContainer.push_back(rtStack);
|
||||
}
|
||||
auto rtDispatchGlobalsInfo = neoDevice->getRTDispatchGlobals(bvhLevels);
|
||||
if (rtDispatchGlobalsInfo == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
|
||||
auto address = rtDispatchGlobalsInfo->rtDispatchGlobalsArray->getGpuAddressToPatch();
|
||||
for (auto rtStack : rtDispatchGlobalsInfo->rtStacks) {
|
||||
this->residencyContainer.push_back(rtStack);
|
||||
}
|
||||
|
||||
auto address = rtDispatchGlobalsInfo->rtDispatchGlobalsArray->getGpuAddressToPatch();
|
||||
if (NEO::isValidOffset(arg.stateless)) {
|
||||
NEO::patchPointer(ArrayRef<uint8_t>(crossThreadData.get(), crossThreadDataSize),
|
||||
arg,
|
||||
static_cast<uintptr_t>(address));
|
||||
|
||||
this->residencyContainer.push_back(rtDispatchGlobalsInfo->rtDispatchGlobalsArray);
|
||||
}
|
||||
if (this->pImplicitArgs) {
|
||||
pImplicitArgs->rtGlobalBufferPtr = address;
|
||||
}
|
||||
|
||||
this->residencyContainer.push_back(rtDispatchGlobalsInfo->rtDispatchGlobalsArray);
|
||||
this->residencyContainer.push_back(neoDevice->getRTMemoryBackedBuffer());
|
||||
}
|
||||
this->midThreadPreemptionDisallowedForRayTracingKernels = productHelper.isMidThreadPreemptionDisallowedForRayTracingKernels();
|
||||
|
||||
@@ -1253,7 +1253,7 @@ ze_result_t ModuleImp::performDynamicLink(uint32_t numModules,
|
||||
nameToKernelDescriptor[kd.kernelMetadata.kernelName] = &kd;
|
||||
}
|
||||
}
|
||||
auto error = NEO::resolveBarrierCount(externalFunctionInfos, kernelDependencies, extFuncDependencies, nameToKernelDescriptor);
|
||||
auto error = NEO::resolveExternalDependencies(externalFunctionInfos, kernelDependencies, extFuncDependencies, nameToKernelDescriptor);
|
||||
if (error != NEO::RESOLVE_SUCCESS) {
|
||||
return ZE_RESULT_ERROR_MODULE_LINK_FAILURE;
|
||||
}
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "shared/test/common/device_binary_format/patchtokens_tests.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/engine_descriptor_helper.h"
|
||||
#include "shared/test/common/helpers/gtest_helpers.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
@@ -924,11 +925,18 @@ TEST_F(KernelImmutableDataTests, givenModuleWithPrivateMemoryBiggerThanGlobalMem
|
||||
EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
|
||||
}
|
||||
|
||||
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized) {
|
||||
static_cast<OsAgnosticMemoryManager *>(device->getNEODevice()->getMemoryManager())->turnOnFakingBigAllocations();
|
||||
|
||||
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitializedAndPatchedInImplicitArgsBuffer) {
|
||||
auto &hwInfo = *neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
hwInfo.gtSystemInfo.IsDynamicallyPopulated = false;
|
||||
hwInfo.gtSystemInfo.SliceCount = 1;
|
||||
hwInfo.gtSystemInfo.MaxSlicesSupported = 1;
|
||||
hwInfo.gtSystemInfo.SubSliceCount = 1;
|
||||
hwInfo.gtSystemInfo.MaxSubSlicesSupported = 1;
|
||||
hwInfo.gtSystemInfo.DualSubSliceCount = 1;
|
||||
hwInfo.gtSystemInfo.MaxDualSubSlicesSupported = 1;
|
||||
KernelDescriptor mockDescriptor = {};
|
||||
mockDescriptor.kernelAttributes.flags.hasRTCalls = true;
|
||||
mockDescriptor.kernelAttributes.flags.requiresImplicitArgs = true;
|
||||
mockDescriptor.kernelMetadata.kernelName = "rt_test";
|
||||
for (auto i = 0u; i < 3u; i++) {
|
||||
mockDescriptor.kernelAttributes.requiredWorkgroupSize[i] = 0;
|
||||
@@ -963,6 +971,9 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueThenRayTracingIsInitialized
|
||||
|
||||
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
|
||||
EXPECT_NE(nullptr, rtDispatchGlobals);
|
||||
auto implicitArgs = kernel->getImplicitArgs();
|
||||
ASSERT_NE(nullptr, implicitArgs);
|
||||
EXPECT_EQ_VAL(implicitArgs->rtGlobalBufferPtr, rtDispatchGlobals->rtDispatchGlobalsArray->getGpuAddressToPatch());
|
||||
}
|
||||
|
||||
TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZeroThenRayTracingIsInitialized) {
|
||||
@@ -1002,9 +1013,8 @@ TEST_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndPatchTokenPointerSizeIsZ
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_NE(nullptr, module->getDevice()->getNEODevice()->getRTMemoryBackedBuffer());
|
||||
|
||||
// Application is expected to allocate its own RTDispatchGlobals manually in this case.
|
||||
auto rtDispatchGlobals = neoDevice->getRTDispatchGlobals(NEO::RayTracingHelper::maxBvhLevels);
|
||||
EXPECT_EQ(nullptr, rtDispatchGlobals);
|
||||
EXPECT_NE(nullptr, rtDispatchGlobals);
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelImmutableDataTests, whenHasRTCallsIsTrueAndNoRTDispatchGlobalsIsAllocatedThenRayTracingIsNotInitialized, IsAtLeastXeHpgCore) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -12,8 +12,8 @@
|
||||
#include <algorithm>
|
||||
namespace NEO {
|
||||
|
||||
uint32_t resolveBarrierCount(const ExternalFunctionInfosT &externalFunctionInfos, const KernelDependenciesT &kernelDependencies,
|
||||
const FunctionDependenciesT &funcDependencies, const KernelDescriptorMapT &nameToKernelDescriptor) {
|
||||
uint32_t resolveExternalDependencies(const ExternalFunctionInfosT &externalFunctionInfos, const KernelDependenciesT &kernelDependencies,
|
||||
const FunctionDependenciesT &funcDependencies, const KernelDescriptorMapT &nameToKernelDescriptor) {
|
||||
FuncNameToIdMapT funcNameToId;
|
||||
for (size_t i = 0U; i < externalFunctionInfos.size(); i++) {
|
||||
auto &extFuncInfo = externalFunctionInfos[i];
|
||||
@@ -64,6 +64,7 @@ uint32_t resolveExtFuncDependencies(const ExternalFunctionInfosT &externalFuncti
|
||||
for (auto callerId : calledBy[calleeId]) {
|
||||
auto caller = externalFunctionInfos[callerId];
|
||||
caller->barrierCount = std::max(caller->barrierCount, callee->barrierCount);
|
||||
caller->hasRTCalls |= callee->hasRTCalls;
|
||||
}
|
||||
}
|
||||
return RESOLVE_SUCCESS;
|
||||
@@ -76,9 +77,10 @@ uint32_t resolveKernelDependencies(const ExternalFunctionInfosT &externalFunctio
|
||||
} else if (nameToKernelDescriptor.count(kernelDep->kernelName) == 0) {
|
||||
return ERROR_KERNEL_DESCRIPTOR_MISSING;
|
||||
}
|
||||
const auto functionBarrierCount = externalFunctionInfos.at(funcNameToId.at(kernelDep->usedFuncName))->barrierCount;
|
||||
auto &kernelBarrierCount = nameToKernelDescriptor.at(kernelDep->kernelName)->kernelAttributes.barrierCount;
|
||||
kernelBarrierCount = std::max(kernelBarrierCount, functionBarrierCount);
|
||||
auto &kernelAttributes = nameToKernelDescriptor.at(kernelDep->kernelName)->kernelAttributes;
|
||||
const auto &externalFunctionInfo = *externalFunctionInfos.at(funcNameToId.at(kernelDep->usedFuncName));
|
||||
kernelAttributes.barrierCount = std::max(externalFunctionInfo.barrierCount, kernelAttributes.barrierCount);
|
||||
kernelAttributes.flags.hasRTCalls |= externalFunctionInfo.hasRTCalls;
|
||||
}
|
||||
return RESOLVE_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -27,6 +27,7 @@ struct ExternalFunctionInfo {
|
||||
uint8_t barrierCount = 0U;
|
||||
uint16_t numGrfRequired = 0U;
|
||||
uint8_t simdSize = 0U;
|
||||
bool hasRTCalls = false;
|
||||
};
|
||||
|
||||
struct ExternalFunctionUsageKernel {
|
||||
@@ -59,8 +60,8 @@ class DependencyResolver {
|
||||
const std::vector<std::vector<size_t>> &graph;
|
||||
};
|
||||
|
||||
uint32_t resolveBarrierCount(const ExternalFunctionInfosT &externalFunctionInfos, const KernelDependenciesT &kernelDependencies,
|
||||
const FunctionDependenciesT &funcDependencies, const KernelDescriptorMapT &nameToKernelDescriptor);
|
||||
uint32_t resolveExternalDependencies(const ExternalFunctionInfosT &externalFunctionInfos, const KernelDependenciesT &kernelDependencies,
|
||||
const FunctionDependenciesT &funcDependencies, const KernelDescriptorMapT &nameToKernelDescriptor);
|
||||
|
||||
uint32_t getExtFuncDependencies(const FuncNameToIdMapT &funcNameToId, const FunctionDependenciesT &funcDependencies, size_t numExternalFuncs,
|
||||
DependenciesT &outDependencies, CalledByT &outCalledBy);
|
||||
|
||||
@@ -633,7 +633,7 @@ bool Linker::resolveExternalFunctions(const KernelDescriptorsT &kernelDescriptor
|
||||
nameToKernelDescriptor[kd->kernelMetadata.kernelName] = kd;
|
||||
}
|
||||
|
||||
auto error = NEO::resolveBarrierCount(externalFunctionsPtrs, kernelDependenciesPtrs, functionDependenciesPtrs, nameToKernelDescriptor);
|
||||
auto error = NEO::resolveExternalDependencies(externalFunctionsPtrs, kernelDependenciesPtrs, functionDependenciesPtrs, nameToKernelDescriptor);
|
||||
return (error == RESOLVE_SUCCESS) ? true : false;
|
||||
}
|
||||
|
||||
|
||||
@@ -45,6 +45,7 @@ inline constexpr ConstStringRef hasGlobalAtomics("has_global_atomics");
|
||||
inline constexpr ConstStringRef hasMultiScratchSpaces("has_multi_scratch_spaces");
|
||||
inline constexpr ConstStringRef hasNoStatelessWrite("has_no_stateless_write");
|
||||
inline constexpr ConstStringRef hasStackCalls("has_stack_calls");
|
||||
inline constexpr ConstStringRef hasRTCalls("has_rtcalls");
|
||||
inline constexpr ConstStringRef hwPreemptionMode("hw_preemption_mode");
|
||||
inline constexpr ConstStringRef inlineDataPayloadSize("inline_data_payload_size");
|
||||
inline constexpr ConstStringRef offsetToSkipPerThreadDataLoad("offset_to_skip_per_thread_data_load");
|
||||
@@ -321,6 +322,7 @@ using HasNonKernelArgLoadT = int32_t;
|
||||
using HasNonKernelArgStoreT = int32_t;
|
||||
using HasNoStatelessWriteT = bool;
|
||||
using HasStackCallsT = bool;
|
||||
using HasRTCallsT = bool;
|
||||
using HwPreemptionModeT = int32_t;
|
||||
using InlineDataPayloadSizeT = int32_t;
|
||||
using OffsetToSkipPerThreadDataLoadT = int32_t;
|
||||
@@ -350,6 +352,7 @@ inline constexpr HasNonKernelArgLoadT hasNonKernelArgLoad = false;
|
||||
inline constexpr HasNonKernelArgStoreT hasNonKernelArgStore = false;
|
||||
inline constexpr HasNoStatelessWriteT hasNoStatelessWrite = false;
|
||||
inline constexpr HasStackCallsT hasStackCalls = false;
|
||||
inline constexpr HasRTCallsT hasRTCalls = false;
|
||||
inline constexpr HwPreemptionModeT hwPreemptionMode = -1;
|
||||
inline constexpr InlineDataPayloadSizeT inlineDataPayloadSize = 0;
|
||||
inline constexpr OffsetToSkipPerThreadDataLoadT offsetToSkipPerThreadDataLoad = 0;
|
||||
@@ -381,6 +384,7 @@ struct ExecutionEnvBaseT {
|
||||
HasMultiScratchSpacesT hasMultiScratchSpaces = Defaults::hasMultiScratchSpaces;
|
||||
HasNoStatelessWriteT hasNoStatelessWrite = Defaults::hasNoStatelessWrite;
|
||||
HasStackCallsT hasStackCalls = Defaults::hasStackCalls;
|
||||
HasRTCallsT hasRTCalls = Defaults::hasRTCalls;
|
||||
HwPreemptionModeT hwPreemptionMode = Defaults::hwPreemptionMode;
|
||||
InlineDataPayloadSizeT inlineDataPayloadSize = Defaults::inlineDataPayloadSize;
|
||||
OffsetToSkipPerThreadDataLoadT offsetToSkipPerThreadDataLoad = Defaults::offsetToSkipPerThreadDataLoad;
|
||||
|
||||
@@ -270,11 +270,12 @@ DecodeError populateExternalFunctionsMetadata(NEO::ProgramInfo &dst, NEO::Yaml::
|
||||
}
|
||||
|
||||
if (isValid) {
|
||||
NEO::ExternalFunctionInfo extFunInfo;
|
||||
NEO::ExternalFunctionInfo extFunInfo{};
|
||||
extFunInfo.functionName = functionName.str();
|
||||
extFunInfo.barrierCount = static_cast<uint8_t>(execEnv.barrierCount);
|
||||
extFunInfo.numGrfRequired = static_cast<uint16_t>(execEnv.grfCount);
|
||||
extFunInfo.simdSize = static_cast<uint8_t>(execEnv.simdSize);
|
||||
extFunInfo.hasRTCalls = execEnv.hasRTCalls;
|
||||
dst.externalFunctions.push_back(extFunInfo);
|
||||
return DecodeError::Success;
|
||||
} else {
|
||||
@@ -610,6 +611,8 @@ DecodeError readZeInfoExecutionEnvironment(const Yaml::YamlParser &parser, const
|
||||
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hasNoStatelessWrite, context, outErrReason);
|
||||
} else if (Tags::Kernel::ExecutionEnv::hasStackCalls == key) {
|
||||
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hasStackCalls, context, outErrReason);
|
||||
} else if (Tags::Kernel::ExecutionEnv::hasRTCalls == key) {
|
||||
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hasRTCalls, context, outErrReason);
|
||||
} else if (Tags::Kernel::ExecutionEnv::hwPreemptionMode == key) {
|
||||
validExecEnv &= readZeInfoValueChecked(parser, execEnvMetadataNd, outExecEnv.hwPreemptionMode, context, outErrReason);
|
||||
} else if (Tags::Kernel::ExecutionEnv::inlineDataPayloadSize == key) {
|
||||
@@ -664,6 +667,7 @@ void populateKernelExecutionEnvironment(KernelDescriptor &dst, const KernelExecu
|
||||
dst.kernelAttributes.flags.requiresDisabledEUFusion = execEnv.requireDisableEUFusion;
|
||||
dst.kernelAttributes.flags.useGlobalAtomics = execEnv.hasGlobalAtomics;
|
||||
dst.kernelAttributes.flags.useStackCalls = execEnv.hasStackCalls;
|
||||
dst.kernelAttributes.flags.hasRTCalls = execEnv.hasRTCalls;
|
||||
dst.kernelAttributes.flags.usesFencesForReadWriteImages = execEnv.hasFenceForImageAccess;
|
||||
dst.kernelAttributes.flags.usesSystolicPipelineSelectMode = execEnv.hasDpas;
|
||||
dst.kernelAttributes.flags.usesStatelessWrites = (false == execEnv.hasNoStatelessWrite);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -36,9 +36,13 @@ struct ImplicitArgs {
|
||||
uint32_t groupCountX;
|
||||
uint32_t groupCountY;
|
||||
uint32_t groupCountZ;
|
||||
uint32_t reserved;
|
||||
uint32_t padding0;
|
||||
uint64_t rtGlobalBufferPtr;
|
||||
uint8_t reserved[24];
|
||||
};
|
||||
|
||||
static_assert((sizeof(ImplicitArgs) & 31) == 0, "Implicit args size need to be aligned to 32");
|
||||
static_assert(sizeof(ImplicitArgs) == 128);
|
||||
static_assert(std::is_pod<ImplicitArgs>::value);
|
||||
|
||||
inline constexpr const char *implicitArgsRelocationSymbolName = "__INTEL_PATCH_CROSS_THREAD_OFFSET_OFF_R0";
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -79,9 +79,9 @@ struct ExternalFunctionsTests : public ::testing::Test {
|
||||
void SetUp() override {}
|
||||
void TearDown() override {}
|
||||
|
||||
void addExternalFunction(const std::string &functionName, uint8_t barrierCount) {
|
||||
void addExternalFunction(const std::string &functionName, uint8_t barrierCount, bool hasRTCalls) {
|
||||
funcNameToId[functionName] = extFuncInfoStorage.size();
|
||||
extFuncInfoStorage.push_back(ExternalFunctionInfo{functionName, barrierCount, 128U, 8U});
|
||||
extFuncInfoStorage.push_back(ExternalFunctionInfo{functionName, barrierCount, 128U, 8U, hasRTCalls});
|
||||
}
|
||||
void addKernel(const std::string &kernelName) {
|
||||
kernelDescriptorStorage.push_back(std::make_unique<KernelDescriptor>());
|
||||
@@ -137,14 +137,14 @@ TEST_F(ExternalFunctionsTests, GivenMissingExtFuncInLookupMapWhenResolvingExtFun
|
||||
clear();
|
||||
|
||||
addFuncDependency("fun1", "fun0");
|
||||
addExternalFunction("fun1", 0);
|
||||
addExternalFunction("fun1", 0, false);
|
||||
set();
|
||||
error = resolveExtFuncDependencies(extFuncInfo, funcNameToId, functionDependencies);
|
||||
EXPECT_EQ(ERROR_EXTERNAL_FUNCTION_INFO_MISSING, error);
|
||||
clear();
|
||||
|
||||
addFuncDependency("fun1", "fun0");
|
||||
addExternalFunction("fun0", 0);
|
||||
addExternalFunction("fun0", 0, false);
|
||||
set();
|
||||
error = resolveExtFuncDependencies(extFuncInfo, funcNameToId, functionDependencies);
|
||||
EXPECT_EQ(ERROR_EXTERNAL_FUNCTION_INFO_MISSING, error);
|
||||
@@ -159,7 +159,7 @@ TEST_F(ExternalFunctionsTests, GivenMissingExtFuncInLookupMapWhenResolvingKernel
|
||||
}
|
||||
|
||||
TEST_F(ExternalFunctionsTests, GivenMissingKernelInLookupMapWhenResolvingKernelDependenciesThenReturnError) {
|
||||
addExternalFunction("fun0", 0);
|
||||
addExternalFunction("fun0", 0, false);
|
||||
addKernelDependency("fun0", "kernel");
|
||||
set();
|
||||
auto error = resolveKernelDependencies(extFuncInfo, funcNameToId, kernelDependencies, nameToKernelDescriptor);
|
||||
@@ -168,14 +168,14 @@ TEST_F(ExternalFunctionsTests, GivenMissingKernelInLookupMapWhenResolvingKernelD
|
||||
|
||||
TEST_F(ExternalFunctionsTests, GivenNoDependenciesWhenResolvingBarrierCountThenReturnSuccess) {
|
||||
set();
|
||||
auto error = resolveBarrierCount(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
|
||||
auto error = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
|
||||
EXPECT_EQ(RESOLVE_SUCCESS, error);
|
||||
}
|
||||
|
||||
TEST_F(ExternalFunctionsTests, GivenMissingExtFuncInExtFuncDependenciesWhenResolvingBarrierCountThenReturnError) {
|
||||
addFuncDependency("fun0", "fun1");
|
||||
set();
|
||||
auto error = resolveBarrierCount(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
|
||||
auto error = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
|
||||
EXPECT_EQ(ERROR_EXTERNAL_FUNCTION_INFO_MISSING, error);
|
||||
}
|
||||
|
||||
@@ -183,13 +183,13 @@ TEST_F(ExternalFunctionsTests, GivenMissingExtFuncInKernelDependenciesWhenResolv
|
||||
addKernelDependency("fun0", "kernel");
|
||||
addKernel("kernel");
|
||||
set();
|
||||
auto error = resolveBarrierCount(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
|
||||
auto error = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
|
||||
EXPECT_EQ(ERROR_EXTERNAL_FUNCTION_INFO_MISSING, error);
|
||||
}
|
||||
|
||||
TEST_F(ExternalFunctionsTests, GivenLoopWhenResolvingExtFuncDependenciesThenReturnSuccess) {
|
||||
addExternalFunction("fun0", 4);
|
||||
addExternalFunction("fun1", 2);
|
||||
addExternalFunction("fun0", 4, false);
|
||||
addExternalFunction("fun1", 2, false);
|
||||
addFuncDependency("fun0", "fun1");
|
||||
addFuncDependency("fun1", "fun0");
|
||||
set();
|
||||
@@ -199,16 +199,41 @@ TEST_F(ExternalFunctionsTests, GivenLoopWhenResolvingExtFuncDependenciesThenRetu
|
||||
EXPECT_EQ(4U, extFuncInfo[funcNameToId["fun1"]]->barrierCount);
|
||||
}
|
||||
|
||||
TEST_F(ExternalFunctionsTests, GivenValidFunctionAndKernelDependenciesWhenResolvingBarrierCountThenSetAppropriateBarrierCountAndReturnSuccess) {
|
||||
TEST_F(ExternalFunctionsTests, GivenValidFunctionAndKernelDependenciesWhenResolvingDependenciesThenSetAppropriateBarrierCountAndReturnSuccess) {
|
||||
addKernel("kernel");
|
||||
addExternalFunction("fun0", 1U);
|
||||
addExternalFunction("fun1", 2U);
|
||||
addExternalFunction("fun0", 1U, false);
|
||||
addExternalFunction("fun1", 2U, false);
|
||||
addFuncDependency("fun1", "fun0");
|
||||
addKernelDependency("fun0", "kernel");
|
||||
set();
|
||||
auto error = resolveBarrierCount(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
|
||||
auto error = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
|
||||
EXPECT_EQ(RESOLVE_SUCCESS, error);
|
||||
EXPECT_EQ(2U, extFuncInfo[funcNameToId["fun0"]]->barrierCount);
|
||||
EXPECT_EQ(2U, extFuncInfo[funcNameToId["fun1"]]->barrierCount);
|
||||
EXPECT_EQ(2U, nameToKernelDescriptor["kernel"]->kernelAttributes.barrierCount);
|
||||
}
|
||||
|
||||
TEST_F(ExternalFunctionsTests, GivenValidFunctionAndKernelDependenciesWhenResolvingDependenciesThenSetAppropriateHasRTCallsAndReturnSuccess) {
|
||||
addKernel("kernel0");
|
||||
addKernel("kernel1");
|
||||
addKernel("kernel2");
|
||||
addExternalFunction("fun0", 0u, false);
|
||||
addExternalFunction("fun1", 0u, true);
|
||||
addExternalFunction("fun2", 0u, false);
|
||||
|
||||
addFuncDependency("fun1", "fun0");
|
||||
addKernelDependency("fun0", "kernel0");
|
||||
addKernelDependency("fun2", "kernel1");
|
||||
addKernelDependency("fun2", "kernel2");
|
||||
set();
|
||||
|
||||
nameToKernelDescriptor["kernel2"]->kernelAttributes.flags.hasRTCalls = true;
|
||||
auto error = resolveExternalDependencies(extFuncInfo, kernelDependencies, functionDependencies, nameToKernelDescriptor);
|
||||
EXPECT_EQ(RESOLVE_SUCCESS, error);
|
||||
EXPECT_TRUE(extFuncInfo[funcNameToId["fun0"]]->hasRTCalls);
|
||||
EXPECT_TRUE(extFuncInfo[funcNameToId["fun1"]]->hasRTCalls);
|
||||
EXPECT_FALSE(extFuncInfo[funcNameToId["fun2"]]->hasRTCalls);
|
||||
EXPECT_TRUE(nameToKernelDescriptor["kernel0"]->kernelAttributes.flags.hasRTCalls);
|
||||
EXPECT_FALSE(nameToKernelDescriptor["kernel1"]->kernelAttributes.flags.hasRTCalls);
|
||||
EXPECT_TRUE(nameToKernelDescriptor["kernel2"]->kernelAttributes.flags.hasRTCalls);
|
||||
}
|
||||
|
||||
@@ -1523,6 +1523,7 @@ kernels:
|
||||
has_multi_scratch_spaces : true
|
||||
has_no_stateless_write : true
|
||||
has_stack_calls : true
|
||||
has_rtcalls : true
|
||||
require_disable_eufusion : true
|
||||
has_sample : true
|
||||
hw_preemption_mode : 2
|
||||
@@ -1571,6 +1572,7 @@ kernels:
|
||||
EXPECT_TRUE(execEnv.hasMultiScratchSpaces);
|
||||
EXPECT_TRUE(execEnv.hasNoStatelessWrite);
|
||||
EXPECT_TRUE(execEnv.hasStackCalls);
|
||||
EXPECT_TRUE(execEnv.hasRTCalls);
|
||||
EXPECT_TRUE(execEnv.hasSample);
|
||||
EXPECT_EQ(2, execEnv.hwPreemptionMode);
|
||||
EXPECT_EQ(32, execEnv.inlineDataPayloadSize);
|
||||
@@ -3073,6 +3075,7 @@ functions:
|
||||
grf_count: 128
|
||||
simd_size: 8
|
||||
barrier_count: 1
|
||||
has_rtcalls: true
|
||||
)===";
|
||||
|
||||
uint8_t kernelIsa[8]{0U};
|
||||
@@ -3097,6 +3100,7 @@ functions:
|
||||
EXPECT_EQ(128U, funInfo.numGrfRequired);
|
||||
EXPECT_EQ(8U, funInfo.simdSize);
|
||||
EXPECT_EQ(1U, funInfo.barrierCount);
|
||||
EXPECT_EQ(true, funInfo.hasRTCalls);
|
||||
}
|
||||
|
||||
TEST(DecodeSingleDeviceBinaryZebin, GivenValidZeInfoAndInvalidExternalFunctionsMetadataThenFail) {
|
||||
@@ -3230,6 +3234,7 @@ TEST_F(decodeZeInfoKernelEntryTest, GivenMinimalExecutionEnvThenPopulateKernelDe
|
||||
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.requiresSubgroupIndependentForwardProgress, Defaults::subgroupIndependentForwardProgress);
|
||||
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, Defaults::hasGlobalAtomics);
|
||||
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.useStackCalls, Defaults::hasStackCalls);
|
||||
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.hasRTCalls, Defaults::hasRTCalls);
|
||||
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages, Defaults::hasFenceForImageAccess);
|
||||
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode, Defaults::hasDpas);
|
||||
EXPECT_EQ(kernelDescriptor.kernelAttributes.flags.hasSample, Defaults::hasSample);
|
||||
|
||||
Reference in New Issue
Block a user