Add RT calls support to Kernel descriptor

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2022-04-22 12:47:26 +00:00
committed by Compute-Runtime-Automation
parent 604329479f
commit 9959896957
15 changed files with 92 additions and 100 deletions

View File

@@ -15,15 +15,11 @@ set(NEO_CORE_KERNEL
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_device_side_enqueue.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_vme.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_metadata.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_descriptor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.cpp
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h
${CMAKE_CURRENT_SOURCE_DIR}/kernel_properties.h
${CMAKE_CURRENT_SOURCE_DIR}/read_extended_info.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}read_extended_info.cpp
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_descriptor_from_patchtokens_extended.cpp
)
set_property(GLOBAL PROPERTY NEO_CORE_KERNEL ${NEO_CORE_KERNEL})

View File

@@ -1,14 +0,0 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/kernel/kernel_descriptor.h"
namespace NEO {
// Out-of-line definition of the virtual KernelDescriptor::hasRTCalls() declared in kernel_descriptor.h.
// This implementation unconditionally returns false.
// NOTE(review): "RT" presumably stands for ray tracing - confirm against the callers.
bool KernelDescriptor::hasRTCalls() const {
return false;
}
} // namespace NEO

View File

@@ -16,6 +16,7 @@
#include "shared/source/utilities/arrayref.h"
#include "shared/source/utilities/stackvec.h"
#include <array>
#include <cinttypes>
#include <cstddef>
#include <limits>
@@ -28,10 +29,6 @@ namespace NEO {
using StringMap = std::unordered_map<uint32_t, std::string>;
using InstructionsSegmentOffset = uint16_t;
// Empty polymorphic base type: it declares nothing except a defaulted virtual destructor.
// KernelDescriptor stores one through std::unique_ptr<ExtendedInfoBase> (member `extendedInfo`).
struct ExtendedInfoBase {
// Virtual so that destroying a derived object through a base pointer runs the derived destructor.
virtual ~ExtendedInfoBase() = default;
};
struct KernelDescriptor {
enum AddressingMode : uint8_t {
AddrNone,
@@ -44,7 +41,6 @@ struct KernelDescriptor {
KernelDescriptor() = default;
virtual ~KernelDescriptor() = default;
virtual bool hasRTCalls() const;
void updateCrossThreadDataSize() {
uint32_t crossThreadDataSize = 0;
@@ -144,12 +140,11 @@ struct KernelDescriptor {
}
struct KernelAttributes {
KernelAttributes() { flags.packed = 0U; }
uint32_t slmInlineSize = 0U;
uint32_t perThreadScratchSize[2] = {0U, 0U};
uint32_t perHwThreadPrivateMemorySize = 0U;
uint32_t perThreadSystemThreadSurfaceSize = 0U;
uint32_t numThreadsRequired = 0u;
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
uint16_t crossThreadDataSize = 0U;
uint16_t inlineDataPayloadSize = 0U;
@@ -185,14 +180,16 @@ struct KernelDescriptor {
union {
struct {
// 0
bool usesSpecialPipelineSelectMode : 1;
bool usesStringMapForPrintf : 1;
bool usesPrintf : 1;
bool usesFencesForReadWriteImages : 1;
bool usesFlattenedLocalIds;
bool usesFlattenedLocalIds : 1;
bool usesPrivateMemory : 1;
bool usesVme : 1;
bool usesImages : 1;
// 1
bool usesSamplers : 1;
bool usesSyncBuffer : 1;
bool useGlobalAtomics : 1;
@@ -201,14 +198,17 @@ struct KernelDescriptor {
bool perThreadDataHeaderIsPresent : 1;
bool perThreadDataUnusedGrfIsPresent : 1;
bool requiresDisabledEUFusion : 1;
// 2
bool requiresDisabledMidThreadPreemption : 1;
bool requiresSubgroupIndependentForwardProgress : 1;
bool requiresWorkgroupWalkOrder : 1;
bool requiresImplicitArgs : 1;
bool useStackCalls : 1;
bool hasRTCalls : 1;
bool reserved : 2;
};
uint32_t packed;
} flags;
std::array<bool, 3> packed;
} flags = {};
static_assert(sizeof(KernelAttributes::flags) == sizeof(KernelAttributes::flags.packed), "");
bool usesStringMap() const {
@@ -297,7 +297,6 @@ struct KernelDescriptor {
} external;
std::vector<uint8_t> generatedHeaps;
std::unique_ptr<ExtendedInfoBase> extendedInfo;
};
} // namespace NEO

View File

@@ -11,7 +11,6 @@
#include "shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h"
#include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/kernel/read_extended_info.h"
#include <sstream>
#include <string>
@@ -54,6 +53,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
dst.kernelAttributes.numGrfRequired = execEnv.NumGRFRequired;
dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize;
dst.kernelAttributes.barrierCount = execEnv.HasBarriers;
dst.kernelAttributes.numThreadsRequired = execEnv.NumThreadsRequired;
dst.kernelAttributes.flags.requiresDisabledEUFusion = (0 != execEnv.RequireDisableEUFusion);
dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption);
@@ -63,9 +63,9 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
dst.kernelAttributes.flags.usesSpecialPipelineSelectMode = (0 != execEnv.HasDPAS);
dst.kernelAttributes.flags.usesStatelessWrites = (0 != execEnv.StatelessWritesCount);
dst.kernelAttributes.flags.useStackCalls = (0 != execEnv.HasStackCalls);
dst.kernelAttributes.flags.hasRTCalls = (0 != execEnv.HasRTCalls);
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
readExtendedInfo(dst.extendedInfo, execEnv);
}
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token) {
@@ -477,7 +477,24 @@ void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::Ker
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessEventPoolSurface);
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface);
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateSyncBuffer);
populateKernelDescriptorRtDispatchGlobals(dst, src);
{
uint32_t heapOffset = 0;
uint32_t paramOffset = 0;
uint32_t paramSize = 0;
if (src.tokens.allocateRTGlobalBuffer != nullptr) {
auto allocateRTGlobalBuffer = static_cast<const struct iOpenCL::SPatchAllocateRTGlobalBuffer *>(src.tokens.allocateRTGlobalBuffer);
heapOffset = allocateRTGlobalBuffer->SurfaceStateHeapOffset;
paramOffset = allocateRTGlobalBuffer->DataParamOffset;
paramSize = allocateRTGlobalBuffer->DataParamSize;
}
populatePointerKernelArg(dst.payloadMappings.implicitArgs.rtDispatchGlobals,
paramOffset, paramSize, heapOffset, heapOffset,
dst.kernelAttributes.bufferAddressingMode);
}
dst.payloadMappings.explicitArgs.resize(src.tokens.kernelArgs.size());
dst.explicitArgsExtendedMetadata.resize(src.tokens.kernelArgs.size());

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -16,7 +16,6 @@ namespace PatchTokenBinary {
struct KernelFromPatchtokens;
}
void populateKernelDescriptorRtDispatchGlobals(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src);
void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes);
} // namespace NEO

View File

@@ -1,14 +0,0 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h"
namespace NEO {
// No-op implementation: the body is empty, so `dst` is left unmodified and `src` is not read.
void populateKernelDescriptorRtDispatchGlobals(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src) {}
} // namespace NEO

View File

@@ -1,12 +0,0 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/kernel/read_extended_info.h"
namespace NEO {
// No-op implementation: the body is empty, so `extendedInfo` is left unmodified and `execEnv` is not read.
void readExtendedInfo(std::unique_ptr<ExtendedInfoBase> &extendedInfo, const iOpenCL::SPatchExecutionEnvironment &execEnv) {}
} // namespace NEO

View File

@@ -1,15 +0,0 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "kernel_descriptor.h"
#include "patch_shared.h"
#include <memory>
namespace NEO {
// Declaration only. Takes the owning pointer for the extended info by non-const reference and the
// execution-environment patch token by const reference.
// NOTE(review): presumably the implementation may populate `extendedInfo` from `execEnv` - confirm per build variant.
void readExtendedInfo(std::unique_ptr<ExtendedInfoBase> &extendedInfo, const iOpenCL::SPatchExecutionEnvironment &execEnv);
} // namespace NEO