mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-25 13:33:02 +08:00
Add RT calls support to Kernel descriptor
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
604329479f
commit
9959896957
@@ -15,15 +15,11 @@ set(NEO_CORE_KERNEL
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_device_side_enqueue.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_descriptor_extended_vme.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_arg_metadata.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_descriptor.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_descriptor_from_patchtokens.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_execution_type.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_properties.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/read_extended_info.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}read_extended_info.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}kernel_descriptor_from_patchtokens_extended.cpp
|
||||
)
|
||||
|
||||
set_property(GLOBAL PROPERTY NEO_CORE_KERNEL ${NEO_CORE_KERNEL})
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
|
||||
namespace NEO {
|
||||
// Base implementation of the hasRTCalls() query declared on KernelDescriptor:
// it unconditionally reports that the kernel has no RT calls.
bool KernelDescriptor::hasRTCalls() const {
    constexpr bool rtCallsPresent = false;
    return rtCallsPresent;
}
|
||||
} // namespace NEO
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "shared/source/utilities/arrayref.h"
|
||||
#include "shared/source/utilities/stackvec.h"
|
||||
|
||||
#include <array>
|
||||
#include <cinttypes>
|
||||
#include <cstddef>
|
||||
#include <limits>
|
||||
@@ -28,10 +29,6 @@ namespace NEO {
|
||||
using StringMap = std::unordered_map<uint32_t, std::string>;
|
||||
using InstructionsSegmentOffset = uint16_t;
|
||||
|
||||
// Empty polymorphic base type. It carries no data of its own; the virtual
// destructor lets objects of derived types be destroyed correctly through
// an ExtendedInfoBase pointer.
struct ExtendedInfoBase {
    virtual ~ExtendedInfoBase() = default;
};
|
||||
|
||||
struct KernelDescriptor {
|
||||
enum AddressingMode : uint8_t {
|
||||
AddrNone,
|
||||
@@ -44,7 +41,6 @@ struct KernelDescriptor {
|
||||
|
||||
KernelDescriptor() = default;
|
||||
virtual ~KernelDescriptor() = default;
|
||||
virtual bool hasRTCalls() const;
|
||||
|
||||
void updateCrossThreadDataSize() {
|
||||
uint32_t crossThreadDataSize = 0;
|
||||
@@ -144,12 +140,11 @@ struct KernelDescriptor {
|
||||
}
|
||||
|
||||
struct KernelAttributes {
|
||||
KernelAttributes() { flags.packed = 0U; }
|
||||
|
||||
uint32_t slmInlineSize = 0U;
|
||||
uint32_t perThreadScratchSize[2] = {0U, 0U};
|
||||
uint32_t perHwThreadPrivateMemorySize = 0U;
|
||||
uint32_t perThreadSystemThreadSurfaceSize = 0U;
|
||||
uint32_t numThreadsRequired = 0u;
|
||||
uint16_t requiredWorkgroupSize[3] = {0U, 0U, 0U};
|
||||
uint16_t crossThreadDataSize = 0U;
|
||||
uint16_t inlineDataPayloadSize = 0U;
|
||||
@@ -185,14 +180,16 @@ struct KernelDescriptor {
|
||||
|
||||
union {
|
||||
struct {
|
||||
// 0
|
||||
bool usesSpecialPipelineSelectMode : 1;
|
||||
bool usesStringMapForPrintf : 1;
|
||||
bool usesPrintf : 1;
|
||||
bool usesFencesForReadWriteImages : 1;
|
||||
bool usesFlattenedLocalIds;
|
||||
bool usesFlattenedLocalIds : 1;
|
||||
bool usesPrivateMemory : 1;
|
||||
bool usesVme : 1;
|
||||
bool usesImages : 1;
|
||||
// 1
|
||||
bool usesSamplers : 1;
|
||||
bool usesSyncBuffer : 1;
|
||||
bool useGlobalAtomics : 1;
|
||||
@@ -201,14 +198,17 @@ struct KernelDescriptor {
|
||||
bool perThreadDataHeaderIsPresent : 1;
|
||||
bool perThreadDataUnusedGrfIsPresent : 1;
|
||||
bool requiresDisabledEUFusion : 1;
|
||||
// 2
|
||||
bool requiresDisabledMidThreadPreemption : 1;
|
||||
bool requiresSubgroupIndependentForwardProgress : 1;
|
||||
bool requiresWorkgroupWalkOrder : 1;
|
||||
bool requiresImplicitArgs : 1;
|
||||
bool useStackCalls : 1;
|
||||
bool hasRTCalls : 1;
|
||||
bool reserved : 2;
|
||||
};
|
||||
uint32_t packed;
|
||||
} flags;
|
||||
std::array<bool, 3> packed;
|
||||
} flags = {};
|
||||
static_assert(sizeof(KernelAttributes::flags) == sizeof(KernelAttributes::flags.packed), "");
|
||||
|
||||
bool usesStringMap() const {
|
||||
@@ -297,7 +297,6 @@ struct KernelDescriptor {
|
||||
} external;
|
||||
|
||||
std::vector<uint8_t> generatedHeaps;
|
||||
std::unique_ptr<ExtendedInfoBase> extendedInfo;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
#include "shared/source/kernel/kernel_arg_descriptor_extended_device_side_enqueue.h"
|
||||
#include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/kernel/read_extended_info.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
@@ -54,6 +53,7 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
|
||||
dst.kernelAttributes.numGrfRequired = execEnv.NumGRFRequired;
|
||||
dst.kernelAttributes.simdSize = execEnv.LargestCompiledSIMDSize;
|
||||
dst.kernelAttributes.barrierCount = execEnv.HasBarriers;
|
||||
dst.kernelAttributes.numThreadsRequired = execEnv.NumThreadsRequired;
|
||||
|
||||
dst.kernelAttributes.flags.requiresDisabledEUFusion = (0 != execEnv.RequireDisableEUFusion);
|
||||
dst.kernelAttributes.flags.requiresDisabledMidThreadPreemption = (0 != execEnv.DisableMidThreadPreemption);
|
||||
@@ -63,9 +63,9 @@ void populateKernelDescriptor(KernelDescriptor &dst, const SPatchExecutionEnviro
|
||||
dst.kernelAttributes.flags.usesSpecialPipelineSelectMode = (0 != execEnv.HasDPAS);
|
||||
dst.kernelAttributes.flags.usesStatelessWrites = (0 != execEnv.StatelessWritesCount);
|
||||
dst.kernelAttributes.flags.useStackCalls = (0 != execEnv.HasStackCalls);
|
||||
dst.kernelAttributes.flags.hasRTCalls = (0 != execEnv.HasRTCalls);
|
||||
|
||||
dst.kernelMetadata.compiledSubGroupsNumber = execEnv.CompiledSubGroupsNumber;
|
||||
readExtendedInfo(dst.extendedInfo, execEnv);
|
||||
}
|
||||
|
||||
void populateKernelDescriptor(KernelDescriptor &dst, const SPatchSamplerStateArray &token) {
|
||||
@@ -477,7 +477,24 @@ void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::Ker
|
||||
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessEventPoolSurface);
|
||||
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateStatelessDefaultDeviceQueueSurface);
|
||||
populateKernelDescriptorIfNotNull(dst, src.tokens.allocateSyncBuffer);
|
||||
populateKernelDescriptorRtDispatchGlobals(dst, src);
|
||||
|
||||
{
|
||||
uint32_t heapOffset = 0;
|
||||
uint32_t paramOffset = 0;
|
||||
uint32_t paramSize = 0;
|
||||
|
||||
if (src.tokens.allocateRTGlobalBuffer != nullptr) {
|
||||
auto allocateRTGlobalBuffer = static_cast<const struct iOpenCL::SPatchAllocateRTGlobalBuffer *>(src.tokens.allocateRTGlobalBuffer);
|
||||
heapOffset = allocateRTGlobalBuffer->SurfaceStateHeapOffset;
|
||||
paramOffset = allocateRTGlobalBuffer->DataParamOffset;
|
||||
paramSize = allocateRTGlobalBuffer->DataParamSize;
|
||||
}
|
||||
|
||||
populatePointerKernelArg(dst.payloadMappings.implicitArgs.rtDispatchGlobals,
|
||||
paramOffset, paramSize, heapOffset, heapOffset,
|
||||
dst.kernelAttributes.bufferAddressingMode);
|
||||
}
|
||||
|
||||
dst.payloadMappings.explicitArgs.resize(src.tokens.kernelArgs.size());
|
||||
dst.explicitArgsExtendedMetadata.resize(src.tokens.kernelArgs.size());
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -16,7 +16,6 @@ namespace PatchTokenBinary {
|
||||
struct KernelFromPatchtokens;
|
||||
}
|
||||
|
||||
void populateKernelDescriptorRtDispatchGlobals(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src);
|
||||
void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src, uint32_t gpuPointerSizeInBytes);
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/kernel/kernel_descriptor_from_patchtokens.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
// No-op implementation: neither reads `src` nor modifies `dst`.
void populateKernelDescriptorRtDispatchGlobals(KernelDescriptor &dst, const PatchTokenBinary::KernelFromPatchtokens &src) {
    // Nothing to populate in this variant.
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -1,12 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/kernel/read_extended_info.h"
|
||||
|
||||
namespace NEO {
|
||||
// No-op implementation: `extendedInfo` is left exactly as passed in and
// `execEnv` is not inspected.
void readExtendedInfo(std::unique_ptr<ExtendedInfoBase> &extendedInfo, const iOpenCL::SPatchExecutionEnvironment &execEnv) {
    // Nothing to read in this variant.
}
|
||||
} // namespace NEO
|
||||
@@ -1,15 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "kernel_descriptor.h"
|
||||
#include "patch_shared.h"
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace NEO {
|
||||
void readExtendedInfo(std::unique_ptr<ExtendedInfoBase> &extendedInfo, const iOpenCL::SPatchExecutionEnvironment &execEnv);
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user