mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 12:23:05 +08:00
Move scheduler from builtins to context
Change-Id: I465dd0710d1079d0f30c5e3ff6e3972447f48a02
Signed-off-by: Jablonski, Mateusz <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
a69f393356
commit
d60ecac1a6
@@ -581,12 +581,8 @@ std::string OfflineCompiler::parseBinAsCharArray(uint8_t *binary, size_t size, s
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
out << "namespace NEO {" << std::endl;
|
out << "namespace NEO {" << std::endl;
|
||||||
out << "static RegisterEmbeddedResource register" << builtinName << "Bin(" << std::endl;
|
out << "static RegisterEmbeddedResource register" << builtinName << "Bin(" << std::endl;
|
||||||
out << " createBuiltinResourceName(" << std::endl;
|
out << " \"" << familyNameWithType << "_0_" << fileName.c_str() << ".igdrcl_built_in.bin\"," << std::endl;
|
||||||
out << " EBuiltInOps::" << builtinName << "," << std::endl;
|
out << " (const char *)" << builtinName << "Binary_" << familyNameWithType << "," << std::endl;
|
||||||
out << " BuiltinCode::getExtension(BuiltinCode::ECodeType::Binary), \"" << familyNameWithType << "\", 0)" << std::endl;
|
|
||||||
out << " .c_str()," << std::endl;
|
|
||||||
out << " (const char *)" << builtinName << "Binary"
|
|
||||||
<< "_" << familyNameWithType << "," << std::endl;
|
|
||||||
out << " " << builtinName << "BinarySize_" << familyNameWithType << ");" << std::endl;
|
out << " " << builtinName << "BinarySize_" << familyNameWithType << ");" << std::endl;
|
||||||
out << "}" << std::endl;
|
out << "}" << std::endl;
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2019 Intel Corporation
|
* Copyright (C) 2019-2020 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -32,8 +32,7 @@ constexpr Type FillImage3d{16};
|
|||||||
constexpr Type VmeBlockMotionEstimateIntel{17};
|
constexpr Type VmeBlockMotionEstimateIntel{17};
|
||||||
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{18};
|
constexpr Type VmeBlockAdvancedMotionEstimateCheckIntel{18};
|
||||||
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{19};
|
constexpr Type VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel{19};
|
||||||
constexpr Type Scheduler{20};
|
|
||||||
|
|
||||||
constexpr uint32_t MaxBaseValue{20};
|
constexpr uint32_t MaxBaseValue{19};
|
||||||
} // namespace EBuiltInOps
|
} // namespace EBuiltInOps
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -33,54 +33,7 @@ BuiltIns::BuiltIns() {
|
|||||||
builtinsLib.reset(new BuiltinsLib());
|
builtinsLib.reset(new BuiltinsLib());
|
||||||
}
|
}
|
||||||
|
|
||||||
BuiltIns::~BuiltIns() {
|
BuiltIns::~BuiltIns() = default;
|
||||||
delete static_cast<SchedulerKernel *>(schedulerBuiltIn.pKernel);
|
|
||||||
delete schedulerBuiltIn.pProgram;
|
|
||||||
schedulerBuiltIn.pKernel = nullptr;
|
|
||||||
schedulerBuiltIn.pProgram = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
SchedulerKernel &BuiltIns::getSchedulerKernel(Context &context) {
|
|
||||||
if (schedulerBuiltIn.pKernel) {
|
|
||||||
return *static_cast<SchedulerKernel *>(schedulerBuiltIn.pKernel);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto initializeSchedulerProgramAndKernel = [&] {
|
|
||||||
cl_int retVal = CL_SUCCESS;
|
|
||||||
|
|
||||||
auto src = context.getDevice(0)->getExecutionEnvironment()->getBuiltIns()->builtinsLib->getBuiltinCode(EBuiltInOps::Scheduler, BuiltinCode::ECodeType::Any, context.getDevice(0)->getDevice());
|
|
||||||
|
|
||||||
auto program = Program::createFromGenBinary(*context.getDevice(0)->getExecutionEnvironment(),
|
|
||||||
&context,
|
|
||||||
src.resource.data(),
|
|
||||||
src.resource.size(),
|
|
||||||
true,
|
|
||||||
&retVal);
|
|
||||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
|
||||||
DEBUG_BREAK_IF(!program);
|
|
||||||
|
|
||||||
retVal = program->processGenBinary();
|
|
||||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
|
||||||
|
|
||||||
schedulerBuiltIn.pProgram = program;
|
|
||||||
|
|
||||||
auto kernelInfo = schedulerBuiltIn.pProgram->getKernelInfo(SchedulerKernel::schedulerName);
|
|
||||||
DEBUG_BREAK_IF(!kernelInfo);
|
|
||||||
|
|
||||||
schedulerBuiltIn.pKernel = Kernel::create<SchedulerKernel>(
|
|
||||||
schedulerBuiltIn.pProgram,
|
|
||||||
*kernelInfo,
|
|
||||||
&retVal);
|
|
||||||
|
|
||||||
UNRECOVERABLE_IF(schedulerBuiltIn.pKernel->getScratchSize() != 0);
|
|
||||||
|
|
||||||
DEBUG_BREAK_IF(retVal != CL_SUCCESS);
|
|
||||||
};
|
|
||||||
std::call_once(schedulerBuiltIn.programIsInitialized, initializeSchedulerProgramAndKernel);
|
|
||||||
|
|
||||||
UNRECOVERABLE_IF(schedulerBuiltIn.pKernel == nullptr);
|
|
||||||
return *static_cast<SchedulerKernel *>(schedulerBuiltIn.pKernel);
|
|
||||||
}
|
|
||||||
|
|
||||||
const SipKernel &BuiltIns::getSipKernel(SipKernelType type, Device &device) {
|
const SipKernel &BuiltIns::getSipKernel(SipKernelType type, Device &device) {
|
||||||
uint32_t kernelId = static_cast<uint32_t>(type);
|
uint32_t kernelId = static_cast<uint32_t>(type);
|
||||||
|
|||||||
@@ -180,8 +180,6 @@ class BuiltIns {
|
|||||||
const char *kernelNames,
|
const char *kernelNames,
|
||||||
int &errcodeRet);
|
int &errcodeRet);
|
||||||
|
|
||||||
SchedulerKernel &getSchedulerKernel(Context &context);
|
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL const SipKernel &getSipKernel(SipKernelType type, Device &device);
|
MOCKABLE_VIRTUAL const SipKernel &getSipKernel(SipKernelType type, Device &device);
|
||||||
|
|
||||||
BuiltinsLib &getBuiltinsLib() {
|
BuiltinsLib &getBuiltinsLib() {
|
||||||
@@ -198,9 +196,6 @@ class BuiltIns {
|
|||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// scheduler kernel
|
|
||||||
BuiltInKernel schedulerBuiltIn;
|
|
||||||
|
|
||||||
// sip builtins
|
// sip builtins
|
||||||
std::pair<std::unique_ptr<SipKernel>, std::once_flag> sipKernels[static_cast<uint32_t>(SipKernelType::COUNT)];
|
std::pair<std::unique_ptr<SipKernel>, std::once_flag> sipKernels[static_cast<uint32_t>(SipKernelType::COUNT)];
|
||||||
|
|
||||||
|
|||||||
@@ -60,8 +60,6 @@ const char *getBuiltinAsString(EBuiltInOps::Type builtin) {
|
|||||||
return "vme_block_advanced_motion_estimate_check_intel.igdrcl_built_in";
|
return "vme_block_advanced_motion_estimate_check_intel.igdrcl_built_in";
|
||||||
case EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel:
|
case EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel:
|
||||||
return "vme_block_advanced_motion_estimate_bidirectional_check_intel";
|
return "vme_block_advanced_motion_estimate_bidirectional_check_intel";
|
||||||
case EBuiltInOps::Scheduler:
|
|
||||||
return "scheduler.igdrcl_built_in";
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -100,8 +100,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
|||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void CommandQueueHw<GfxFamily>::forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo) {
|
void CommandQueueHw<GfxFamily>::forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo) {
|
||||||
BuiltIns &builtIns = *getDevice().getExecutionEnvironment()->getBuiltIns();
|
SchedulerKernel &scheduler = getContext().getSchedulerKernel();
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());
|
|
||||||
DispatchInfo dispatchInfo(&scheduler, 1, Vec3<size_t>(scheduler.getGws(), 1, 1), Vec3<size_t>(scheduler.getLws(), 1, 1), Vec3<size_t>(0, 0, 0));
|
DispatchInfo dispatchInfo(&scheduler, 1, Vec3<size_t>(scheduler.getGws(), 1, 1), Vec3<size_t>(scheduler.getLws(), 1, 1), Vec3<size_t>(0, 0, 0));
|
||||||
|
|
||||||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||||
@@ -546,8 +545,7 @@ void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *d
|
|||||||
hwTimeStamps,
|
hwTimeStamps,
|
||||||
isCcsUsed);
|
isCcsUsed);
|
||||||
|
|
||||||
BuiltIns &builtIns = *getDevice().getExecutionEnvironment()->getBuiltIns();
|
SchedulerKernel &scheduler = getContext().getSchedulerKernel();
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(this->getContext());
|
|
||||||
|
|
||||||
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
||||||
devQueueHw->getStackBuffer(),
|
devQueueHw->getStackBuffer(),
|
||||||
|
|||||||
@@ -210,7 +210,7 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
|||||||
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(multiDispatchInfo.getMemObjsForAuxTranslation());
|
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(multiDispatchInfo.getMemObjsForAuxTranslation());
|
||||||
}
|
}
|
||||||
if (parentKernel) {
|
if (parentKernel) {
|
||||||
SchedulerKernel &scheduler = commandQueue.getDevice().getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(parentKernel->getContext());
|
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
|
||||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler);
|
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler);
|
||||||
}
|
}
|
||||||
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||||
|
|||||||
@@ -24,6 +24,7 @@
|
|||||||
#include "runtime/helpers/surface_formats.h"
|
#include "runtime/helpers/surface_formats.h"
|
||||||
#include "runtime/mem_obj/image.h"
|
#include "runtime/mem_obj/image.h"
|
||||||
#include "runtime/platform/platform.h"
|
#include "runtime/platform/platform.h"
|
||||||
|
#include "runtime/scheduler/scheduler_kernel.h"
|
||||||
#include "runtime/sharings/sharing.h"
|
#include "runtime/sharings/sharing.h"
|
||||||
#include "runtime/sharings/sharing_factory.h"
|
#include "runtime/sharings/sharing_factory.h"
|
||||||
|
|
||||||
@@ -46,6 +47,7 @@ Context::Context(
|
|||||||
defaultDeviceQueue = nullptr;
|
defaultDeviceQueue = nullptr;
|
||||||
driverDiagnostics = nullptr;
|
driverDiagnostics = nullptr;
|
||||||
sharingFunctions.resize(SharingType::MAX_SHARING_VALUE);
|
sharingFunctions.resize(SharingType::MAX_SHARING_VALUE);
|
||||||
|
schedulerBuiltIn = std::make_unique<BuiltInKernel>();
|
||||||
}
|
}
|
||||||
|
|
||||||
Context::~Context() {
|
Context::~Context() {
|
||||||
@@ -66,6 +68,10 @@ Context::~Context() {
|
|||||||
for (auto &device : devices) {
|
for (auto &device : devices) {
|
||||||
device->decRefInternal();
|
device->decRefInternal();
|
||||||
}
|
}
|
||||||
|
delete static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
|
||||||
|
delete schedulerBuiltIn->pProgram;
|
||||||
|
schedulerBuiltIn->pKernel = nullptr;
|
||||||
|
schedulerBuiltIn->pProgram = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
DeviceQueue *Context::getDefaultDeviceQueue() {
|
DeviceQueue *Context::getDefaultDeviceQueue() {
|
||||||
@@ -325,4 +331,46 @@ cl_int Context::getSupportedImageFormats(
|
|||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the scheduler kernel cached in this context, creating it on first use.
 *
 * On the first call the scheduler binary is loaded for device 0 through
 * SchedulerKernel::loadSchedulerKernel, a Program is created from that binary
 * and processed, and a SchedulerKernel is created from the program's kernel
 * info named SchedulerKernel::schedulerName. The resulting program and kernel
 * pointers are stored in schedulerBuiltIn.
 *
 * Initialization runs under std::call_once on schedulerBuiltIn->programIsInitialized.
 * A missing program, kernel info or kernel, or a scheduler kernel with a
 * non-zero scratch size, is reported through UNRECOVERABLE_IF.
 *
 * @return reference to the cached scheduler kernel.
 */
SchedulerKernel &Context::getSchedulerKernel() {
    // Fast path: a kernel is already cached, so skip call_once entirely.
    // NOTE(review): this read is not synchronized with the write performed inside
    // the call_once callable - confirm callers do not race on the first call.
    if (schedulerBuiltIn->pKernel) {
        return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
    }

    auto initializeSchedulerProgramAndKernel = [&] {
        cl_int retVal = CL_SUCCESS;

        auto src = SchedulerKernel::loadSchedulerKernel(&getDevice(0)->getDevice());

        auto program = Program::createFromGenBinary(*getDevice(0)->getExecutionEnvironment(),
                                                    this,
                                                    src.resource.data(),
                                                    src.resource.size(),
                                                    true,
                                                    &retVal);
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);
        // program is dereferenced right below, so a debug-only check is not enough.
        UNRECOVERABLE_IF(!program);

        retVal = program->processGenBinary();
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);

        schedulerBuiltIn->pProgram = program;

        auto kernelInfo = schedulerBuiltIn->pProgram->getKernelInfo(SchedulerKernel::schedulerName);
        // kernelInfo is dereferenced when creating the kernel.
        UNRECOVERABLE_IF(!kernelInfo);

        schedulerBuiltIn->pKernel = Kernel::create<SchedulerKernel>(
            schedulerBuiltIn->pProgram,
            *kernelInfo,
            &retVal);
        DEBUG_BREAK_IF(retVal != CL_SUCCESS);

        // Validate the pointer before querying the kernel through it.
        UNRECOVERABLE_IF(schedulerBuiltIn->pKernel == nullptr);
        UNRECOVERABLE_IF(schedulerBuiltIn->pKernel->getScratchSize() != 0);
    };
    std::call_once(schedulerBuiltIn->programIsInitialized, initializeSchedulerProgramAndKernel);

    UNRECOVERABLE_IF(schedulerBuiltIn->pKernel == nullptr);
    return *static_cast<SchedulerKernel *>(schedulerBuiltIn->pKernel);
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
|
||||||
|
struct BuiltInKernel;
|
||||||
class CommandStreamReceiver;
|
class CommandStreamReceiver;
|
||||||
class CommandQueue;
|
class CommandQueue;
|
||||||
class Device;
|
class Device;
|
||||||
@@ -24,6 +25,7 @@ class MemObj;
|
|||||||
class MemoryManager;
|
class MemoryManager;
|
||||||
class SharingFunctions;
|
class SharingFunctions;
|
||||||
class SVMAllocsManager;
|
class SVMAllocsManager;
|
||||||
|
class SchedulerKernel;
|
||||||
|
|
||||||
enum class BlitOperationResult {
|
enum class BlitOperationResult {
|
||||||
Unsupported,
|
Unsupported,
|
||||||
@@ -135,6 +137,8 @@ class Context : public BaseObject<_cl_context> {
|
|||||||
|
|
||||||
MOCKABLE_VIRTUAL BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const;
|
MOCKABLE_VIRTUAL BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const;
|
||||||
|
|
||||||
|
SchedulerKernel &getSchedulerKernel();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Context(void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *) = nullptr,
|
Context(void(CL_CALLBACK *pfnNotify)(const char *, const void *, size_t, void *) = nullptr,
|
||||||
void *userData = nullptr);
|
void *userData = nullptr);
|
||||||
@@ -149,6 +153,8 @@ class Context : public BaseObject<_cl_context> {
|
|||||||
void(CL_CALLBACK *contextCallback)(const char *, const void *, size_t, void *);
|
void(CL_CALLBACK *contextCallback)(const char *, const void *, size_t, void *);
|
||||||
void *userData;
|
void *userData;
|
||||||
|
|
||||||
|
std::unique_ptr<BuiltInKernel> schedulerBuiltIn;
|
||||||
|
|
||||||
ClDeviceVector devices;
|
ClDeviceVector devices;
|
||||||
MemoryManager *memoryManager;
|
MemoryManager *memoryManager;
|
||||||
SVMAllocsManager *svmAllocsManager = nullptr;
|
SVMAllocsManager *svmAllocsManager = nullptr;
|
||||||
|
|||||||
@@ -16,6 +16,7 @@
|
|||||||
#include "core/helpers/string.h"
|
#include "core/helpers/string.h"
|
||||||
#include "core/indirect_heap/indirect_heap.h"
|
#include "core/indirect_heap/indirect_heap.h"
|
||||||
#include "runtime/command_queue/local_id_gen.h"
|
#include "runtime/command_queue/local_id_gen.h"
|
||||||
|
#include "runtime/context/context.h"
|
||||||
#include "runtime/device/cl_device.h"
|
#include "runtime/device/cl_device.h"
|
||||||
#include "runtime/helpers/dispatch_info.h"
|
#include "runtime/helpers/dispatch_info.h"
|
||||||
#include "runtime/kernel/kernel.h"
|
#include "runtime/kernel/kernel.h"
|
||||||
@@ -152,8 +153,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(const Kern
|
|||||||
maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count);
|
maxBindingTableCount = std::max(maxBindingTableCount, pBlockInfo->patchInfo.bindingTableState->Count);
|
||||||
}
|
}
|
||||||
|
|
||||||
BuiltIns &builtIns = *kernel.getDevice().getExecutionEnvironment()->getBuiltIns();
|
SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel();
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(kernel.getContext());
|
|
||||||
|
|
||||||
totalSize += getSizeRequiredSSH(scheduler);
|
totalSize += getSizeRequiredSSH(scheduler);
|
||||||
|
|
||||||
|
|||||||
@@ -166,8 +166,7 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount,
|
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount,
|
||||||
commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed);
|
commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed);
|
||||||
|
|
||||||
BuiltIns &builtIns = *this->kernel->getDevice().getExecutionEnvironment()->getBuiltIns();
|
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(commandQueue.getContext());
|
|
||||||
|
|
||||||
scheduler.setArgs(devQueue->getQueueBuffer(),
|
scheduler.setArgs(devQueue->getQueueBuffer(),
|
||||||
devQueue->getStackBuffer(),
|
devQueue->getStackBuffer(),
|
||||||
|
|||||||
@@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
#include "runtime/scheduler/scheduler_kernel.h"
|
#include "runtime/scheduler/scheduler_kernel.h"
|
||||||
|
|
||||||
|
#include "core/device/device.h"
|
||||||
#include "core/helpers/hw_helper.h"
|
#include "core/helpers/hw_helper.h"
|
||||||
#include "runtime/device/cl_device.h"
|
#include "runtime/device/cl_device.h"
|
||||||
|
|
||||||
@@ -62,4 +63,14 @@ void SchedulerKernel::computeGws() {
|
|||||||
DBG_LOG(PrintEMDebugInformation, "Scheduler GWS: ", gws);
|
DBG_LOG(PrintEMDebugInformation, "Scheduler GWS: ", gws);
|
||||||
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Scheduler GWS: %" PRIu64, static_cast<uint64_t>(gws));
|
printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Scheduler GWS: %" PRIu64, static_cast<uint64_t>(gws));
|
||||||
}
|
}
|
||||||
|
/**
 * Loads the embedded scheduler binary for the given device.
 *
 * The resource is looked up in an EmbeddedStorage under the name
 * "<family name with type>_0_scheduler.igdrcl_built_in.bin", where the prefix
 * comes from getFamilyNameWithType() applied to the device's hardware info.
 *
 * @param device device whose hardware info selects the resource; it is
 *               dereferenced unconditionally and stored as the target device.
 * @return BuiltinCode of type Binary holding whatever the storage returned
 *         for that name.
 */
BuiltinCode SchedulerKernel::loadSchedulerKernel(Device *device) {
    std::string resourceKey = getFamilyNameWithType(device->getHardwareInfo());
    resourceKey += "_0_scheduler.igdrcl_built_in.bin";

    // The storage is only needed for this single lookup.
    EmbeddedStorage embeddedStorage("");

    BuiltinCode schedulerCode;
    schedulerCode.resource = embeddedStorage.load(resourceKey);
    schedulerCode.type = BuiltinCode::ECodeType::Binary;
    schedulerCode.targetDevice = device;
    return schedulerCode;
}
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include "runtime/built_ins/built_ins.h"
|
||||||
#include "runtime/kernel/kernel.h"
|
#include "runtime/kernel/kernel.h"
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
@@ -50,6 +51,7 @@ class SchedulerKernel : public Kernel {
|
|||||||
GraphicsAllocation *queueStorageBuffer,
|
GraphicsAllocation *queueStorageBuffer,
|
||||||
GraphicsAllocation *ssh,
|
GraphicsAllocation *ssh,
|
||||||
GraphicsAllocation *debugQueue = nullptr);
|
GraphicsAllocation *debugQueue = nullptr);
|
||||||
|
static BuiltinCode loadSchedulerKernel(Device *device);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
SchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg) : Kernel(programArg, kernelInfoArg, deviceArg, true), gws(0) {
|
SchedulerKernel(Program *programArg, const KernelInfo &kernelInfoArg, const ClDevice &deviceArg) : Kernel(programArg, kernelInfoArg, deviceArg, true), gws(0) {
|
||||||
|
|||||||
@@ -29,8 +29,7 @@ GEN11TEST_F(GEN11AUBParentKernelFixture, EnqueueParentKernel) {
|
|||||||
properties[0],
|
properties[0],
|
||||||
retVal);
|
retVal);
|
||||||
|
|
||||||
BuiltIns &builtIns = *pDevice->getExecutionEnvironment()->getBuiltIns();
|
SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel();
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(pCmdQ->getContext());
|
|
||||||
// Aub execution takes huge time for bigger GWS
|
// Aub execution takes huge time for bigger GWS
|
||||||
scheduler.setGws(24);
|
scheduler.setGws(24);
|
||||||
|
|
||||||
|
|||||||
@@ -29,8 +29,7 @@ GEN12LPTEST_F(GEN12LPAUBParentKernelFixture, EnqueueParentKernel) {
|
|||||||
properties[0],
|
properties[0],
|
||||||
retVal);
|
retVal);
|
||||||
|
|
||||||
BuiltIns &builtIns = *pDevice->getExecutionEnvironment()->getBuiltIns();
|
SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel();
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(pCmdQ->getContext());
|
|
||||||
// Aub execution takes huge time for bigger GWS
|
// Aub execution takes huge time for bigger GWS
|
||||||
scheduler.setGws(24);
|
scheduler.setGws(24);
|
||||||
|
|
||||||
|
|||||||
@@ -31,8 +31,7 @@ GEN8TEST_F(GEN8AUBParentKernelFixture, EnqueueParentKernel) {
|
|||||||
properties[0],
|
properties[0],
|
||||||
retVal));
|
retVal));
|
||||||
|
|
||||||
auto &builtIns = *pDevice->getExecutionEnvironment()->getBuiltIns();
|
SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel();
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(pCmdQ->getContext());
|
|
||||||
// Aub execution takes huge time for bigger GWS
|
// Aub execution takes huge time for bigger GWS
|
||||||
scheduler.setGws(24);
|
scheduler.setGws(24);
|
||||||
|
|
||||||
|
|||||||
@@ -27,8 +27,7 @@ GEN9TEST_F(AUBParentKernelFixture, EnqueueParentKernel) {
|
|||||||
properties[0],
|
properties[0],
|
||||||
retVal);
|
retVal);
|
||||||
|
|
||||||
BuiltIns &builtIns = *pDevice->getExecutionEnvironment()->getBuiltIns();
|
SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel();
|
||||||
SchedulerKernel &scheduler = builtIns.getSchedulerKernel(pCmdQ->getContext());
|
|
||||||
// Aub execution takes huge time for bigger GWS
|
// Aub execution takes huge time for bigger GWS
|
||||||
scheduler.setGws(24);
|
scheduler.setGws(24);
|
||||||
|
|
||||||
|
|||||||
@@ -911,8 +911,7 @@ TEST_F(BuiltInTests, BuiltinDispatchInfoBuilderGetBuilderForUnknownBuiltInOp) {
|
|||||||
|
|
||||||
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getSchedulerKernel) {
|
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getSchedulerKernel) {
|
||||||
if (pDevice->getSupportedClVersion() >= 20) {
|
if (pDevice->getSupportedClVersion() >= 20) {
|
||||||
Context &context = *pContext;
|
SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel();
|
||||||
SchedulerKernel &schedulerKernel = pBuiltIns->getSchedulerKernel(context);
|
|
||||||
std::string name = SchedulerKernel::schedulerName;
|
std::string name = SchedulerKernel::schedulerName;
|
||||||
EXPECT_EQ(name, schedulerKernel.getKernelInfo().name);
|
EXPECT_EQ(name, schedulerKernel.getKernelInfo().name);
|
||||||
}
|
}
|
||||||
@@ -920,12 +919,12 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getSchedulerKernel) {
|
|||||||
|
|
||||||
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getSchedulerKernelForSecondTimeDoesNotCreateNewKernel) {
|
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getSchedulerKernelForSecondTimeDoesNotCreateNewKernel) {
|
||||||
if (pDevice->getSupportedClVersion() >= 20) {
|
if (pDevice->getSupportedClVersion() >= 20) {
|
||||||
Context &context = *pContext;
|
SchedulerKernel &schedulerKernel = pContext->getSchedulerKernel();
|
||||||
SchedulerKernel &schedulerKernel = pBuiltIns->getSchedulerKernel(context);
|
|
||||||
|
|
||||||
Program *program = schedulerKernel.getProgram();
|
Program *program = schedulerKernel.getProgram();
|
||||||
|
EXPECT_NE(nullptr, program);
|
||||||
|
|
||||||
SchedulerKernel &schedulerKernelSecond = pBuiltIns->getSchedulerKernel(context);
|
SchedulerKernel &schedulerKernelSecond = pContext->getSchedulerKernel();
|
||||||
|
|
||||||
Program *program2 = schedulerKernelSecond.getProgram();
|
Program *program2 = schedulerKernelSecond.getProgram();
|
||||||
|
|
||||||
@@ -1126,7 +1125,6 @@ TEST_F(VmeBuiltInTests, getBuiltinAsString) {
|
|||||||
EXPECT_EQ(0, strcmp("vme_block_motion_estimate_intel.igdrcl_built_in", getBuiltinAsString(EBuiltInOps::VmeBlockMotionEstimateIntel)));
|
EXPECT_EQ(0, strcmp("vme_block_motion_estimate_intel.igdrcl_built_in", getBuiltinAsString(EBuiltInOps::VmeBlockMotionEstimateIntel)));
|
||||||
EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_check_intel.igdrcl_built_in", getBuiltinAsString(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel)));
|
EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_check_intel.igdrcl_built_in", getBuiltinAsString(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel)));
|
||||||
EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_bidirectional_check_intel", getBuiltinAsString(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel)));
|
EXPECT_EQ(0, strcmp("vme_block_advanced_motion_estimate_bidirectional_check_intel", getBuiltinAsString(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel)));
|
||||||
EXPECT_EQ(0, strcmp("scheduler.igdrcl_built_in", getBuiltinAsString(EBuiltInOps::Scheduler)));
|
|
||||||
EXPECT_EQ(0, strcmp("unknown", getBuiltinAsString(EBuiltInOps::COUNT)));
|
EXPECT_EQ(0, strcmp("unknown", getBuiltinAsString(EBuiltInOps::COUNT)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1322,10 +1320,6 @@ TEST_F(BuiltInTests, getBuiltinResourcesForTypeSource) {
|
|||||||
EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::Source, *pDevice).size());
|
EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::Source, *pDevice).size());
|
||||||
EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::Source, *pDevice).size());
|
EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::Source, *pDevice).size());
|
||||||
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Source, *pDevice).size());
|
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Source, *pDevice).size());
|
||||||
|
|
||||||
if (pClDevice->getHardwareInfo().capabilityTable.supportsDeviceEnqueue) {
|
|
||||||
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::Scheduler, BuiltinCode::ECodeType::Source, *pDevice).size());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getBuiltinResourcesForTypeBinary) {
|
HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getBuiltinResourcesForTypeBinary) {
|
||||||
@@ -1351,9 +1345,6 @@ HWCMDTEST_F(IGFX_GEN8_CORE, BuiltInTests, getBuiltinResourcesForTypeBinary) {
|
|||||||
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockMotionEstimateIntel, BuiltinCode::ECodeType::Binary, *pDevice).size());
|
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockMotionEstimateIntel, BuiltinCode::ECodeType::Binary, *pDevice).size());
|
||||||
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::Binary, *pDevice).size());
|
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateCheckIntel, BuiltinCode::ECodeType::Binary, *pDevice).size());
|
||||||
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::Binary, *pDevice).size());
|
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::VmeBlockAdvancedMotionEstimateBidirectionalCheckIntel, BuiltinCode::ECodeType::Binary, *pDevice).size());
|
||||||
if (this->pClDevice->getEnabledClVersion() >= 20) {
|
|
||||||
EXPECT_NE(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::Scheduler, BuiltinCode::ECodeType::Binary, *pDevice).size());
|
|
||||||
}
|
|
||||||
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Binary, *pDevice).size());
|
EXPECT_EQ(0u, mockBuiltinsLib->getBuiltinResource(EBuiltInOps::COUNT, BuiltinCode::ECodeType::Binary, *pDevice).size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2017-2019 Intel Corporation
|
* Copyright (C) 2017-2020 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -211,7 +211,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandStreamFixture, GivenDispatchInfoW
|
|||||||
|
|
||||||
size_t rest = MemoryConstants::pageSize - (numOfKernels * size);
|
size_t rest = MemoryConstants::pageSize - (numOfKernels * size);
|
||||||
|
|
||||||
SchedulerKernel &scheduler = device->getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(*mockParentKernel->getContext());
|
SchedulerKernel &scheduler = pCmdQ->getContext().getSchedulerKernel();
|
||||||
size_t schedulerSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, &scheduler);
|
size_t schedulerSize = EnqueueOperation<FamilyType>::getSizeRequiredCS(CL_COMMAND_NDRANGE_KERNEL, false, false, *pCmdQ, &scheduler);
|
||||||
|
|
||||||
while (rest >= schedulerSize) {
|
while (rest >= schedulerSize) {
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchScheduler) {
|
|||||||
|
|
||||||
if (pDevice->getSupportedClVersion() >= 20) {
|
if (pDevice->getSupportedClVersion() >= 20) {
|
||||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||||
SchedulerKernel &scheduler = pDevice->getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(*context);
|
SchedulerKernel &scheduler = context->getSchedulerKernel();
|
||||||
|
|
||||||
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
|
auto *executionModelDshAllocation = pDevQueueHw->getDshBuffer();
|
||||||
auto *dshHeap = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
auto *dshHeap = pDevQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||||
@@ -173,7 +173,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ExecutionModelSchedulerFixture, dispatchSchedulerDoe
|
|||||||
|
|
||||||
if (pDevice->getSupportedClVersion() >= 20) {
|
if (pDevice->getSupportedClVersion() >= 20) {
|
||||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||||
SchedulerKernel &scheduler = pDevice->getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(*context);
|
SchedulerKernel &scheduler = context->getSchedulerKernel();
|
||||||
|
|
||||||
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||||
// Setup heaps in pCmdQ
|
// Setup heaps in pCmdQ
|
||||||
@@ -210,7 +210,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ParentKernelCommandQueueFixture, dispatchSchedulerWi
|
|||||||
ASSERT_NE(nullptr, igilQueue);
|
ASSERT_NE(nullptr, igilQueue);
|
||||||
igilQueue->m_controls.m_SchedulerEarlyReturn = 1;
|
igilQueue->m_controls.m_SchedulerEarlyReturn = 1;
|
||||||
|
|
||||||
SchedulerKernel &scheduler = device->getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(*context);
|
SchedulerKernel &scheduler = context->getSchedulerKernel();
|
||||||
|
|
||||||
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(scheduler);
|
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSizeRequiredSSH(scheduler);
|
||||||
// Setup heaps in pCmdQ
|
// Setup heaps in pCmdQ
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2017-2019 Intel Corporation
|
* Copyright (C) 2017-2020 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -28,7 +28,7 @@ BDWTEST_F(BdwSchedulerTest, givenCallToDispatchSchedulerWhenPipeControlWithCSSta
|
|||||||
|
|
||||||
if (pDevice->getSupportedClVersion() >= 20) {
|
if (pDevice->getSupportedClVersion() >= 20) {
|
||||||
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
DeviceQueueHw<FamilyType> *pDevQueueHw = castToObject<DeviceQueueHw<FamilyType>>(pDevQueue);
|
||||||
SchedulerKernel &scheduler = pDevice->getExecutionEnvironment()->getBuiltIns()->getSchedulerKernel(*context);
|
SchedulerKernel &scheduler = context->getSchedulerKernel();
|
||||||
|
|
||||||
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
size_t minRequiredSizeForSchedulerSSH = HardwareCommandsHelper<FamilyType>::getSshSizeForExecutionModel(*parentKernel);
|
||||||
|
|
||||||
|
|||||||
@@ -1200,8 +1200,7 @@ HWCMDTEST_P(IGFX_GEN8_CORE, ParentKernelCommandsFromBinaryTest, getSizeRequiredF
|
|||||||
|
|
||||||
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
|
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
|
||||||
|
|
||||||
BuiltIns &builtIns = *pDevice->getExecutionEnvironment()->getBuiltIns();
|
auto &scheduler = pContext->getSchedulerKernel();
|
||||||
auto &scheduler = builtIns.getSchedulerKernel(*pContext);
|
|
||||||
auto schedulerSshSize = scheduler.getSurfaceStateHeapSize();
|
auto schedulerSshSize = scheduler.getSurfaceStateHeapSize();
|
||||||
totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0);
|
totalSize += schedulerSshSize + ((schedulerSshSize != 0) ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0);
|
||||||
|
|
||||||
|
|||||||
@@ -351,12 +351,9 @@ TEST_F(OfflineCompilerTests, GoodParseBinToCharArray) {
|
|||||||
"#include \"runtime/built_ins/registry/built_ins_registry.h\"\n\n"
|
"#include \"runtime/built_ins/registry/built_ins_registry.h\"\n\n"
|
||||||
"namespace NEO {\n"
|
"namespace NEO {\n"
|
||||||
"static RegisterEmbeddedResource registerSchedulerBin(\n"
|
"static RegisterEmbeddedResource registerSchedulerBin(\n"
|
||||||
" createBuiltinResourceName(\n"
|
" \"" +
|
||||||
" EBuiltInOps::Scheduler,\n"
|
gEnvironment->familyNameWithType + "_0_scheduler.igdrcl_built_in.bin\",\n"
|
||||||
" BuiltinCode::getExtension(BuiltinCode::ECodeType::Binary), \"" +
|
" (const char *)SchedulerBinary_" +
|
||||||
familyNameWithType + "\", 0)\n"
|
|
||||||
" .c_str(),\n"
|
|
||||||
" (const char *)SchedulerBinary_" +
|
|
||||||
familyNameWithType + ",\n"
|
familyNameWithType + ",\n"
|
||||||
" SchedulerBinarySize_" +
|
" SchedulerBinarySize_" +
|
||||||
familyNameWithType + ");\n"
|
familyNameWithType + ");\n"
|
||||||
|
|||||||
Reference in New Issue
Block a user