Remove device enqueue part 1

Remove:
- tests with matcher for device enqueue
- classes: MockDeviceQueueHw, DeviceQueueHw, SchedulerSimulation,
  DeviceQueueHwTest, KernelArgDevQueueTest
- functions: forceDispatchScheduler, processDeviceEnqueue, dispatchScheduler

Related-To: NEO-6559
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2022-01-03 17:29:57 +00:00
committed by Compute-Runtime-Automation
parent 61e5e0687d
commit a9ebb49fb5
61 changed files with 61 additions and 4980 deletions

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2021 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -8,9 +8,6 @@ set(BUILTIN_KERNELS_SIMULATION_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp
${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.cpp
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.inl
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.h
)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

View File

@@ -1,78 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
#include <cstdint>
#include <thread>
namespace NEO {
class GraphicsAllocation;
}
namespace BuiltinKernelsSimulation {
extern bool conditionReady;
extern std::thread threads[];
// Driver for running the scheduler simulation on host threads for a given GfxFamily.
// NOTE(review): "host-side simulation of the scheduler builtin kernel" is inferred from the
// namespace and class names — confirm against the startScheduler implementation.
template <typename GfxFamily>
class SchedulerSimulation {
public:
// Entry point. Sets `simulationRun` and, when `enabled` is true, initializes the simulation,
// runs startScheduler with index 0 on the calling thread, joins the worker threads and cleans up.
// All allocations are forwarded unchanged to initializeSchedulerSimulation and startScheduler.
void runSchedulerSimulation(NEO::GraphicsAllocation *queue,
NEO::GraphicsAllocation *commandsStack,
NEO::GraphicsAllocation *eventsPool,
NEO::GraphicsAllocation *secondaryBatchBuffer,
NEO::GraphicsAllocation *dsh,
NEO::GraphicsAllocation *reflectionSurface,
NEO::GraphicsAllocation *queueStorageBuffer,
NEO::GraphicsAllocation *ssh,
NEO::GraphicsAllocation *debugQueue);
// Clears threadIDToLocalIDmap and deletes pGlobalBarrier.
void cleanSchedulerSimulation();
// Per-thread body of the simulation; `index` is the thread's ID (0 is used for the calling
// thread, 1..NUM_OF_THREADS-1 for the spawned std::threads). Definition is not visible here.
static void startScheduler(uint32_t index,
NEO::GraphicsAllocation *queue,
NEO::GraphicsAllocation *commandsStack,
NEO::GraphicsAllocation *eventsPool,
NEO::GraphicsAllocation *secondaryBatchBuffer,
NEO::GraphicsAllocation *dsh,
NEO::GraphicsAllocation *reflectionSurface,
NEO::GraphicsAllocation *queueStorageBuffer,
NEO::GraphicsAllocation *ssh,
NEO::GraphicsAllocation *debugQueue);
// Prepares the shared simulation state (local size, thread-ID map, global barrier) and spawns
// the worker threads, each running startScheduler with the same set of allocations.
void initializeSchedulerSimulation(NEO::GraphicsAllocation *queue,
NEO::GraphicsAllocation *commandsStack,
NEO::GraphicsAllocation *eventsPool,
NEO::GraphicsAllocation *secondaryBatchBuffer,
NEO::GraphicsAllocation *dsh,
NEO::GraphicsAllocation *reflectionSurface,
NEO::GraphicsAllocation *queueStorageBuffer,
NEO::GraphicsAllocation *ssh,
NEO::GraphicsAllocation *debugQueue);
// Declared with OpenCL-C style types (uint, uint3, __global); presumably these come from
// opencl_c.h, which this header includes — TODO confirm. Definition is not visible here.
static void patchGpGpuWalker(uint secondLevelBatchOffset,
__global uint *secondaryBatchBuffer,
uint interfaceDescriptorOffset,
uint simdSize,
uint totalLocalWorkSize,
uint3 dimSize,
uint3 startPoint,
uint numberOfHwThreadsPerWg,
uint indirectPayloadSize,
uint ioHoffset);
// When false, runSchedulerSimulation only records the request and does no work.
static bool enabled;
// Set to true by every call to runSchedulerSimulation, regardless of `enabled`.
static bool simulationRun;
};
// The simulation is enabled by default.
template <typename GfxFamily>
bool SchedulerSimulation<GfxFamily>::enabled = true;
// No simulation has been requested until runSchedulerSimulation is called.
template <typename GfxFamily>
bool SchedulerSimulation<GfxFamily>::simulationRun = false;
} // namespace BuiltinKernelsSimulation

View File

@@ -1,97 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
#include <cstdint>
#include <mutex>
#include <thread>
using namespace NEO;
namespace BuiltinKernelsSimulation {
// Releases the state created by initializeSchedulerSimulation: forgets the thread-ID to
// local-ID mapping and destroys the global barrier.
template <typename GfxFamily>
void SchedulerSimulation<GfxFamily>::cleanSchedulerSimulation() {
    threadIDToLocalIDmap.clear();
    delete pGlobalBarrier;
    // Reset the shared pointer so it never dangles: a repeated cleanup (or one without a
    // preceding initialize) then deletes nullptr, which is a no-op, instead of double-freeing.
    pGlobalBarrier = nullptr;
}
// Prepares shared simulation state and starts the worker threads.
// Every allocation argument is forwarded unchanged to startScheduler on each spawned thread.
template <typename GfxFamily>
void SchedulerSimulation<GfxFamily>::initializeSchedulerSimulation(GraphicsAllocation *queue,
GraphicsAllocation *commandsStack,
GraphicsAllocation *eventsPool,
GraphicsAllocation *secondaryBatchBuffer,
GraphicsAllocation *dsh,
GraphicsAllocation *reflectionSurface,
GraphicsAllocation *queueStorageBuffer,
GraphicsAllocation *ssh,
GraphicsAllocation *debugQueue) {
// One-dimensional local size: NUM_OF_THREADS x 1 x 1.
localSize[0] = NUM_OF_THREADS;
localSize[1] = 1;
localSize[2] = 1;
// Start from an empty thread-ID map and a fresh barrier sized for NUM_OF_THREADS participants.
threadIDToLocalIDmap.clear();
pGlobalBarrier = new SynchronizationBarrier(NUM_OF_THREADS);
// Thread ID 0 is not spawned here: runSchedulerSimulation runs it on the calling thread.
// Only IDs 1..NUM_OF_THREADS-1 get their own std::thread.
for (uint32_t i = 1; i < NUM_OF_THREADS; i++) {
threads[i] = std::thread(startScheduler, i, queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue);
}
// Set only after all workers have been created.
// NOTE(review): presumably the workers wait on this flag; it is a plain bool — confirm how it is synchronized.
conditionReady = true;
}
// Runs one scheduler simulation pass over the given allocations.
//
// `simulationRun` is set on every call. When `enabled` is false nothing else happens.
// Otherwise the simulation state is initialized (which spawns the worker threads), the
// calling thread executes startScheduler with index 0, and afterwards the workers are
// joined and the simulation state is released.
template <typename GfxFamily>
void SchedulerSimulation<GfxFamily>::runSchedulerSimulation(GraphicsAllocation *queue,
                                                            GraphicsAllocation *commandsStack,
                                                            GraphicsAllocation *eventsPool,
                                                            GraphicsAllocation *secondaryBatchBuffer,
                                                            GraphicsAllocation *dsh,
                                                            GraphicsAllocation *reflectionSurface,
                                                            GraphicsAllocation *queueStorageBuffer,
                                                            GraphicsAllocation *ssh,
                                                            GraphicsAllocation *debugQueue) {
    // Recorded even when the simulation itself is disabled.
    simulationRun = true;
    if (!enabled) {
        return;
    }

    initializeSchedulerSimulation(queue,
                                  commandsStack,
                                  eventsPool,
                                  secondaryBatchBuffer,
                                  dsh,
                                  reflectionSurface,
                                  queueStorageBuffer,
                                  ssh,
                                  debugQueue);

    // start main thread with LID == 0
    startScheduler(0,
                   queue,
                   commandsStack,
                   eventsPool,
                   secondaryBatchBuffer,
                   dsh,
                   reflectionSurface,
                   queueStorageBuffer,
                   ssh,
                   debugQueue);

    // Wait for all threads on main thread: only the thread whose entry in
    // threadIDToLocalIDmap is 0 joins the workers and tears the simulation down.
    if (threadIDToLocalIDmap[std::this_thread::get_id()] == 0) {
        for (uint32_t i = 1; i < NUM_OF_THREADS; i++) {
            threads[i].join();
        }
        cleanSchedulerSimulation();
    }
}
} // namespace BuiltinKernelsSimulation

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2021 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -13,8 +13,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_disabled_device_enqueue.inl
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_enabled_device_enqueue.inl
${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_state.h
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h
@@ -43,8 +41,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker.h
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_bdw_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_disabled_device_enqueue.inl
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_enabled_device_enqueue.inl
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface.h
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_and_later.inl

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -15,7 +15,6 @@
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/helpers/queue_helpers.h"
#include "opencl/source/mem_obj/mem_obj.h"
@@ -502,8 +501,6 @@ class CommandQueueHw : public CommandQueue {
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
void forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo);
void runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel);
static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
size_t *hostOffset,
const size_t *bufferOrigin,
@@ -513,10 +510,6 @@ class CommandQueueHw : public CommandQueue {
size_t bufferSlicePitch,
size_t hostRowPitch,
size_t hostSlicePitch);
void processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
const MultiDispatchInfo &multiDispatchInfo,
TagNodeBase *hwTimeStamps,
bool &blocking);
template <uint32_t commandType>
void processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo,
@@ -524,7 +517,6 @@ class CommandQueueHw : public CommandQueue {
Event *event,
TagNodeBase *&hwTimeStamps,
bool blockQueue,
DeviceQueueHw<GfxFamily> *devQueueHw,
CsrDependencies &csrDeps,
KernelOperation *blockedCommandsData,
TimestampPacketDependencies &timestampPacketDependencies);

View File

@@ -1,15 +0,0 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/command_queue_hw.h"
namespace NEO {
// Variant of runSchedulerSimulation with an empty body: both arguments are ignored and no
// simulation is started.
// NOTE(review): presumably the implementation used where device enqueue is disabled — confirm
// against the file that includes this definition.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
}
} // namespace NEO

View File

@@ -1,25 +0,0 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/command_queue_hw.h"
namespace NEO {
// Feeds the device queue's buffers, the parent kernel's reflection surface and this queue's
// surface-state heap allocation into a SchedulerSimulation run.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
    BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> schedulerSimulation;

    // Gather every input up front so the call below reads as a plain argument list.
    auto queueBuffer = devQueueHw.getQueueBuffer();
    auto stackBuffer = devQueueHw.getStackBuffer();
    auto eventPoolBuffer = devQueueHw.getEventPoolBuffer();
    auto slbBuffer = devQueueHw.getSlbBuffer();
    auto dshBuffer = devQueueHw.getDshBuffer();
    auto reflectionSurface = parentKernel.getKernelReflectionSurface();
    auto queueStorageBuffer = devQueueHw.getQueueStorageBuffer();
    auto sshAllocation = this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation();
    auto debugQueue = devQueueHw.getDebugQueue();

    schedulerSimulation.runSchedulerSimulation(queueBuffer,
                                               stackBuffer,
                                               eventPoolBuffer,
                                               slbBuffer,
                                               dshBuffer,
                                               reflectionSurface,
                                               queueStorageBuffer,
                                               sshAllocation,
                                               debugQueue);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,6 @@
#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/command_queue/command_queue_hw_base.inl"
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
#include "opencl/source/memory_manager/resource_surface.h"
namespace NEO {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -22,7 +22,6 @@
#include "shared/source/utilities/tag_allocator.h"
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/command_queue/hardware_interface.h"
@@ -67,45 +66,40 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
auto auxTranslationMode = AuxTranslationMode::None;
if (DebugManager.flags.ForceDispatchScheduler.get()) {
forceDispatchScheduler(multiDispatchInfo);
kernel->updateAuxTranslationRequired();
if (kernel->isAuxTranslationRequired()) {
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
if (!kernelObjsForAuxTranslation.empty()) {
auxTranslationMode = HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
}
}
if (AuxTranslationMode::Builtin == auxTranslationMode) {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
builtInLock.takeOwnership(builder, this->context);
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
}
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
builder.setKernel(kernel);
builder.bake(multiDispatchInfo);
} else {
auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
kernel->updateAuxTranslationRequired();
if (kernel->isAuxTranslationRequired()) {
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
if (!kernelObjsForAuxTranslation.empty()) {
auxTranslationMode = HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
}
if (multiDispatchInfo.size() == 0) {
return;
}
}
if (AuxTranslationMode::Builtin == auxTranslationMode) {
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
builtInLock.takeOwnership(builder, this->context);
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
}
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
builder.setKernel(kernel);
builder.bake(multiDispatchInfo);
} else {
auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
if (multiDispatchInfo.size() == 0) {
return;
}
}
if (AuxTranslationMode::Builtin == auxTranslationMode) {
UNRECOVERABLE_IF(kernel->isParentKernel);
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
}
if (AuxTranslationMode::Builtin == auxTranslationMode) {
UNRECOVERABLE_IF(kernel->isParentKernel);
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
}
if (AuxTranslationMode::Blit == auxTranslationMode) {
@@ -115,35 +109,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
}
// Builds a one-dimensional dispatch of the context's scheduler kernel, wires the scheduler's
// arguments to the default device queue's buffers, and appends that dispatch to
// multiDispatchInfo. Aborts (castToObjectOrAbort) if the default device queue is not a
// DeviceQueueHw<GfxFamily>.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo) {
    SchedulerKernel &schedulerKernel = getContext().getSchedulerKernel();
    auto defaultDevQueue = this->getContext().getDefaultDeviceQueue();
    DeviceQueueHw<GfxFamily> *hwDevQueue = castToObjectOrAbort<DeviceQueueHw<GfxFamily>>(defaultDevQueue);

    // 1D dispatch: global and local sizes come from the scheduler kernel, offset is zero.
    DispatchInfo schedulerDispatch(defaultDevQueue->getClDevice(),
                                   &schedulerKernel,
                                   1,
                                   Vec3<size_t>(schedulerKernel.getGws(), 1, 1),
                                   Vec3<size_t>(schedulerKernel.getLws(), 1, 1),
                                   Vec3<size_t>(0, 0, 0));
    Vec3<size_t> numWorkgroups = generateWorkgroupsNumber(schedulerDispatch.getGWS(), schedulerDispatch.getEnqueuedWorkgroupSize());
    schedulerDispatch.setTotalNumberOfWorkgroups(numWorkgroups);
    schedulerDispatch.setNumberOfWorkgroups(numWorkgroups);

    // The reflection surface must exist before it can be handed to setArgs.
    schedulerKernel.createReflectionSurface();
    GraphicsAllocation *schedulerReflectionSurface = schedulerKernel.getKernelReflectionSurface();

    hwDevQueue->resetDeviceQueue();

    schedulerKernel.setArgs(hwDevQueue->getQueueBuffer(),
                            hwDevQueue->getStackBuffer(),
                            hwDevQueue->getEventPoolBuffer(),
                            hwDevQueue->getSlbBuffer(),
                            hwDevQueue->getDshBuffer(),
                            schedulerReflectionSurface,
                            hwDevQueue->getQueueStorageBuffer(),
                            this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation());

    multiDispatchInfo.push(schedulerDispatch);
}
template <typename GfxFamily>
template <uint32_t commandType>
void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
@@ -163,8 +128,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
auto devQueue = this->getContext().getDefaultDeviceQueue();
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
TagNodeBase *hwTimeStamps = nullptr;
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
@@ -183,11 +146,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
auto taskLevel = 0u;
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType);
if (parentKernel && !blockQueue) {
while (!devQueueHw->isEMCriticalSectionFree())
;
}
enqueueHandlerHook(commandType, multiDispatchInfo);
bool clearDependenciesForSubCapture = false;
@@ -262,7 +220,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (multiDispatchInfo.empty() == false) {
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
hwTimeStamps, blockQueue, csrDeps, blockedCommandsData.get(),
timestampPacketDependencies);
} else if (isCacheFlushCommand(commandType)) {
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
@@ -310,9 +268,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
}
}
if (!blockQueue) {
if (parentKernel) {
processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking);
}
if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) {
csrDeps.makeResident(computeCommandStreamReceiver);
@@ -332,15 +287,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
taskLevel,
printfHandler.get(),
getBcsForAuxTranslation());
if (parentKernel) {
computeCommandStreamReceiver.setMediaVFEStateDirty(true);
if (devQueueHw->getSchedulerReturnInstance() > 0) {
waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
this->runSchedulerSimulation(*devQueueHw, *parentKernel);
}
}
} else if (enqueueProperties.isFlushWithoutKernelRequired()) {
completionStamp = enqueueCommandWithoutKernel(
surfacesForResidency,
@@ -438,7 +384,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
Event *event,
TagNodeBase *&hwTimeStamps,
bool blockQueue,
DeviceQueueHw<GfxFamily> *devQueueHw,
CsrDependencies &csrDeps,
KernelOperation *blockedCommandsData,
TimestampPacketDependencies &timestampPacketDependencies) {
@@ -474,10 +419,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
parentKernel->patchEventPool(context->getDefaultDeviceQueue());
parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());
if (!blockQueue) {
devQueueHw->resetDeviceQueue();
devQueueHw->acquireEMCriticalSection();
}
}
if (event && this->isPerfCountersEnabled()) {
@@ -651,56 +592,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(Comm
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress);
}
// Programs the device-enqueue (execution model) part of a parent-kernel submission:
// sets up the device queue for the dispatch, binds the scheduler kernel's arguments,
// emits the scheduler dispatch into this queue's command stream and makes the required
// allocations resident.
//
// devQueueHw        - device queue whose buffers and dynamic-state heap are used.
// multiDispatchInfo - must carry a parent kernel; it is dereferenced without a null check
//                     (the visible caller only invokes this when parentKernel is non-null).
// hwTimeStamps      - forwarded to setupExecutionModelDispatch.
// blocking          - in/out: forced to true when the parent kernel requires aux translation.
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
const MultiDispatchInfo &multiDispatchInfo,
TagNodeBase *hwTimeStamps,
bool &blocking) {
auto parentKernel = multiDispatchInfo.peekParentKernel();
// Surface-state heap size requested below for the execution model.
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
bool isCcsUsed = EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType());
// NOTE(review): presumably the task count this submission will receive — confirm.
uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
parentKernel,
(uint32_t)multiDispatchInfo.size(),
getGpgpuCommandStreamReceiver().getTagAllocation()->getGpuAddress(),
taskCount,
hwTimeStamps,
isCcsUsed);
// Bind the scheduler kernel to the device queue's buffers, the parent kernel's reflection
// surface and this queue's surface-state heap allocation.
SchedulerKernel &scheduler = getContext().getSchedulerKernel();
scheduler.setArgs(devQueueHw->getQueueBuffer(),
devQueueHw->getStackBuffer(),
devQueueHw->getEventPoolBuffer(),
devQueueHw->getSlbBuffer(),
devQueueHw->getDshBuffer(),
parentKernel->getKernelReflectionSurface(),
devQueueHw->getQueueStorageBuffer(),
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
devQueueHw->getDebugQueue());
auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo);
// The scheduler is dispatched into this queue's command stream, using the device queue's
// dynamic-state heap rather than the command queue's own.
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
*this->commandStream,
*devQueueHw,
preemptionMode,
scheduler,
&getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
isCcsUsed);
scheduler.makeResident(getGpgpuCommandStreamReceiver());
parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getGpgpuCommandStreamReceiver());
// Aux translation on the parent kernel forces the enqueue to be blocking.
if (parentKernel->isAuxTranslationRequired()) {
blocking = true;
}
}
template <typename GfxFamily>
void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) {
auto isQueueBlockedStatus = isQueueBlocked();
@@ -842,17 +733,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
IndirectHeap *dsh = nullptr;
IndirectHeap *ioh = nullptr;
if (multiDispatchInfo.peekParentKernel()) {
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(this->getContext().getDefaultDeviceQueue());
DEBUG_BREAK_IF(pDevQueue == nullptr);
dsh = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
// In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
ioh = dsh;
implicitFlush = true;
} else {
dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u);
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
}
dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u);
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
auto allocNeedsFlushDC = false;
if (!device->isFullRangeSvm()) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -20,7 +20,6 @@
#include "opencl/source/command_queue/cl_local_work_size.h"
#include "opencl/source/command_queue/command_queue.h"
#include "opencl/source/context/context.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/helpers/hardware_commands_helper.h"
#include "opencl/source/helpers/task_information.h"
@@ -84,15 +83,6 @@ class GpgpuWalkerHelper {
TagNodeBase *timestampPacketNode,
const RootDeviceEnvironment &rootDeviceEnvironment);
static void dispatchScheduler(
LinearStream &commandStream,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh,
bool isCcsUsed);
static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);
private:
@@ -142,11 +132,6 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
if (heapType == IndirectHeap::SURFACE_STATE) {
expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
} else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
{
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());
DEBUG_BREAK_IF(pDevQueue == nullptr);
ih = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
}
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -191,17 +191,12 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
return expectedSizeCS;
}
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
for (auto &dispatchInfo : multiDispatchInfo) {
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel(), dispatchInfo);
size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0;
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
}
if (parentKernel) {
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler, DispatchInfo{});
}
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps);
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();

View File

@@ -1,22 +0,0 @@
/*
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/gpgpu_walker.h"
namespace NEO {
// Stub implementation of dispatchScheduler: ignores all arguments and unconditionally
// triggers UNRECOVERABLE_IF(true), so it must never be reached at runtime.
// NOTE(review): presumably the variant for builds without device enqueue — confirm.
template <typename GfxFamily>
void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
LinearStream &commandStream,
DeviceQueueHw<GfxFamily> &devQueueHw,
PreemptionMode preemptionMode,
SchedulerKernel &scheduler,
IndirectHeap *ssh,
IndirectHeap *dsh,
bool isCcsUsed) {
UNRECOVERABLE_IF(true);
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -16,7 +16,6 @@
#include "shared/source/helpers/simd_helper.h"
#include "opencl/source/command_queue/gpgpu_walker_base.inl"
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
#include "opencl/source/platform/platform.h"
namespace NEO {

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2018-2021 Intel Corporation
# Copyright (C) 2018-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -8,10 +8,6 @@ set(RUNTIME_SRCS_DEVICE_QUEUE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/device_queue.cpp
${CMAKE_CURRENT_SOURCE_DIR}/device_queue.h
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw.h
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_skl_and_later.inl
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_profiling.inl
)
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DEVICE_QUEUE})
set_property(GLOBAL PROPERTY RUNTIME_SRCS_DEVICE_QUEUE ${RUNTIME_SRCS_DEVICE_QUEUE})

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -12,7 +12,6 @@
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/dispatch_info.h"
#include "opencl/source/helpers/queue_helpers.h"
@@ -186,10 +185,6 @@ void DeviceQueue::resetDeviceQueue() {
return;
}
// Base-class implementation: deliberately does nothing with any of its arguments.
void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
}
// Base-class implementation: returns nullptr for every heap type. Callers must be prepared
// for a null result unless they hold a derived queue that overrides this method.
IndirectHeap *DeviceQueue::getIndirectHeap(IndirectHeap::Type type) {
return nullptr;
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -83,7 +83,6 @@ class DeviceQueue : public BaseObject<_device_queue> {
}
virtual void resetDeviceQueue();
virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed);
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
void acquireEMCriticalSection() {

View File

@@ -1,96 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "opencl/source/device_queue/device_queue.h"
#include "opencl/source/kernel/kernel.h"
#include "opencl/source/program/program.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include "hw_cmds.h"
namespace NEO {
// Hardware-family-specific device queue built on top of DeviceQueue.
// Owns a LinearStream over the SLB (`slbCS`) and a typed view (`igilQueue`) of the queue
// buffer's underlying memory.
template <typename GfxFamily>
class DeviceQueueHw : public DeviceQueue {
    using BaseClass = DeviceQueue;

    // Command and state types of the targeted hardware family.
    using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
    using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
    using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
    using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
    using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
    using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
    using MI_MATH = typename GfxFamily::MI_MATH;
    using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;

  public:
    // Constructs the base queue, allocates the SLB buffer, computes the DSH offset (color-calc
    // state plus all interface descriptor tables) and maps the queue buffer as IGIL_CommandQueue.
    DeviceQueueHw(Context *context,
                  ClDevice *device,
                  cl_queue_properties &properties) : BaseClass(context, device, properties) {
        allocateSlbBuffer();

        const auto interfaceDescriptorSize = static_cast<uint32_t>(sizeof(INTERFACE_DESCRIPTOR_DATA));
        offsetDsh = colorCalcStateSize + interfaceDescriptorSize * interfaceDescriptorEntries * numberOfIDTables;

        igilQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
    }

    // Factory; yields nullptr instead of throwing when the allocation fails.
    static DeviceQueue *create(Context *context,
                               ClDevice *device,
                               cl_queue_properties &properties) {
        return new (std::nothrow) DeviceQueueHw<GfxFamily>(context, device, properties);
    }

    IndirectHeap *getIndirectHeap(IndirectHeap::Type type) override;

    // Command stream over the SLB.
    LinearStream *getSlbCS() { return &slbCS; }

    void resetDSH();
    size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);

    void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override;
    void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override;
    void resetDeviceQueue() override;
    void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override;

    // Reads the scheduler early-return control word straight from the mapped queue.
    uint32_t getSchedulerReturnInstance() {
        return igilQueue->m_controls.m_SchedulerEarlyReturn;
    }

    static size_t getCSPrefetchSize();

  protected:
    void allocateSlbBuffer();
    size_t getMinimumSlbSize();
    size_t getWaCommandsSize();
    void addArbCheckCmdWa();
    void addMiAtomicCmdWa(uint64_t atomicOpPlaceholder);
    void addLriCmdWa(bool setArbCheck);
    void addLriCmd(bool setArbCheck);
    void addPipeControlCmdWa(bool isNoopCmd = false);
    void initPipeControl(PIPE_CONTROL *pc);
    void buildSlbDummyCommands();
    void addDcFlushToPipeControlWa(PIPE_CONTROL *pc);

    void addProfilingEndCmds(uint64_t timestampAddress);
    static size_t getProfilingEndCmdsSize();

    MOCKABLE_VIRTUAL void addMediaStateClearCmds();
    static size_t getMediaStateClearCmdsSize();

    static size_t getExecutionModelCleanupSectionSize();
    static uint64_t getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed);

    LinearStream slbCS;
    IGIL_CommandQueue *igilQueue = nullptr;
};
} // namespace NEO

View File

@@ -1,29 +0,0 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl/source/command_queue/gpgpu_walker.h"
#include "opencl/source/device_queue/device_queue_hw.h"
namespace NEO {
// Appends the profiling-end sequence to the SLB command stream: a PIPE_CONTROL with
// command-streamer stall enabled, followed by an MI_STORE_REGISTER_MEM that writes the low
// GP thread-time register to timestampAddress.
template <typename GfxFamily>
void DeviceQueueHw<GfxFamily>::addProfilingEndCmds(uint64_t timestampAddress) {
    // Build each command from the family's init template, then copy it into the stream.
    auto *stallSlot = reinterpret_cast<PIPE_CONTROL *>(slbCS.getSpace(sizeof(PIPE_CONTROL)));
    auto stallCmd = GfxFamily::cmdInitPipeControl;
    stallCmd.setCommandStreamerStallEnable(true);
    *stallSlot = stallCmd;

    // low part
    auto *storeLowSlot = reinterpret_cast<MI_STORE_REGISTER_MEM *>(slbCS.getSpace(sizeof(MI_STORE_REGISTER_MEM)));
    auto storeLowCmd = GfxFamily::cmdInitStoreRegisterMem;
    GpgpuWalkerHelper<GfxFamily>::adjustMiStoreRegMemMode(&storeLowCmd);
    storeLowCmd.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
    storeLowCmd.setMemoryAddress(timestampAddress);
    *storeLowSlot = storeLowCmd;
}
} // namespace NEO

View File

@@ -10,7 +10,6 @@
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
namespace NEO {

View File

@@ -8,7 +8,6 @@
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"

View File

@@ -8,7 +8,6 @@
#include "shared/source/gen11/hw_info.h"
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
namespace NEO {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -9,7 +9,6 @@
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
#include "command_queue_helpers_gen12lp.inl"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,7 +8,6 @@
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -11,7 +11,6 @@
#include "shared/source/memory_manager/graphics_allocation.h"
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
namespace NEO {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -10,7 +10,6 @@
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
namespace NEO {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,7 +8,6 @@
#include "shared/source/gen8/hw_info.h"
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
namespace NEO {

View File

@@ -10,7 +10,6 @@
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
namespace NEO {

View File

@@ -8,7 +8,6 @@
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"

View File

@@ -8,7 +8,6 @@
#include "shared/source/gen9/hw_cmds_base.h"
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
namespace NEO {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -145,7 +145,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation();
auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership();
bool isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
if (executionModelKernel) {
while (!devQueue->isEMCriticalSectionFree())
@@ -175,39 +174,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
}
makeTimestampPacketsResident(commandStreamReceiver);
if (executionModelKernel) {
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount,
commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed);
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
scheduler.setArgs(devQueue->getQueueBuffer(),
devQueue->getStackBuffer(),
devQueue->getEventPoolBuffer(),
devQueue->getSlbBuffer(),
dsh->getGraphicsAllocation(),
kernel->getKernelReflectionSurface(),
devQueue->getQueueStorageBuffer(),
ssh->getGraphicsAllocation(),
devQueue->getDebugQueue());
devQueue->dispatchScheduler(
*kernelOperation->commandStream,
scheduler,
preemptionMode,
ssh,
dsh,
isCcsUsed);
scheduler.makeResident(commandStreamReceiver);
// Update SLM usage
slmUsed |= scheduler.getSlmTotalSize() > 0;
this->kernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver);
}
if (kernelOperation->blitPropertiesContainer.size() > 0) {
CsrDependencies csrDeps;
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsrForAuxTranslation, CsrDependencies::DependenciesType::All);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,7 +8,6 @@
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,7 +8,6 @@
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,7 +8,6 @@
#include "shared/source/helpers/populate_factory.h"
#include "opencl/source/command_queue/command_queue_hw.h"
#include "opencl/source/device_queue/device_queue_hw.h"
#include "opencl/source/helpers/cl_hw_helper.h"
#include "opencl/source/mem_obj/buffer.h"
#include "opencl/source/mem_obj/image.h"