mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Remove device enqueue part 1
Remove:
- tests with matcher for device enqueue
- classes: MockDeviceQueueHw, DeviceQueueHw, SchedulerSimulation, DeviceQueueHwTest, KernelArgDevQueueTest
- functions: forceDispatchScheduler, processDeviceEnqueue, dispatchScheduler

Related-To: NEO-6559
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
61e5e0687d
commit
a9ebb49fb5
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -8,9 +8,6 @@ set(BUILTIN_KERNELS_SIMULATION_SRCS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/opencl_c.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/scheduler_simulation.h
|
||||
)
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
|
||||
@@ -1,78 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "opencl/source/builtin_kernels_simulation/opencl_c.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <thread>
|
||||
namespace NEO {
|
||||
class GraphicsAllocation;
|
||||
}
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
extern bool conditionReady;
|
||||
extern std::thread threads[];
|
||||
|
||||
template <typename GfxFamily>
|
||||
class SchedulerSimulation {
|
||||
public:
|
||||
void runSchedulerSimulation(NEO::GraphicsAllocation *queue,
|
||||
NEO::GraphicsAllocation *commandsStack,
|
||||
NEO::GraphicsAllocation *eventsPool,
|
||||
NEO::GraphicsAllocation *secondaryBatchBuffer,
|
||||
NEO::GraphicsAllocation *dsh,
|
||||
NEO::GraphicsAllocation *reflectionSurface,
|
||||
NEO::GraphicsAllocation *queueStorageBuffer,
|
||||
NEO::GraphicsAllocation *ssh,
|
||||
NEO::GraphicsAllocation *debugQueue);
|
||||
|
||||
void cleanSchedulerSimulation();
|
||||
|
||||
static void startScheduler(uint32_t index,
|
||||
NEO::GraphicsAllocation *queue,
|
||||
NEO::GraphicsAllocation *commandsStack,
|
||||
NEO::GraphicsAllocation *eventsPool,
|
||||
NEO::GraphicsAllocation *secondaryBatchBuffer,
|
||||
NEO::GraphicsAllocation *dsh,
|
||||
NEO::GraphicsAllocation *reflectionSurface,
|
||||
NEO::GraphicsAllocation *queueStorageBuffer,
|
||||
NEO::GraphicsAllocation *ssh,
|
||||
NEO::GraphicsAllocation *debugQueue);
|
||||
|
||||
void initializeSchedulerSimulation(NEO::GraphicsAllocation *queue,
|
||||
NEO::GraphicsAllocation *commandsStack,
|
||||
NEO::GraphicsAllocation *eventsPool,
|
||||
NEO::GraphicsAllocation *secondaryBatchBuffer,
|
||||
NEO::GraphicsAllocation *dsh,
|
||||
NEO::GraphicsAllocation *reflectionSurface,
|
||||
NEO::GraphicsAllocation *queueStorageBuffer,
|
||||
NEO::GraphicsAllocation *ssh,
|
||||
NEO::GraphicsAllocation *debugQueue);
|
||||
|
||||
static void patchGpGpuWalker(uint secondLevelBatchOffset,
|
||||
__global uint *secondaryBatchBuffer,
|
||||
uint interfaceDescriptorOffset,
|
||||
uint simdSize,
|
||||
uint totalLocalWorkSize,
|
||||
uint3 dimSize,
|
||||
uint3 startPoint,
|
||||
uint numberOfHwThreadsPerWg,
|
||||
uint indirectPayloadSize,
|
||||
uint ioHoffset);
|
||||
static bool enabled;
|
||||
static bool simulationRun;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool SchedulerSimulation<GfxFamily>::enabled = true;
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool SchedulerSimulation<GfxFamily>::simulationRun = false;
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
||||
@@ -1,97 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
namespace BuiltinKernelsSimulation {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::cleanSchedulerSimulation() {
|
||||
threadIDToLocalIDmap.clear();
|
||||
delete pGlobalBarrier;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::initializeSchedulerSimulation(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
|
||||
localSize[0] = NUM_OF_THREADS;
|
||||
localSize[1] = 1;
|
||||
localSize[2] = 1;
|
||||
|
||||
threadIDToLocalIDmap.clear();
|
||||
pGlobalBarrier = new SynchronizationBarrier(NUM_OF_THREADS);
|
||||
|
||||
// Spawn Thread ID == 0 on main thread
|
||||
for (uint32_t i = 1; i < NUM_OF_THREADS; i++) {
|
||||
threads[i] = std::thread(startScheduler, i, queue, commandsStack, eventsPool, secondaryBatchBuffer, dsh, reflectionSurface, queueStorageBuffer, ssh, debugQueue);
|
||||
}
|
||||
|
||||
conditionReady = true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void SchedulerSimulation<GfxFamily>::runSchedulerSimulation(GraphicsAllocation *queue,
|
||||
GraphicsAllocation *commandsStack,
|
||||
GraphicsAllocation *eventsPool,
|
||||
GraphicsAllocation *secondaryBatchBuffer,
|
||||
GraphicsAllocation *dsh,
|
||||
GraphicsAllocation *reflectionSurface,
|
||||
GraphicsAllocation *queueStorageBuffer,
|
||||
GraphicsAllocation *ssh,
|
||||
GraphicsAllocation *debugQueue) {
|
||||
simulationRun = true;
|
||||
if (enabled) {
|
||||
initializeSchedulerSimulation(queue,
|
||||
commandsStack,
|
||||
eventsPool,
|
||||
secondaryBatchBuffer,
|
||||
dsh,
|
||||
reflectionSurface,
|
||||
queueStorageBuffer,
|
||||
ssh,
|
||||
debugQueue);
|
||||
|
||||
// start main thread with LID == 0
|
||||
startScheduler(0,
|
||||
queue,
|
||||
commandsStack,
|
||||
eventsPool,
|
||||
secondaryBatchBuffer,
|
||||
dsh,
|
||||
reflectionSurface,
|
||||
queueStorageBuffer,
|
||||
ssh,
|
||||
debugQueue);
|
||||
|
||||
// Wait for all threads on main thread
|
||||
if (threadIDToLocalIDmap[std::this_thread::get_id()] == 0) {
|
||||
|
||||
for (uint32_t i = 1; i < NUM_OF_THREADS; i++)
|
||||
threads[i].join();
|
||||
|
||||
cleanSchedulerSimulation();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace BuiltinKernelsSimulation
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -13,8 +13,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_bdw_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_disabled_device_enqueue.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/command_queue_hw_enabled_device_enqueue.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/copy_engine_state.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_data_transfer_handler.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csr_selection_args.h
|
||||
@@ -43,8 +41,6 @@ set(RUNTIME_SRCS_COMMAND_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_bdw_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_disabled_device_enqueue.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gpgpu_walker_enabled_device_enqueue.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/hardware_interface_bdw_and_later.inl
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -15,7 +15,6 @@
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/helpers/queue_helpers.h"
|
||||
#include "opencl/source/mem_obj/mem_obj.h"
|
||||
@@ -502,8 +501,6 @@ class CommandQueueHw : public CommandQueue {
|
||||
|
||||
bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
|
||||
void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
|
||||
void forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo);
|
||||
void runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel);
|
||||
static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
|
||||
size_t *hostOffset,
|
||||
const size_t *bufferOrigin,
|
||||
@@ -513,10 +510,6 @@ class CommandQueueHw : public CommandQueue {
|
||||
size_t bufferSlicePitch,
|
||||
size_t hostRowPitch,
|
||||
size_t hostSlicePitch);
|
||||
void processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
TagNodeBase *hwTimeStamps,
|
||||
bool &blocking);
|
||||
|
||||
template <uint32_t commandType>
|
||||
void processDispatchForKernels(const MultiDispatchInfo &multiDispatchInfo,
|
||||
@@ -524,7 +517,6 @@ class CommandQueueHw : public CommandQueue {
|
||||
Event *event,
|
||||
TagNodeBase *&hwTimeStamps,
|
||||
bool blockQueue,
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
CsrDependencies &csrDeps,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TimestampPacketDependencies &timestampPacketDependencies);
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -1,25 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
|
||||
BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
|
||||
simulation.runSchedulerSimulation(devQueueHw.getQueueBuffer(),
|
||||
devQueueHw.getStackBuffer(),
|
||||
devQueueHw.getEventPoolBuffer(),
|
||||
devQueueHw.getSlbBuffer(),
|
||||
devQueueHw.getDshBuffer(),
|
||||
parentKernel.getKernelReflectionSurface(),
|
||||
devQueueHw.getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
|
||||
devQueueHw.getDebugQueue());
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
#include "opencl/extensions/public/cl_ext_private.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_base.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/memory_manager/resource_surface.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -22,7 +22,6 @@
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
|
||||
#include "opencl/source/built_ins/builtins_dispatch_builder.h"
|
||||
#include "opencl/source/builtin_kernels_simulation/scheduler_simulation.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/command_queue/hardware_interface.h"
|
||||
@@ -67,45 +66,40 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
|
||||
auto auxTranslationMode = AuxTranslationMode::None;
|
||||
|
||||
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
||||
forceDispatchScheduler(multiDispatchInfo);
|
||||
kernel->updateAuxTranslationRequired();
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
|
||||
if (!kernelObjsForAuxTranslation.empty()) {
|
||||
auxTranslationMode = HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
|
||||
}
|
||||
}
|
||||
|
||||
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
|
||||
builtInLock.takeOwnership(builder, this->context);
|
||||
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
|
||||
}
|
||||
|
||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
|
||||
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
|
||||
builder.setKernel(kernel);
|
||||
builder.bake(multiDispatchInfo);
|
||||
} else {
|
||||
auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
|
||||
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
|
||||
|
||||
kernel->updateAuxTranslationRequired();
|
||||
if (kernel->isAuxTranslationRequired()) {
|
||||
kernel->fillWithKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
multiDispatchInfo.setKernelObjsForAuxTranslation(kernelObjsForAuxTranslation);
|
||||
|
||||
if (!kernelObjsForAuxTranslation.empty()) {
|
||||
auxTranslationMode = HwHelperHw<GfxFamily>::get().getAuxTranslationMode(device->getHardwareInfo());
|
||||
}
|
||||
if (multiDispatchInfo.size() == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
||||
auto &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::AuxTranslation, getClDevice());
|
||||
builtInLock.takeOwnership(builder, this->context);
|
||||
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::AuxToNonAux);
|
||||
}
|
||||
|
||||
if (kernel->getKernelInfo().builtinDispatchBuilder == nullptr) {
|
||||
DispatchInfoBuilder<SplitDispatch::Dim::d3D, SplitDispatch::SplitMode::WalkerSplit> builder(getClDevice());
|
||||
builder.setDispatchGeometry(workDim, workItems, enqueuedWorkSizes, globalOffsets, Vec3<size_t>{0, 0, 0}, localWorkSizesIn);
|
||||
builder.setKernel(kernel);
|
||||
builder.bake(multiDispatchInfo);
|
||||
} else {
|
||||
auto builder = kernel->getKernelInfo().builtinDispatchBuilder;
|
||||
builder->buildDispatchInfos(multiDispatchInfo, kernel, workDim, workItems, enqueuedWorkSizes, globalOffsets);
|
||||
|
||||
if (multiDispatchInfo.size() == 0) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
||||
UNRECOVERABLE_IF(kernel->isParentKernel);
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
|
||||
}
|
||||
if (AuxTranslationMode::Builtin == auxTranslationMode) {
|
||||
UNRECOVERABLE_IF(kernel->isParentKernel);
|
||||
dispatchAuxTranslationBuiltin(multiDispatchInfo, AuxTranslationDirection::NonAuxToAux);
|
||||
}
|
||||
|
||||
if (AuxTranslationMode::Blit == auxTranslationMode) {
|
||||
@@ -115,35 +109,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface *(&surfaces)[surfaceCount
|
||||
enqueueHandler<commandType>(surfaces, blocking, multiDispatchInfo, numEventsInWaitList, eventWaitList, event);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo) {
|
||||
SchedulerKernel &scheduler = getContext().getSchedulerKernel();
|
||||
|
||||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw = castToObjectOrAbort<DeviceQueueHw<GfxFamily>>(devQueue);
|
||||
|
||||
DispatchInfo dispatchInfo(devQueue->getClDevice(), &scheduler, 1, Vec3<size_t>(scheduler.getGws(), 1, 1), Vec3<size_t>(scheduler.getLws(), 1, 1), Vec3<size_t>(0, 0, 0));
|
||||
Vec3<size_t> workGroupCount = generateWorkgroupsNumber(dispatchInfo.getGWS(), dispatchInfo.getEnqueuedWorkgroupSize());
|
||||
dispatchInfo.setTotalNumberOfWorkgroups(workGroupCount);
|
||||
dispatchInfo.setNumberOfWorkgroups(workGroupCount);
|
||||
|
||||
scheduler.createReflectionSurface();
|
||||
GraphicsAllocation *reflectionSurface = scheduler.getKernelReflectionSurface();
|
||||
|
||||
devQueueHw->resetDeviceQueue();
|
||||
|
||||
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
||||
devQueueHw->getStackBuffer(),
|
||||
devQueueHw->getEventPoolBuffer(),
|
||||
devQueueHw->getSlbBuffer(),
|
||||
devQueueHw->getDshBuffer(),
|
||||
reflectionSurface,
|
||||
devQueueHw->getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation());
|
||||
|
||||
multiDispatchInfo.push(dispatchInfo);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
template <uint32_t commandType>
|
||||
void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
@@ -163,8 +128,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
auto devQueue = this->getContext().getDefaultDeviceQueue();
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw = castToObject<DeviceQueueHw<GfxFamily>>(devQueue);
|
||||
|
||||
TagNodeBase *hwTimeStamps = nullptr;
|
||||
CommandStreamReceiver &computeCommandStreamReceiver = getGpgpuCommandStreamReceiver();
|
||||
@@ -183,11 +146,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
auto taskLevel = 0u;
|
||||
obtainTaskLevelAndBlockedStatus(taskLevel, numEventsInWaitList, eventWaitList, blockQueue, commandType);
|
||||
|
||||
if (parentKernel && !blockQueue) {
|
||||
while (!devQueueHw->isEMCriticalSectionFree())
|
||||
;
|
||||
}
|
||||
|
||||
enqueueHandlerHook(commandType, multiDispatchInfo);
|
||||
|
||||
bool clearDependenciesForSubCapture = false;
|
||||
@@ -262,7 +220,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
|
||||
if (multiDispatchInfo.empty() == false) {
|
||||
processDispatchForKernels<commandType>(multiDispatchInfo, printfHandler, eventBuilder.getEvent(),
|
||||
hwTimeStamps, blockQueue, devQueueHw, csrDeps, blockedCommandsData.get(),
|
||||
hwTimeStamps, blockQueue, csrDeps, blockedCommandsData.get(),
|
||||
timestampPacketDependencies);
|
||||
} else if (isCacheFlushCommand(commandType)) {
|
||||
processDispatchForCacheFlush(surfacesForResidency, numSurfaceForResidency, &commandStream, csrDeps);
|
||||
@@ -310,9 +268,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
}
|
||||
if (!blockQueue) {
|
||||
if (parentKernel) {
|
||||
processDeviceEnqueue(devQueueHw, multiDispatchInfo, hwTimeStamps, blocking);
|
||||
}
|
||||
|
||||
if (enqueueProperties.operation == EnqueueProperties::Operation::GpuKernel) {
|
||||
csrDeps.makeResident(computeCommandStreamReceiver);
|
||||
@@ -332,15 +287,6 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
taskLevel,
|
||||
printfHandler.get(),
|
||||
getBcsForAuxTranslation());
|
||||
|
||||
if (parentKernel) {
|
||||
computeCommandStreamReceiver.setMediaVFEStateDirty(true);
|
||||
|
||||
if (devQueueHw->getSchedulerReturnInstance() > 0) {
|
||||
waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
|
||||
this->runSchedulerSimulation(*devQueueHw, *parentKernel);
|
||||
}
|
||||
}
|
||||
} else if (enqueueProperties.isFlushWithoutKernelRequired()) {
|
||||
completionStamp = enqueueCommandWithoutKernel(
|
||||
surfacesForResidency,
|
||||
@@ -438,7 +384,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
Event *event,
|
||||
TagNodeBase *&hwTimeStamps,
|
||||
bool blockQueue,
|
||||
DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
CsrDependencies &csrDeps,
|
||||
KernelOperation *blockedCommandsData,
|
||||
TimestampPacketDependencies &timestampPacketDependencies) {
|
||||
@@ -474,10 +419,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForKernels(const MultiDispatchInf
|
||||
parentKernel->patchDefaultDeviceQueue(context->getDefaultDeviceQueue());
|
||||
parentKernel->patchEventPool(context->getDefaultDeviceQueue());
|
||||
parentKernel->patchReflectionSurface(context->getDefaultDeviceQueue(), printfHandler.get());
|
||||
if (!blockQueue) {
|
||||
devQueueHw->resetDeviceQueue();
|
||||
devQueueHw->acquireEMCriticalSection();
|
||||
}
|
||||
}
|
||||
|
||||
if (event && this->isPerfCountersEnabled()) {
|
||||
@@ -651,56 +592,6 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(Comm
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::processDeviceEnqueue(DeviceQueueHw<GfxFamily> *devQueueHw,
|
||||
const MultiDispatchInfo &multiDispatchInfo,
|
||||
TagNodeBase *hwTimeStamps,
|
||||
bool &blocking) {
|
||||
auto parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
size_t minSizeSSHForEM = HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
|
||||
bool isCcsUsed = EngineHelpers::isCcs(gpgpuEngine->osContext->getEngineType());
|
||||
|
||||
uint32_t taskCount = getGpgpuCommandStreamReceiver().peekTaskCount() + 1;
|
||||
devQueueHw->setupExecutionModelDispatch(getIndirectHeap(IndirectHeap::SURFACE_STATE, minSizeSSHForEM),
|
||||
*devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
parentKernel,
|
||||
(uint32_t)multiDispatchInfo.size(),
|
||||
getGpgpuCommandStreamReceiver().getTagAllocation()->getGpuAddress(),
|
||||
taskCount,
|
||||
hwTimeStamps,
|
||||
isCcsUsed);
|
||||
|
||||
SchedulerKernel &scheduler = getContext().getSchedulerKernel();
|
||||
|
||||
scheduler.setArgs(devQueueHw->getQueueBuffer(),
|
||||
devQueueHw->getStackBuffer(),
|
||||
devQueueHw->getEventPoolBuffer(),
|
||||
devQueueHw->getSlbBuffer(),
|
||||
devQueueHw->getDshBuffer(),
|
||||
parentKernel->getKernelReflectionSurface(),
|
||||
devQueueHw->getQueueStorageBuffer(),
|
||||
this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
|
||||
devQueueHw->getDebugQueue());
|
||||
|
||||
auto preemptionMode = ClPreemptionHelper::taskPreemptionMode(getDevice(), multiDispatchInfo);
|
||||
GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
*this->commandStream,
|
||||
*devQueueHw,
|
||||
preemptionMode,
|
||||
scheduler,
|
||||
&getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u),
|
||||
devQueueHw->getIndirectHeap(IndirectHeap::DYNAMIC_STATE),
|
||||
isCcsUsed);
|
||||
|
||||
scheduler.makeResident(getGpgpuCommandStreamReceiver());
|
||||
|
||||
parentKernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(getGpgpuCommandStreamReceiver());
|
||||
|
||||
if (parentKernel->isAuxTranslationRequired()) {
|
||||
blocking = true;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandQueueHw<GfxFamily>::obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) {
|
||||
auto isQueueBlockedStatus = isQueueBlocked();
|
||||
@@ -842,17 +733,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
IndirectHeap *dsh = nullptr;
|
||||
IndirectHeap *ioh = nullptr;
|
||||
|
||||
if (multiDispatchInfo.peekParentKernel()) {
|
||||
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(this->getContext().getDefaultDeviceQueue());
|
||||
DEBUG_BREAK_IF(pDevQueue == nullptr);
|
||||
dsh = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
// In ExecutionModel IOH is the same as DSH to eliminate StateBaseAddress reprogramming for scheduler kernel and blocks.
|
||||
ioh = dsh;
|
||||
implicitFlush = true;
|
||||
} else {
|
||||
dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u);
|
||||
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
}
|
||||
dsh = &getIndirectHeap(IndirectHeap::DYNAMIC_STATE, 0u);
|
||||
ioh = &getIndirectHeap(IndirectHeap::INDIRECT_OBJECT, 0u);
|
||||
|
||||
auto allocNeedsFlushDC = false;
|
||||
if (!device->isFullRangeSvm()) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -20,7 +20,6 @@
|
||||
#include "opencl/source/command_queue/cl_local_work_size.h"
|
||||
#include "opencl/source/command_queue/command_queue.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/helpers/hardware_commands_helper.h"
|
||||
#include "opencl/source/helpers/task_information.h"
|
||||
@@ -84,15 +83,6 @@ class GpgpuWalkerHelper {
|
||||
TagNodeBase *timestampPacketNode,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
static void dispatchScheduler(
|
||||
LinearStream &commandStream,
|
||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh,
|
||||
bool isCcsUsed);
|
||||
|
||||
static void adjustMiStoreRegMemMode(MI_STORE_REG_MEM<GfxFamily> *storeCmd);
|
||||
|
||||
private:
|
||||
@@ -142,11 +132,6 @@ IndirectHeap &getIndirectHeap(CommandQueue &commandQueue, const MultiDispatchInf
|
||||
if (Kernel *parentKernel = multiDispatchInfo.peekParentKernel()) {
|
||||
if (heapType == IndirectHeap::SURFACE_STATE) {
|
||||
expectedSize += HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(*parentKernel);
|
||||
} else //if (heapType == IndirectHeap::DYNAMIC_STATE || heapType == IndirectHeap::INDIRECT_OBJECT)
|
||||
{
|
||||
DeviceQueueHw<GfxFamily> *pDevQueue = castToObject<DeviceQueueHw<GfxFamily>>(commandQueue.getContext().getDefaultDeviceQueue());
|
||||
DEBUG_BREAK_IF(pDevQueue == nullptr);
|
||||
ih = pDevQueue->getIndirectHeap(IndirectHeap::DYNAMIC_STATE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -191,17 +191,12 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
|
||||
return expectedSizeCS;
|
||||
}
|
||||
|
||||
Kernel *parentKernel = multiDispatchInfo.peekParentKernel();
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, dispatchInfo.getKernel(), dispatchInfo);
|
||||
size_t kernelObjAuxCount = multiDispatchInfo.getKernelObjsForAuxTranslation() != nullptr ? multiDispatchInfo.getKernelObjsForAuxTranslation()->size() : 0;
|
||||
expectedSizeCS += dispatchInfo.dispatchInitCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
|
||||
expectedSizeCS += dispatchInfo.dispatchEpilogueCommands.estimateCommandsSize(kernelObjAuxCount, hwInfo, commandQueueHw.isCacheFlushForBcsRequired());
|
||||
}
|
||||
if (parentKernel) {
|
||||
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredCS(eventType, reserveProfilingCmdsSpace, reservePerfCounters, commandQueue, &scheduler, DispatchInfo{});
|
||||
}
|
||||
if (commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
expectedSizeCS += TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDeps);
|
||||
expectedSizeCS += EnqueueOperation<GfxFamily>::getSizeRequiredForTimestampPacketWrite();
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
|
||||
namespace NEO {
|
||||
template <typename GfxFamily>
|
||||
void GpgpuWalkerHelper<GfxFamily>::dispatchScheduler(
|
||||
LinearStream &commandStream,
|
||||
DeviceQueueHw<GfxFamily> &devQueueHw,
|
||||
PreemptionMode preemptionMode,
|
||||
SchedulerKernel &scheduler,
|
||||
IndirectHeap *ssh,
|
||||
IndirectHeap *dsh,
|
||||
bool isCcsUsed) {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -16,7 +16,6 @@
|
||||
#include "shared/source/helpers/simd_helper.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_base.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/platform/platform.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2018-2021 Intel Corporation
|
||||
# Copyright (C) 2018-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -8,10 +8,6 @@ set(RUNTIME_SRCS_DEVICE_QUEUE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_base.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_skl_and_later.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_queue_hw_profiling.inl
|
||||
)
|
||||
target_sources(${NEO_STATIC_LIB_NAME} PRIVATE ${RUNTIME_SRCS_DEVICE_QUEUE})
|
||||
set_property(GLOBAL PROPERTY RUNTIME_SRCS_DEVICE_QUEUE ${RUNTIME_SRCS_DEVICE_QUEUE})
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -12,7 +12,6 @@
|
||||
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/context/context.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/dispatch_info.h"
|
||||
#include "opencl/source/helpers/queue_helpers.h"
|
||||
|
||||
@@ -186,10 +185,6 @@ void DeviceQueue::resetDeviceQueue() {
|
||||
return;
|
||||
}
|
||||
|
||||
void DeviceQueue::dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) {
|
||||
return;
|
||||
}
|
||||
|
||||
IndirectHeap *DeviceQueue::getIndirectHeap(IndirectHeap::Type type) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -83,7 +83,6 @@ class DeviceQueue : public BaseObject<_device_queue> {
|
||||
}
|
||||
|
||||
virtual void resetDeviceQueue();
|
||||
virtual void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed);
|
||||
virtual IndirectHeap *getIndirectHeap(IndirectHeap::Type type);
|
||||
|
||||
void acquireEMCriticalSection() {
|
||||
|
||||
@@ -1,96 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
#include "opencl/source/kernel/kernel.h"
|
||||
#include "opencl/source/program/program.h"
|
||||
#include "opencl/source/scheduler/scheduler_kernel.h"
|
||||
|
||||
#include "hw_cmds.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
class DeviceQueueHw : public DeviceQueue {
|
||||
using BaseClass = DeviceQueue;
|
||||
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
|
||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename GfxFamily::MI_MATH;
|
||||
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
|
||||
|
||||
public:
|
||||
DeviceQueueHw(Context *context,
|
||||
ClDevice *device,
|
||||
cl_queue_properties &properties) : BaseClass(context, device, properties) {
|
||||
allocateSlbBuffer();
|
||||
offsetDsh = colorCalcStateSize + (uint32_t)sizeof(INTERFACE_DESCRIPTOR_DATA) * interfaceDescriptorEntries * numberOfIDTables;
|
||||
igilQueue = reinterpret_cast<IGIL_CommandQueue *>(queueBuffer->getUnderlyingBuffer());
|
||||
}
|
||||
|
||||
static DeviceQueue *create(Context *context,
|
||||
ClDevice *device,
|
||||
cl_queue_properties &properties) {
|
||||
return new (std::nothrow) DeviceQueueHw<GfxFamily>(context, device, properties);
|
||||
}
|
||||
|
||||
IndirectHeap *getIndirectHeap(IndirectHeap::Type type) override;
|
||||
|
||||
LinearStream *getSlbCS() { return &slbCS; }
|
||||
void resetDSH();
|
||||
|
||||
size_t setSchedulerCrossThreadData(SchedulerKernel &scheduler);
|
||||
|
||||
void setupIndirectState(IndirectHeap &surfaceStateHeap, IndirectHeap &dynamicStateHeap, Kernel *parentKernel, uint32_t parentIDCount, bool isCcsUsed) override;
|
||||
|
||||
void addExecutionModelCleanUpSection(Kernel *parentKernel, TagNodeBase *hwTimeStamp, uint64_t tagAddress, uint32_t taskCount) override;
|
||||
void resetDeviceQueue() override;
|
||||
void dispatchScheduler(LinearStream &commandStream, SchedulerKernel &scheduler, PreemptionMode preemptionMode, IndirectHeap *ssh, IndirectHeap *dsh, bool isCcsUsed) override;
|
||||
|
||||
uint32_t getSchedulerReturnInstance() {
|
||||
return igilQueue->m_controls.m_SchedulerEarlyReturn;
|
||||
}
|
||||
|
||||
static size_t getCSPrefetchSize();
|
||||
|
||||
protected:
|
||||
void allocateSlbBuffer();
|
||||
size_t getMinimumSlbSize();
|
||||
size_t getWaCommandsSize();
|
||||
void addArbCheckCmdWa();
|
||||
void addMiAtomicCmdWa(uint64_t atomicOpPlaceholder);
|
||||
void addLriCmdWa(bool setArbCheck);
|
||||
void addLriCmd(bool setArbCheck);
|
||||
void addPipeControlCmdWa(bool isNoopCmd = false);
|
||||
void initPipeControl(PIPE_CONTROL *pc);
|
||||
void buildSlbDummyCommands();
|
||||
void addDcFlushToPipeControlWa(PIPE_CONTROL *pc);
|
||||
|
||||
void addProfilingEndCmds(uint64_t timestampAddress);
|
||||
static size_t getProfilingEndCmdsSize();
|
||||
|
||||
MOCKABLE_VIRTUAL void addMediaStateClearCmds();
|
||||
static size_t getMediaStateClearCmdsSize();
|
||||
|
||||
static size_t getExecutionModelCleanupSectionSize();
|
||||
static uint64_t getBlockKernelStartPointer(const Device &device, const KernelInfo *blockInfo, bool isCcsUsed);
|
||||
|
||||
LinearStream slbCS;
|
||||
IGIL_CommandQueue *igilQueue = nullptr;
|
||||
};
|
||||
} // namespace NEO
|
||||
@@ -1,29 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
void DeviceQueueHw<GfxFamily>::addProfilingEndCmds(uint64_t timestampAddress) {
|
||||
|
||||
auto pipeControlSpace = (PIPE_CONTROL *)slbCS.getSpace(sizeof(PIPE_CONTROL));
|
||||
auto pipeControlCmd = GfxFamily::cmdInitPipeControl;
|
||||
pipeControlCmd.setCommandStreamerStallEnable(true);
|
||||
*pipeControlSpace = pipeControlCmd;
|
||||
|
||||
//low part
|
||||
auto mICmdLowSpace = (MI_STORE_REGISTER_MEM *)slbCS.getSpace(sizeof(MI_STORE_REGISTER_MEM));
|
||||
auto mICmdLow = GfxFamily::cmdInitStoreRegisterMem;
|
||||
GpgpuWalkerHelper<GfxFamily>::adjustMiStoreRegMemMode(&mICmdLow);
|
||||
mICmdLow.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
mICmdLow.setMemoryAddress(timestampAddress);
|
||||
*mICmdLowSpace = mICmdLow;
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -10,7 +10,6 @@
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/gen11/hw_info.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -9,7 +9,6 @@
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
|
||||
#include "command_queue_helpers_gen12lp.inl"
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -11,7 +11,6 @@
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -10,7 +10,6 @@
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/gen8/hw_info.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/command_queue/command_queue_hw_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/command_queue_hw_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/enqueue_resource_barrier.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/gen9/hw_cmds_base.h"
|
||||
|
||||
#include "opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl"
|
||||
#include "opencl/source/command_queue/gpgpu_walker_disabled_device_enqueue.inl"
|
||||
#include "opencl/source/command_queue/hardware_interface_bdw_and_later.inl"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -145,7 +145,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
auto bcsCsrForAuxTranslation = commandQueue.getBcsForAuxTranslation();
|
||||
|
||||
auto commandStreamReceiverOwnership = commandStreamReceiver.obtainUniqueOwnership();
|
||||
bool isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType());
|
||||
|
||||
if (executionModelKernel) {
|
||||
while (!devQueue->isEMCriticalSectionFree())
|
||||
@@ -175,39 +174,6 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
}
|
||||
makeTimestampPacketsResident(commandStreamReceiver);
|
||||
|
||||
if (executionModelKernel) {
|
||||
uint32_t taskCount = commandStreamReceiver.peekTaskCount() + 1;
|
||||
devQueue->setupExecutionModelDispatch(*ssh, *dsh, kernel, kernelCount,
|
||||
commandStreamReceiver.getTagAllocation()->getGpuAddress(), taskCount, timestamp, isCcsUsed);
|
||||
|
||||
SchedulerKernel &scheduler = commandQueue.getContext().getSchedulerKernel();
|
||||
|
||||
scheduler.setArgs(devQueue->getQueueBuffer(),
|
||||
devQueue->getStackBuffer(),
|
||||
devQueue->getEventPoolBuffer(),
|
||||
devQueue->getSlbBuffer(),
|
||||
dsh->getGraphicsAllocation(),
|
||||
kernel->getKernelReflectionSurface(),
|
||||
devQueue->getQueueStorageBuffer(),
|
||||
ssh->getGraphicsAllocation(),
|
||||
devQueue->getDebugQueue());
|
||||
|
||||
devQueue->dispatchScheduler(
|
||||
*kernelOperation->commandStream,
|
||||
scheduler,
|
||||
preemptionMode,
|
||||
ssh,
|
||||
dsh,
|
||||
isCcsUsed);
|
||||
|
||||
scheduler.makeResident(commandStreamReceiver);
|
||||
|
||||
// Update SLM usage
|
||||
slmUsed |= scheduler.getSlmTotalSize() > 0;
|
||||
|
||||
this->kernel->getProgram()->getBlockKernelManager()->makeInternalAllocationsResident(commandStreamReceiver);
|
||||
}
|
||||
|
||||
if (kernelOperation->blitPropertiesContainer.size() > 0) {
|
||||
CsrDependencies csrDeps;
|
||||
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, *bcsCsrForAuxTranslation, CsrDependencies::DependenciesType::All);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -8,7 +8,6 @@
|
||||
#include "shared/source/helpers/populate_factory.h"
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
#include "opencl/source/device_queue/device_queue_hw.h"
|
||||
#include "opencl/source/helpers/cl_hw_helper.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
#include "opencl/source/mem_obj/image.h"
|
||||
|
||||
Reference in New Issue
Block a user