Move the inline scheduler-simulation call out of CommandQueueHw::enqueueHandler
into a new member, CommandQueueHw::runSchedulerSimulation, declared in
command_queue_hw.h and defined in command_queue_hw_bdw_plus.inl. The helper
passes the device queue's buffers, the parent kernel's reflection surface and
this queue's SURFACE_STATE indirect-heap allocation to
BuiltinKernelsSimulation::SchedulerSimulation::runSchedulerSimulation.
scheduler_simulation.cpp is added to BUILTIN_KERNELS_SIMULATION_SRCS only for
gens where SUPPORT_DEVICE_ENQUEUE_<GEN_TYPE> evaluates true.

NOTE(review): this patch was recovered from a whitespace-collapsed copy. The
indentation, the blank context lines and the <GfxFamily> template argument
lists were reconstructed so that every hunk matches its @@ line counts;
confirm them against the tree before applying (or apply with
--ignore-whitespace).

diff --git a/runtime/builtin_kernels_simulation/CMakeLists.txt b/runtime/builtin_kernels_simulation/CMakeLists.txt
index 9c26ee506c..ce761c08f1 100644
--- a/runtime/builtin_kernels_simulation/CMakeLists.txt
+++ b/runtime/builtin_kernels_simulation/CMakeLists.txt
@@ -37,7 +37,9 @@ endif()
 
 macro(macro_for_each_gen)
   list(APPEND DEFAULT_GEN_PLATFORMS_DEFITIONS DEFAULT_${GEN_TYPE}_PLATFORM=${DEFAULT_SUPPORTED_${GEN_TYPE}_PLATFORM})
-  list(APPEND BUILTIN_KERNELS_SIMULATION_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${GEN_TYPE_LOWER}/scheduler_simulation.cpp)
+  if(${SUPPORT_DEVICE_ENQUEUE_${GEN_TYPE}})
+    list(APPEND BUILTIN_KERNELS_SIMULATION_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/${GEN_TYPE_LOWER}/scheduler_simulation.cpp)
+  endif()
 endmacro()
 
 apply_macro_for_each_gen("SUPPORTED")
diff --git a/runtime/command_queue/command_queue_hw.h b/runtime/command_queue/command_queue_hw.h
index 255f08219b..99afd16dc4 100644
--- a/runtime/command_queue/command_queue_hw.h
+++ b/runtime/command_queue/command_queue_hw.h
@@ -417,6 +417,7 @@ class CommandQueueHw : public CommandQueue {
     bool isTaskLevelUpdateRequired(const uint32_t &taskLevel, const cl_event *eventWaitList, const cl_uint &numEventsInWaitList, unsigned int commandType);
     void obtainTaskLevelAndBlockedStatus(unsigned int &taskLevel, cl_uint &numEventsInWaitList, const cl_event *&eventWaitList, bool &blockQueueStatus, unsigned int commandType) override;
     void forceDispatchScheduler(NEO::MultiDispatchInfo &multiDispatchInfo);
+    void runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel);
     static void computeOffsetsValueForRectCommands(size_t *bufferOffset,
                                                    size_t *hostOffset,
                                                    const size_t *bufferOrigin,
diff --git a/runtime/command_queue/command_queue_hw_bdw_plus.inl b/runtime/command_queue/command_queue_hw_bdw_plus.inl
index 21238b707a..d80c6aaebe 100644
--- a/runtime/command_queue/command_queue_hw_bdw_plus.inl
+++ b/runtime/command_queue/command_queue_hw_bdw_plus.inl
@@ -9,6 +9,20 @@
 
 namespace NEO {
 
+template <typename GfxFamily>
+void CommandQueueHw<GfxFamily>::runSchedulerSimulation(DeviceQueueHw<GfxFamily> &devQueueHw, Kernel &parentKernel) {
+    BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
+    simulation.runSchedulerSimulation(devQueueHw.getQueueBuffer(),
+                                      devQueueHw.getStackBuffer(),
+                                      devQueueHw.getEventPoolBuffer(),
+                                      devQueueHw.getSlbBuffer(),
+                                      devQueueHw.getDshBuffer(),
+                                      parentKernel.getKernelReflectionSurface(),
+                                      devQueueHw.getQueueStorageBuffer(),
+                                      this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
+                                      devQueueHw.getDebugQueue());
+}
+
 template <typename GfxFamily>
 void CommandQueueHw<GfxFamily>::submitCacheFlush(Surface **surfaces,
                                                  size_t numSurfaces,
diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h
index 0674d5473e..14cc7405f0 100644
--- a/runtime/command_queue/enqueue_common.h
+++ b/runtime/command_queue/enqueue_common.h
@@ -277,17 +277,7 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
 
             if (devQueueHw->getSchedulerReturnInstance() > 0) {
                 waitUntilComplete(completionStamp.taskCount, completionStamp.flushStamp, false);
-
-                BuiltinKernelsSimulation::SchedulerSimulation<GfxFamily> simulation;
-                simulation.runSchedulerSimulation(devQueueHw->getQueueBuffer(),
-                                                  devQueueHw->getStackBuffer(),
-                                                  devQueueHw->getEventPoolBuffer(),
-                                                  devQueueHw->getSlbBuffer(),
-                                                  devQueueHw->getDshBuffer(),
-                                                  parentKernel->getKernelReflectionSurface(),
-                                                  devQueueHw->getQueueStorageBuffer(),
-                                                  this->getIndirectHeap(IndirectHeap::SURFACE_STATE, 0u).getGraphicsAllocation(),
-                                                  devQueueHw->getDebugQueue());
+                this->runSchedulerSimulation(*devQueueHw, *parentKernel);
             }
         }
     } else if (enqueueProperties.isFlushWithoutKernelRequired()) {